From f3f8c649c964f656f95fd7bee340cdf7c50d12d9 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Tue, 4 Aug 2020 16:09:02 +0800 Subject: [PATCH 01/11] update BiliBiliIE --- youtube_dl/extractor/bilibili.py | 439 +++++++++++++++++++++++-------- 1 file changed, 322 insertions(+), 117 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 4dc597e16..5928dcea3 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import hashlib import re +import json from .common import InfoExtractor from ..compat import ( @@ -26,12 +27,11 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - (?:(?:www|bangumi)\.)? + (?:(?:www)\.)? bilibili\.(?:tv|com)/ (?: (?: - video/[aA][vV]| - anime/(?P\d+)/play\# + video/[aA][vV] )(?P\d+)| video/[bB][vV](?P[^/?#&]+) ) @@ -120,141 +120,346 @@ class BiliBiliIE(InfoExtractor): else: raise ExtractorError('Can\'t extract Bangumi episode ID') + def _getfps(self,s:str)->int: + "convert fps to int" + if s.isnumeric() : + return int(s) + else : + r=re.search(r"([0-9]+)/([0-9]+)",s) + if r!=None : + r=r.groups() + return int(r[0])/int(r[1]) + else : + return 0 + + def _calculate_size(self,durl:list) -> int : + "Calculate total file size." + s=0 + for i in durl : + s=s+i['size'] + return s + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') or mobj.group('id_bv') - anime_id = mobj.group('anime_id') + query=compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + part=None + if 'p' in query and str(query['p'][0]).isnumeric() : + part=int(query['p'][0]) + + #Set Cookies need to parse the Links. + self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value="120")#Set default video quality + self._set_cookie(domain=".bilibili.com",name="CURRENT_FNVAL",value="16") + self._set_cookie(domain=".bilibili.com",name="laboratory",value="1-1")#Use new webpage API + self._set_cookie(domain=".bilibili.com",name="stardustvideo",value="1") + webpage = self._download_webpage(url, video_id) - if 'anime/' not in url: - cid = self._search_regex( - r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid', - default=None - ) or compat_parse_qs(self._search_regex( - [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)', - r']+src="https://secure\.bilibili\.com/secure,([^"]+)"'], - webpage, 'player parameters'))['cid'][0] - else: - if 'no_bangumi_tip' not in smuggled_data: - self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % ( - video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id))) - headers = { - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Referer': url - } - headers.update(self.geo_verification_headers()) + video_info=re.search(r"window\.__INITIAL_STATE__=([^;]+)",webpage,re.I) + if video_info != None : + video_info=json.loads(video_info.groups()[0]) + else : + self._report_error("") #TODO Should redirect to Bangumi episode ID. + #https://api.bilibili.com/x/web-interface/view/detail?bvid=&aid=&jsonp=jsonp have redirect links. 
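# Rough illustration of the parsing step above: the watch page embeds its state as
# "window.__INITIAL_STATE__={...};" in a <script> tag, so a non-greedy match up to the
# first ";" plus json.loads recovers it. The sample HTML below is a made-up stub, not a
# real Bilibili response.
import json
import re

sample_html = '<script>window.__INITIAL_STATE__={"videoData":{"aid":1,"bvid":"BV1xx"}};</script>'
match = re.search(r"window\.__INITIAL_STATE__=([^;]+)", sample_html, re.I)
if match is not None:
    state = json.loads(match.group(1))
    print(state["videoData"]["bvid"])  # -> BV1xx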
+ video_data=video_info['videoData'] + uploader_data=video_info['upData'] + aid=video_data['aid'] + bvid=video_data['bvid'] + video_count=video_data['videos'] - js = self._download_json( - 'http://bangumi.bilibili.com/web_api/get_source', video_id, - data=urlencode_postdata({'episode_id': video_id}), - headers=headers) - if 'result' not in js: - self._report_error(js) - cid = js['result']['cid'] + tags_info=self._download_json( + f"https://api.bilibili.com/x/web-interface/view/detail/tag?aid={aid}",video_id, + 'Geting video tags.','Unable to get Login/User Information.') + if tags_info['code']!=0 : + self._report_error(tags_info) + tags_info=tags_info['data'] + tags=[] + for i in tags_info : + tags.append(i['tag_name']) + + user_info=self._download_json("https://api.bilibili.com/x/web-interface/nav",video_id, + "Geting Login/User Information.","Unable to get Login/User Information.") + if user_info['code']!=0 and user_info['code']!=-101 : + self._report_error(user_info) + user_info=user_info['data'] + is_login=user_info['isLogin'] + if is_login: + is_vip=user_info['vipStatus'] + else : + is_vip=0 + is_durl=False # If return the durl Stream, this will be true - headers = { - 'Referer': url + info={ + 'id':video_id, + 'title':video_data['title'], + 'description':video_data['desc'], + 'timestamp':video_data['ctime'], + 'thumbnail':video_data['pic'], + 'uploader':uploader_data['name'], + 'uploader_id':uploader_data['mid'], + 'duration':video_data['duration'], + 'webpage_url':f'https://www.bilibili.com/video/av{aid}', + 'categories':[video_data['tname']], + 'view_count':video_data['stat']['viewseo'], + 'comment_count':video_data['stat']['reply'], + 'tags':tags } - headers.update(self.geo_verification_headers()) - entries = [] + if video_count==1 : + info.update({'alt_title':video_data['pages'][0]['part']}) - RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4') - for num, rendition in enumerate(RENDITIONS, start=1): - payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition) - sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() + new_api=True #Parse video links from webpage first. + first=True #First Part of List + entries=[] - video_info = self._download_json( - 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign), - video_id, note='Downloading video info page', - headers=headers, fatal=num == len(RENDITIONS)) - - if not video_info: + for part_info in video_data['pages'] : + if part != None and part_info["page"]!=part : continue - - if 'durl' not in video_info: - if num < len(RENDITIONS): + uri=f'https://www.bilibili.com/video/av{aid}?p={part_info["page"]}' + if first : + first=False + else : + webpage=self._download_webpage(uri,f"{video_id} Part{part_info['page']}") + headers={'referer':uri} + if new_api: + play_info=re.search(r"window\.__playinfo__=([^<]+)",webpage,re.I) #Get video links from webpage. 
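# Sketch of the JSON fallback used when window.__playinfo__ is missing: the patch queries
# api.bilibili.com/x/player/playurl with the part's cid, the bvid, a target quality (qn)
# and fnval=16 to request DASH data. Only the URL construction is shown; the cid/bvid
# values are placeholders and a real request also needs a Referer header (and cookies).
try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode        # Python 2

def playurl_api(cid, bvid, qn=120):
    query = urlencode({
        'cid': cid, 'bvid': bvid, 'qn': qn,
        'otype': 'json', 'fnver': 0, 'fnval': 16,
    })
    return 'https://api.bilibili.com/x/player/playurl?' + query

print(playurl_api(1176840, 'BV1xx411c7mD'))  # placeholder ids, illustration only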
+ if play_info != None : + play_info=json.loads(play_info.groups()[0]) + if play_info['code']!=0 : + self._report_error(play_info) + play_info=play_info['data'] + else : + new_api=False + play_info=self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn=120&otype=json&bvid={bvid}&fnver=0&fnval=16", + f"{video_id} Part{part_info['page']}", + "Geting video links.", + "Unable to get video links.") + if play_info['code']!=0 : + self._report_error(play_info) + play_info=play_info['data'] + else : + play_info=self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn=120&otype=json&bvid={bvid}&fnver=0&fnval=16", + f"{video_id} Part{part_info['page']}", + "Geting video links.", + "Unable to get video links.") + if play_info['code']!=0 : + self._report_error(play_info) + play_info=play_info['data'] + if 'durl' in play_info: # Stream for flv player + if video_count > 1 and len(play_info['durl']) > 1 and part==None : + self.report_warning( + f"There are multiply FLV files in this part. Please input \"{uri}\" to extract it.", + f"{video_id} Part{part_info['page']}") continue - self._report_error(video_info) - - for idx, durl in enumerate(video_info['durl']): - formats = [{ - 'url': durl['url'], - 'filesize': int_or_none(durl['size']), - }] - for backup_url in durl.get('backup_url', []): - formats.append({ - 'url': backup_url, - # backup URLs have lower priorities - 'preference': -2 if 'hd.mp4' in backup_url else -3, + is_durl=True + if video_count>1: + info.update({ + 'title':f"{info['title']} - {part_info['part']}", + 'id':f"{video_id} P{part_info['page']}" }) - - for a_format in formats: - a_format.setdefault('http_headers', {}).update({ - 'Referer': url, + video_quality=play_info['quality'] + accept_video_quality_desc=play_info['accept_description'] + accept_video_quality=play_info['accept_quality'] + video_desc_dict={} + for i in range(len(accept_video_quality)) : + video_desc_dict.update({ + accept_video_quality[i]:accept_video_quality_desc[i] }) + video_formats={video_quality:play_info['durl']} + video_formats_size={video_quality:self._calculate_size(play_info['durl'])} #Total Filesize Dict + durl_length=[len(play_info['durl'])] + for video_q in accept_video_quality : + if video_q not in video_formats : + if new_api : + self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value=f"{video_q}") + webpage=self._download_webpage(uri, + f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + play_info=re.search(r"window\.__playinfo__=([^<]+)",webpage,re.I) #Get video links from webpage. 
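# The loop above walks accept_quality and, for any quality it has not seen yet, sets the
# CURRENT_QUALITY cookie and refetches the page so the embedded playinfo carries that
# rendition. A minimal dry run of that bookkeeping, with fetch_playinfo() as a stand-in
# for the webpage/API round trip (not a real Bilibili call):
def fetch_playinfo(quality):
    # pretend the server answers with exactly the quality we asked for
    return {'quality': quality,
            'durl': [{'url': 'https://example.com/%d.flv' % quality, 'size': quality * 1000}]}

accept_quality = [80, 64, 32, 16]
formats_by_quality = {80: fetch_playinfo(80)['durl']}   # first response, as in the patch
for qn in accept_quality:
    if qn not in formats_by_quality:                    # only refetch missing renditions
        info = fetch_playinfo(qn)
        formats_by_quality[info['quality']] = info['durl']
print(sorted(formats_by_quality))                       # -> [16, 32, 64, 80]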
+ if play_info != None : + play_info=json.loads(play_info.groups()[0]) + if play_info['code']!=0 : + self._report_error(play_info) + play_info=play_info['data'] + else : + new_api=False + play_info=self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16",f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + if play_info['code']!=0 : + self._report_error(play_info) + play_info=play_info['data'] + else : + play_info=self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16",f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + if play_info['code']!=0 : + self._report_error(play_info) + play_info=play_info['data'] + if 'durl' in play_info : + video_formats[play_info["quality"]]=play_info['durl'] + video_formats_size[play_info["quality"]]=self._calculate_size(play_info['durl']) + durl_length.append(len(play_info['durl'])) + for i in range(max(durl_length)) : + entry={} + entry.update(info) + entry.update({'id':f"{info['id']} Part{i+1}"}) + formats_output=[] + for video_q in accept_video_quality : + durl=video_formats[video_q] + if i < len(durl) : + video_format=durl[i] + formats_output.append({ + "url":video_format['url'], + "format_id":f"{video_q}", + "ext":"flv", + "http_headers":headers, + "filesize":video_format['size'] + }) + entry['formats']=formats_output + entries.append(entry) + elif 'dash' in play_info : # Stream for dash player + video_quality=play_info['quality'] + accept_video_quality_desc=play_info['accept_description'] + accept_video_quality=play_info['accept_quality'] + accept_audio_quality=[] + dash=play_info['dash'] + video_quality_list=[] + video_desc_dict={} + for i in range(len(accept_video_quality)) : + video_desc_dict.update({ + accept_video_quality[i]:accept_video_quality_desc[i] + }) + video_formats={} + for video_format in dash['video'] : + if video_format['codecs'].startswith('hev') : #Let format id increase 1 to distinguish codec + video_quality_list.append(video_format['id']+1) + video_formats[video_format['id']+1]=video_format + else : + video_quality_list.append(video_format['id']) + video_formats[video_format['id']]=video_format + bs=True #Try to get all video formats + while bs: + bs=False + for video_q in accept_video_quality : + if video_q not in video_formats: + if not is_login and video_q <=32 : + bs=True + elif is_vip<1 and video_q<=80 and video_q!=74 : + bs=True + elif is_vip>0: + bs=True + if new_api : + self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value=f"{video_q}") + webpage=self._download_webpage(uri, + f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + play_info=re.search(r"window\.__playinfo__=([^<]+)",webpage,re.I) #Get video links from webpage. 
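# The DASH branch keys formats by Bilibili's quality id and bumps the id by one for HEVC
# entries so AVC and HEVC renditions of the same quality do not collide. Toy data below;
# the codec strings follow the 'hev'/'avc' prefixes checked in the patch.
dash_video = [
    {'id': 80, 'codecs': 'avc1.640032'},
    {'id': 80, 'codecs': 'hev1.1.6.L120.90'},
    {'id': 64, 'codecs': 'avc1.64001F'},
]
formats = {}
for fmt in dash_video:
    key = fmt['id'] + 1 if fmt['codecs'].startswith('hev') else fmt['id']
    formats[key] = fmt
print(sorted(formats))  # -> [64, 80, 81]  (81 is the HEVC variant of quality 80)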
+ if play_info != None : + play_info=json.loads(play_info.groups()[0]) + if play_info['code']!=0 : + self._report_error(play_info) + play_info=play_info['data'] + else : + new_api=False + play_info=self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16",f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + if play_info['code']!=0 : + self._report_error(play_info) + play_info=play_info['data'] + else : + play_info=self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", + f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + if play_info['code']!=0 : + self._report_error(play_info) + play_info=play_info['data'] + if 'dash' in play_info: + for video_format in play_info['dash']['video'] : + if video_format['codecs'].startswith('hev') : #Let format id increase 1 to distinguish codec + video_format_q=video_format['id']+1 + else : + video_format_q=video_format['id'] + if video_format_q not in video_formats : + video_quality_list.append(video_format_q) + video_formats[video_format_q]=video_format + bs=True + break + self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value="120") + entry={} + entry.update(info) + formats_output=[] + for i in video_quality_list : + video_format=video_formats[i] + formats_output.append( + {"url":video_format['base_url'], + "ext":"mp4", + "format_note":video_desc_dict[video_format['id']], + "format_id":f"{i}", + "vcodec":video_format['codecs'], + "fps":self._getfps(video_format['frame_rate']), + "width":video_format['width'], + "height":video_format['height'], + "http_headers":headers + }) + if 'audio' in dash and dash['audio']!=None : + for audio_format in dash['audio'] : + accept_audio_quality.append(audio_format['id']) + video_formats[audio_format['id']]=audio_format + accept_audio_quality.sort(reverse=True) + for audio_quality in accept_audio_quality : + audio_format=video_formats[audio_quality] + formats_output.append({ + "url":audio_format["base_url"], + "format_id":f"{audio_format['id']}", + "ext":"mp4", + "acodec":audio_format['codecs'], + "http_headers":headers + }) + entry.update({"formats":formats_output}) + if video_count > 1 : + entry.update({"title":f"{info['title']} - {part_info['part']}"}) + entry.update({"id":f"{video_id} P{part_info['page']}"}) + entries.append(entry) - self._sort_formats(formats) - - entries.append({ - 'id': '%s_part%s' % (video_id, idx), - 'duration': float_or_none(durl.get('length'), 1000), - 'formats': formats, + if video_count > 1 : + if len(entries) == 1 and not is_durl: + info.update({ + 'formats':entries[0]['formats'], + 'id':entries[0]['id'] }) - break - - title = self._html_search_regex( - (']+\btitle=(["\'])(?P(?:(?!\1).)+)\1', - '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title', - group='title') - description = self._html_search_meta('description', webpage) - timestamp = unified_timestamp(self._html_search_regex( - r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', - default=None) or self._html_search_meta( - 'uploadDate', webpage, 'timestamp', default=None)) - thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) - - # TODO 'view_count' requires deobfuscating Javascript - info = { - 'id': video_id, - 'title': 
title, - 'description': description, - 'timestamp': timestamp, - 'thumbnail': thumbnail, - 'duration': float_or_none(video_info.get('timelength'), scale=1000), - } - - uploader_mobj = re.search( - r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)', - webpage) - if uploader_mobj: + return info info.update({ - 'uploader': uploader_mobj.group('name'), - 'uploader_id': uploader_mobj.group('id'), + "_type":'multi_video', + "entries":entries }) - if not info.get('uploader'): - info['uploader'] = self._html_search_meta( - 'author', webpage, 'uploader', default=None) - - for entry in entries: - entry.update(info) - - if len(entries) == 1: - return entries[0] - else: - for idx, entry in enumerate(entries): - entry['id'] = '%s_part%d' % (video_id, (idx + 1)) - - return { - '_type': 'multi_video', - 'id': video_id, - 'title': title, - 'description': description, - 'entries': entries, - } + return info + else : + if not is_durl: + return entries[0] + else : + if len(entries)>1 : + info.update({ + "_type":'multi_video', + "entries":entries + }) + else : + info.update({ + "formats":entries[0]['formats'] + }) + return info class BiliBiliBangumiIE(InfoExtractor): From 5c5c58243b4b176701894971b1b1c237cbeb57eb Mon Sep 17 00:00:00 2001 From: lifegpc <g1710431395@gmail.com> Date: Tue, 4 Aug 2020 16:49:38 +0800 Subject: [PATCH 02/11] formatting the code --- youtube_dl/extractor/bilibili.py | 495 +++++++++++++++---------------- 1 file changed, 245 insertions(+), 250 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 5928dcea3..e8bcfd099 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -1,26 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib import re import json from .common import InfoExtractor from ..compat import ( - compat_parse_qs, compat_urlparse, ) from ..utils import ( ExtractorError, int_or_none, - float_or_none, parse_iso8601, smuggle_url, str_or_none, strip_jsonp, - unified_timestamp, unsmuggle_url, - urlencode_postdata, ) @@ -120,23 +115,23 @@ class BiliBiliIE(InfoExtractor): else: raise ExtractorError('Can\'t extract Bangumi episode ID') - def _getfps(self,s:str)->int: + def _getfps(self, s: str) -> int: "convert fps to int" - if s.isnumeric() : + if s.isnumeric(): return int(s) - else : - r=re.search(r"([0-9]+)/([0-9]+)",s) - if r!=None : - r=r.groups() - return int(r[0])/int(r[1]) - else : + else: + r = re.search(r"([0-9]+)/([0-9]+)", s) + if r is not None: + r = r.groups() + return int(r[0]) / int(r[1]) + else: return 0 - - def _calculate_size(self,durl:list) -> int : + + def _calculate_size(self, durl: list) -> int: "Calculate total file size." - s=0 - for i in durl : - s=s+i['size'] + s = 0 + for i in durl: + s = s + i['size'] return s def _real_extract(self, url): @@ -144,320 +139,320 @@ class BiliBiliIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') or mobj.group('id_bv') - query=compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - part=None - if 'p' in query and str(query['p'][0]).isnumeric() : - part=int(query['p'][0]) + query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + part = None + if 'p' in query and str(query['p'][0]).isnumeric(): + part = int(query['p'][0]) - #Set Cookies need to parse the Links. 
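# The _getfps helper reformatted in this hunk accepts either a plain integer string or a
# "num/den" fraction (DASH frame_rate values such as "30000/1001"). A standalone check of
# both paths; float division is used here so Python 2 would give the same fractional result:
import re

def parse_fps(s):
    if s.isnumeric():
        return int(s)
    m = re.search(r"([0-9]+)/([0-9]+)", s)
    return int(m.group(1)) / float(m.group(2)) if m else 0

print(parse_fps("60"))          # -> 60
print(parse_fps("30000/1001"))  # -> 29.97...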
- self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value="120")#Set default video quality - self._set_cookie(domain=".bilibili.com",name="CURRENT_FNVAL",value="16") - self._set_cookie(domain=".bilibili.com",name="laboratory",value="1-1")#Use new webpage API - self._set_cookie(domain=".bilibili.com",name="stardustvideo",value="1") + # Set Cookies need to parse the Links. + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") # Set default video quality + self._set_cookie(domain=".bilibili.com", name="CURRENT_FNVAL", value="16") + self._set_cookie(domain=".bilibili.com", name="laboratory", value="1-1") # Use new webpage API + self._set_cookie(domain=".bilibili.com", name="stardustvideo", value="1") webpage = self._download_webpage(url, video_id) - video_info=re.search(r"window\.__INITIAL_STATE__=([^;]+)",webpage,re.I) - if video_info != None : - video_info=json.loads(video_info.groups()[0]) - else : - self._report_error("") #TODO Should redirect to Bangumi episode ID. - #https://api.bilibili.com/x/web-interface/view/detail?bvid=&aid=&jsonp=jsonp have redirect links. - video_data=video_info['videoData'] - uploader_data=video_info['upData'] - aid=video_data['aid'] - bvid=video_data['bvid'] - video_count=video_data['videos'] + video_info = re.search(r"window\.__INITIAL_STATE__=([^;]+)", webpage, re.I) + if video_info is not None: + video_info = json.loads(video_info.groups()[0]) + else: + self._report_error("") # TODO Should redirect to Bangumi episode ID. + # https://api.bilibili.com/x/web-interface/view/detail?bvid=&aid=&jsonp=jsonp have redirect links. + video_data = video_info['videoData'] + uploader_data = video_info['upData'] + aid = video_data['aid'] + bvid = video_data['bvid'] + video_count = video_data['videos'] - tags_info=self._download_json( - f"https://api.bilibili.com/x/web-interface/view/detail/tag?aid={aid}",video_id, - 'Geting video tags.','Unable to get Login/User Information.') - if tags_info['code']!=0 : + tags_info = self._download_json( + f"https://api.bilibili.com/x/web-interface/view/detail/tag?aid={aid}", video_id, + 'Geting video tags.', 'Unable to get Login/User Information.') + if tags_info['code'] != 0: self._report_error(tags_info) - tags_info=tags_info['data'] - tags=[] - for i in tags_info : + tags_info = tags_info['data'] + tags = [] + for i in tags_info: tags.append(i['tag_name']) - - user_info=self._download_json("https://api.bilibili.com/x/web-interface/nav",video_id, - "Geting Login/User Information.","Unable to get Login/User Information.") - if user_info['code']!=0 and user_info['code']!=-101 : - self._report_error(user_info) - user_info=user_info['data'] - is_login=user_info['isLogin'] - if is_login: - is_vip=user_info['vipStatus'] - else : - is_vip=0 - is_durl=False # If return the durl Stream, this will be true - info={ - 'id':video_id, - 'title':video_data['title'], - 'description':video_data['desc'], - 'timestamp':video_data['ctime'], - 'thumbnail':video_data['pic'], - 'uploader':uploader_data['name'], - 'uploader_id':uploader_data['mid'], - 'duration':video_data['duration'], - 'webpage_url':f'https://www.bilibili.com/video/av{aid}', - 'categories':[video_data['tname']], - 'view_count':video_data['stat']['viewseo'], - 'comment_count':video_data['stat']['reply'], - 'tags':tags + user_info = self._download_json("https://api.bilibili.com/x/web-interface/nav", video_id, + "Geting Login/User Information.", "Unable to get Login/User Information.") + if user_info['code'] != 0 and user_info['code'] != -101: + 
self._report_error(user_info) + user_info = user_info['data'] + is_login = user_info['isLogin'] + if is_login: + is_vip = user_info['vipStatus'] + else: + is_vip = 0 + is_durl = False # If return the durl Stream, this will be true + + info = { + 'id': video_id, + 'title': video_data['title'], + 'description': video_data['desc'], + 'timestamp': video_data['ctime'], + 'thumbnail': video_data['pic'], + 'uploader': uploader_data['name'], + 'uploader_id': uploader_data['mid'], + 'duration': video_data['duration'], + 'webpage_url': f'https://www.bilibili.com/video/av{aid}', + 'categories': [video_data['tname']], + 'view_count': video_data['stat']['viewseo'], + 'comment_count': video_data['stat']['reply'], + 'tags': tags } - if video_count==1 : - info.update({'alt_title':video_data['pages'][0]['part']}) + if video_count == 1: + info.update({'alt_title': video_data['pages'][0]['part']}) - new_api=True #Parse video links from webpage first. - first=True #First Part of List - entries=[] + new_api = True # Parse video links from webpage first. + first = True # First Part of List + entries = [] - for part_info in video_data['pages'] : - if part != None and part_info["page"]!=part : + for part_info in video_data['pages']: + if part is not None and part_info["page"] != part: continue - uri=f'https://www.bilibili.com/video/av{aid}?p={part_info["page"]}' - if first : - first=False - else : - webpage=self._download_webpage(uri,f"{video_id} Part{part_info['page']}") - headers={'referer':uri} + uri = f'https://www.bilibili.com/video/av{aid}?p={part_info["page"]}' + if first: + first = False + else: + webpage = self._download_webpage(uri, f"{video_id} Part{part_info['page']}") + headers = {'referer': uri} if new_api: - play_info=re.search(r"window\.__playinfo__=([^<]+)",webpage,re.I) #Get video links from webpage. - if play_info != None : - play_info=json.loads(play_info.groups()[0]) - if play_info['code']!=0 : + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - new_api=False - play_info=self._download_json( + play_info = play_info['data'] + else: + new_api = False + play_info = self._download_json( f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn=120&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", "Geting video links.", "Unable to get video links.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - play_info=self._download_json( + play_info = play_info['data'] + else: + play_info = self._download_json( f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn=120&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", "Geting video links.", "Unable to get video links.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - if 'durl' in play_info: # Stream for flv player - if video_count > 1 and len(play_info['durl']) > 1 and part==None : + play_info = play_info['data'] + if 'durl' in play_info: # Stream for flv player + if video_count > 1 and len(play_info['durl']) > 1 and part is None: self.report_warning( f"There are multiply FLV files in this part. 
Please input \"{uri}\" to extract it.", f"{video_id} Part{part_info['page']}") continue - is_durl=True - if video_count>1: + is_durl = True + if video_count > 1: info.update({ - 'title':f"{info['title']} - {part_info['part']}", - 'id':f"{video_id} P{part_info['page']}" + 'title': f"{info['title']} - {part_info['part']}", + 'id': f"{video_id} P{part_info['page']}" }) - video_quality=play_info['quality'] - accept_video_quality_desc=play_info['accept_description'] - accept_video_quality=play_info['accept_quality'] - video_desc_dict={} - for i in range(len(accept_video_quality)) : + video_quality = play_info['quality'] + accept_video_quality_desc = play_info['accept_description'] + accept_video_quality = play_info['accept_quality'] + video_desc_dict = {} + for i in range(len(accept_video_quality)): video_desc_dict.update({ - accept_video_quality[i]:accept_video_quality_desc[i] + accept_video_quality[i]: accept_video_quality_desc[i] }) - video_formats={video_quality:play_info['durl']} - video_formats_size={video_quality:self._calculate_size(play_info['durl'])} #Total Filesize Dict - durl_length=[len(play_info['durl'])] - for video_q in accept_video_quality : - if video_q not in video_formats : - if new_api : - self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value=f"{video_q}") - webpage=self._download_webpage(uri, - f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") - play_info=re.search(r"window\.__playinfo__=([^<]+)",webpage,re.I) #Get video links from webpage. - if play_info != None : - play_info=json.loads(play_info.groups()[0]) - if play_info['code']!=0 : + video_formats = {video_quality: play_info['durl']} + video_formats_size = {video_quality: self._calculate_size(play_info['durl'])} # Total Filesize Dict + durl_length = [len(play_info['durl'])] + for video_q in accept_video_quality: + if video_q not in video_formats: + if new_api: + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=f"{video_q}") + webpage = self._download_webpage(uri, + f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. 
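# Each FLV rendition arrives as a list of segment dicts in 'durl'; _calculate_size above
# simply sums their 'size' fields so a total filesize per quality can be tracked.
# Equivalent one-liner over made-up segment data:
durl = [{'size': 10 * 1024 * 1024}, {'size': 7 * 1024 * 1024}]
total = sum(seg['size'] for seg in durl)
print(total)  # -> 17825792 bytes across both segments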
+ if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - new_api=False - play_info=self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16",f"{video_id} Part{part_info['page']}", + play_info = play_info['data'] + else: + new_api = False + play_info = self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", f"Geting video links for format id : {video_q}.", f"Unable to get video links for format id : {video_q}.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - play_info=self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16",f"{video_id} Part{part_info['page']}", + play_info = play_info['data'] + else: + play_info = self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", f"Geting video links for format id : {video_q}.", f"Unable to get video links for format id : {video_q}.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - if 'durl' in play_info : - video_formats[play_info["quality"]]=play_info['durl'] - video_formats_size[play_info["quality"]]=self._calculate_size(play_info['durl']) + play_info = play_info['data'] + if 'durl' in play_info: + video_formats[play_info["quality"]] = play_info['durl'] + video_formats_size[play_info["quality"]] = self._calculate_size(play_info['durl']) durl_length.append(len(play_info['durl'])) - for i in range(max(durl_length)) : - entry={} + for i in range(max(durl_length)): + entry = {} entry.update(info) - entry.update({'id':f"{info['id']} Part{i+1}"}) - formats_output=[] - for video_q in accept_video_quality : - durl=video_formats[video_q] - if i < len(durl) : - video_format=durl[i] + entry.update({'id': f"{info['id']} Part{i+1}"}) + formats_output = [] + for video_q in accept_video_quality: + durl = video_formats[video_q] + if i < len(durl): + video_format = durl[i] formats_output.append({ - "url":video_format['url'], - "format_id":f"{video_q}", - "ext":"flv", - "http_headers":headers, - "filesize":video_format['size'] + "url": video_format['url'], + "format_id": f"{video_q}", + "ext": "flv", + "http_headers": headers, + "filesize": video_format['size'] }) - entry['formats']=formats_output + entry['formats'] = formats_output entries.append(entry) - elif 'dash' in play_info : # Stream for dash player - video_quality=play_info['quality'] - accept_video_quality_desc=play_info['accept_description'] - accept_video_quality=play_info['accept_quality'] - accept_audio_quality=[] - dash=play_info['dash'] - video_quality_list=[] - video_desc_dict={} - for i in range(len(accept_video_quality)) : + elif 'dash' in play_info: # Stream for dash player + video_quality = play_info['quality'] + accept_video_quality_desc = play_info['accept_description'] + accept_video_quality = play_info['accept_quality'] + accept_audio_quality = [] + dash = play_info['dash'] + video_quality_list = [] + video_desc_dict = {} + for i in range(len(accept_video_quality)): video_desc_dict.update({ - 
accept_video_quality[i]:accept_video_quality_desc[i] + accept_video_quality[i]: accept_video_quality_desc[i] }) - video_formats={} - for video_format in dash['video'] : - if video_format['codecs'].startswith('hev') : #Let format id increase 1 to distinguish codec - video_quality_list.append(video_format['id']+1) - video_formats[video_format['id']+1]=video_format - else : + video_formats = {} + for video_format in dash['video']: + if video_format['codecs'].startswith('hev'): # Let format id increase 1 to distinguish codec + video_quality_list.append(video_format['id'] + 1) + video_formats[video_format['id'] + 1] = video_format + else: video_quality_list.append(video_format['id']) - video_formats[video_format['id']]=video_format - bs=True #Try to get all video formats + video_formats[video_format['id']] = video_format + bs = True # Try to get all video formats while bs: - bs=False - for video_q in accept_video_quality : + bs = False + for video_q in accept_video_quality: if video_q not in video_formats: - if not is_login and video_q <=32 : - bs=True - elif is_vip<1 and video_q<=80 and video_q!=74 : - bs=True - elif is_vip>0: - bs=True - if new_api : - self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value=f"{video_q}") - webpage=self._download_webpage(uri, - f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") - play_info=re.search(r"window\.__playinfo__=([^<]+)",webpage,re.I) #Get video links from webpage. - if play_info != None : - play_info=json.loads(play_info.groups()[0]) - if play_info['code']!=0 : + if not is_login and video_q <= 32: + bs = True + elif is_vip < 1 and video_q <= 80 and video_q != 74: + bs = True + elif is_vip > 0: + bs = True + if new_api: + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=f"{video_q}") + webpage = self._download_webpage(uri, + f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. 
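# The retry loop above only keeps asking for a missing quality when the account can
# actually receive it: anonymous sessions are capped at qn<=32, non-VIP logins at qn<=80
# (except 74), and VIP accounts may retry anything. A compact restatement of that gate
# (thresholds copied from the patch, not from any official documentation):
def should_retry(quality, is_login, is_vip):
    if not is_login:
        return quality <= 32
    if is_vip < 1:
        return quality <= 80 and quality != 74
    return True

print(should_retry(116, is_login=True, is_vip=0))  # -> False (gate treats it as VIP-only)
print(should_retry(32, is_login=False, is_vip=0))  # -> True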
+ if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - new_api=False - play_info=self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16",f"{video_id} Part{part_info['page']}", + play_info = play_info['data'] + else: + new_api = False + play_info = self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", f"Geting video links for format id : {video_q}.", f"Unable to get video links for format id : {video_q}.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - play_info=self._download_json( + play_info = play_info['data'] + else: + play_info = self._download_json( f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", f"Geting video links for format id : {video_q}.", f"Unable to get video links for format id : {video_q}.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] + play_info = play_info['data'] if 'dash' in play_info: - for video_format in play_info['dash']['video'] : - if video_format['codecs'].startswith('hev') : #Let format id increase 1 to distinguish codec - video_format_q=video_format['id']+1 - else : - video_format_q=video_format['id'] - if video_format_q not in video_formats : + for video_format in play_info['dash']['video']: + if video_format['codecs'].startswith('hev'): # Let format id increase 1 to distinguish codec + video_format_q = video_format['id'] + 1 + else: + video_format_q = video_format['id'] + if video_format_q not in video_formats: video_quality_list.append(video_format_q) - video_formats[video_format_q]=video_format - bs=True + video_formats[video_format_q] = video_format + bs = True break - self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value="120") - entry={} + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") + entry = {} entry.update(info) - formats_output=[] - for i in video_quality_list : - video_format=video_formats[i] + formats_output = [] + for i in video_quality_list: + video_format = video_formats[i] formats_output.append( - {"url":video_format['base_url'], - "ext":"mp4", - "format_note":video_desc_dict[video_format['id']], - "format_id":f"{i}", - "vcodec":video_format['codecs'], - "fps":self._getfps(video_format['frame_rate']), - "width":video_format['width'], - "height":video_format['height'], - "http_headers":headers - }) - if 'audio' in dash and dash['audio']!=None : - for audio_format in dash['audio'] : + {"url": video_format['base_url'], + "ext": "mp4", + "format_note": video_desc_dict[video_format['id']], + "format_id": f"{i}", + "vcodec": video_format['codecs'], + "fps": self._getfps(video_format['frame_rate']), + "width": video_format['width'], + "height": video_format['height'], + "http_headers": headers + }) + if 'audio' in dash and dash['audio'] is not None: + for audio_format in dash['audio']: accept_audio_quality.append(audio_format['id']) - video_formats[audio_format['id']]=audio_format + video_formats[audio_format['id']] = audio_format accept_audio_quality.sort(reverse=True) - for audio_quality in 
accept_audio_quality : - audio_format=video_formats[audio_quality] + for audio_quality in accept_audio_quality: + audio_format = video_formats[audio_quality] formats_output.append({ - "url":audio_format["base_url"], - "format_id":f"{audio_format['id']}", - "ext":"mp4", - "acodec":audio_format['codecs'], - "http_headers":headers + "url": audio_format["base_url"], + "format_id": f"{audio_format['id']}", + "ext": "mp4", + "acodec": audio_format['codecs'], + "http_headers": headers }) - entry.update({"formats":formats_output}) - if video_count > 1 : - entry.update({"title":f"{info['title']} - {part_info['part']}"}) - entry.update({"id":f"{video_id} P{part_info['page']}"}) + entry.update({"formats": formats_output}) + if video_count > 1: + entry.update({"title": f"{info['title']} - {part_info['part']}"}) + entry.update({"id": f"{video_id} P{part_info['page']}"}) entries.append(entry) - if video_count > 1 : + if video_count > 1: if len(entries) == 1 and not is_durl: info.update({ - 'formats':entries[0]['formats'], - 'id':entries[0]['id'] + 'formats': entries[0]['formats'], + 'id': entries[0]['id'] }) return info info.update({ - "_type":'multi_video', - "entries":entries + "_type": 'multi_video', + "entries": entries }) return info - else : + else: if not is_durl: return entries[0] - else : - if len(entries)>1 : + else: + if len(entries) > 1: info.update({ - "_type":'multi_video', - "entries":entries + "_type": 'multi_video', + "entries": entries }) - else : + else: info.update({ - "formats":entries[0]['formats'] + "formats": entries[0]['formats'] }) return info From e53d77b452233d355efaa43316234e1f4e3e6ae8 Mon Sep 17 00:00:00 2001 From: lifegpc <g1710431395@gmail.com> Date: Tue, 4 Aug 2020 17:07:50 +0800 Subject: [PATCH 03/11] remove useless links --- youtube_dl/extractor/bilibili.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index e8bcfd099..8db8c8c01 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -47,20 +47,6 @@ class BiliBiliIE(InfoExtractor): 'uploader': '菊子桑', 'uploader_id': '156160', }, - }, { - # Tested in BiliBiliBangumiIE - 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062', - 'only_matching': True, - }, { - 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643', - 'md5': '3f721ad1e75030cc06faf73587cfec57', - 'info_dict': { - 'id': '100643', - 'ext': 'mp4', - 'title': 'CHAOS;CHILD', - 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', - }, - 'skip': 'Geo-restricted to China', }, { # Title with double quotes 'url': 'http://www.bilibili.com/video/av8903802/', From afbb51d5b094790d6349382aeea49929911d1cc9 Mon Sep 17 00:00:00 2001 From: lifegpc <g1710431395@gmail.com> Date: Tue, 4 Aug 2020 17:10:23 +0800 Subject: [PATCH 04/11] REMOVE Unspport synix in PYTHON2 --- youtube_dl/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 8db8c8c01..2cd26bd70 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -101,7 +101,7 @@ class BiliBiliIE(InfoExtractor): else: raise ExtractorError('Can\'t extract Bangumi episode ID') - def _getfps(self, s: str) -> int: + def _getfps(self, s): "convert fps to int" if s.isnumeric(): return int(s) From fa547cfda842df993bb5ce1d2ec7ac025799a09b Mon Sep 17 00:00:00 2001 From: lifegpc <g1710431395@gmail.com> Date: Tue, 4 
Aug 2020 17:36:07 +0800 Subject: [PATCH 05/11] REMOVE synax not support( --- youtube_dl/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 2cd26bd70..f91d7fb56 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -113,7 +113,7 @@ class BiliBiliIE(InfoExtractor): else: return 0 - def _calculate_size(self, durl: list) -> int: + def _calculate_size(self, durl): "Calculate total file size." s = 0 for i in durl: From 96cb93d771490e1311b0bc9e257a1e3cadaa1ecf Mon Sep 17 00:00:00 2001 From: lifegpc <g1710431395@gmail.com> Date: Tue, 4 Aug 2020 18:27:25 +0800 Subject: [PATCH 06/11] change string format method --- youtube_dl/extractor/bilibili.py | 81 +++++++++++++++++--------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index f91d7fb56..805d7d433 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -151,7 +151,7 @@ class BiliBiliIE(InfoExtractor): video_count = video_data['videos'] tags_info = self._download_json( - f"https://api.bilibili.com/x/web-interface/view/detail/tag?aid={aid}", video_id, + "https://api.bilibili.com/x/web-interface/view/detail/tag?aid=%s" % (aid), video_id, 'Geting video tags.', 'Unable to get Login/User Information.') if tags_info['code'] != 0: self._report_error(tags_info) @@ -181,7 +181,7 @@ class BiliBiliIE(InfoExtractor): 'uploader': uploader_data['name'], 'uploader_id': uploader_data['mid'], 'duration': video_data['duration'], - 'webpage_url': f'https://www.bilibili.com/video/av{aid}', + 'webpage_url': 'https://www.bilibili.com/video/av%s' % (aid), 'categories': [video_data['tname']], 'view_count': video_data['stat']['viewseo'], 'comment_count': video_data['stat']['reply'], @@ -198,11 +198,11 @@ class BiliBiliIE(InfoExtractor): for part_info in video_data['pages']: if part is not None and part_info["page"] != part: continue - uri = f'https://www.bilibili.com/video/av{aid}?p={part_info["page"]}' + uri = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, part_info["page"]) if first: first = False else: - webpage = self._download_webpage(uri, f"{video_id} Part{part_info['page']}") + webpage = self._download_webpage(uri, "%s Part%s" % (video_id, part_info['page'])) headers = {'referer': uri} if new_api: play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. 
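# Patches 04-06 strip Python-3-only syntax (type annotations, f-strings) because
# youtube-dl still supports Python 2; the f-string URLs become %-formatting as below.
# Both lines build the same request URL (placeholder cid/bvid/qn values):
cid, bvid, qn = 1176840, 'BV1xx411c7mD', 112
# Python 3 only:
#   url = f"https://api.bilibili.com/x/player/playurl?cid={cid}&qn={qn}&otype=json&bvid={bvid}&fnver=0&fnval=16"
url = "https://api.bilibili.com/x/player/playurl?cid=%s&qn=%s&otype=json&bvid=%s&fnver=0&fnval=16" % (cid, qn, bvid)
print(url)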
@@ -214,8 +214,8 @@ class BiliBiliIE(InfoExtractor): else: new_api = False play_info = self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn=120&otype=json&bvid={bvid}&fnver=0&fnval=16", - f"{video_id} Part{part_info['page']}", + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=120&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], bvid), + "%s Part%s" % (video_id, part_info['page']), "Geting video links.", "Unable to get video links.") if play_info['code'] != 0: @@ -223,8 +223,8 @@ class BiliBiliIE(InfoExtractor): play_info = play_info['data'] else: play_info = self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn=120&otype=json&bvid={bvid}&fnver=0&fnval=16", - f"{video_id} Part{part_info['page']}", + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=120&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], bvid), + "%s Part%s" % (video_id, part_info['page']), "Geting video links.", "Unable to get video links.") if play_info['code'] != 0: @@ -233,14 +233,14 @@ class BiliBiliIE(InfoExtractor): if 'durl' in play_info: # Stream for flv player if video_count > 1 and len(play_info['durl']) > 1 and part is None: self.report_warning( - f"There are multiply FLV files in this part. Please input \"{uri}\" to extract it.", - f"{video_id} Part{part_info['page']}") + "There are multiply FLV files in this part. Please input \"%s\" to extract it." % (uri), + "%s Part%s" % (video_id, part_info['page'])) continue is_durl = True if video_count > 1: info.update({ - 'title': f"{info['title']} - {part_info['part']}", - 'id': f"{video_id} P{part_info['page']}" + 'title': "%s - %s" % (info['title'], part_info['part']), + 'id': "%s P%s" % (video_id, part_info['page']) }) video_quality = play_info['quality'] accept_video_quality_desc = play_info['accept_description'] @@ -256,11 +256,11 @@ class BiliBiliIE(InfoExtractor): for video_q in accept_video_quality: if video_q not in video_formats: if new_api: - self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=f"{video_q}") + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=str(video_q)) webpage = self._download_webpage(uri, - f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q)) play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. if play_info is not None: play_info = json.loads(play_info.groups()[0]) @@ -270,17 +270,19 @@ class BiliBiliIE(InfoExtractor): else: new_api = False play_info = self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=%s&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], video_q, bvid), + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." 
% (video_q)) if play_info['code'] != 0: self._report_error(play_info) play_info = play_info['data'] else: play_info = self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=%s&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], video_q, bvid), + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q)) if play_info['code'] != 0: self._report_error(play_info) play_info = play_info['data'] @@ -291,7 +293,7 @@ class BiliBiliIE(InfoExtractor): for i in range(max(durl_length)): entry = {} entry.update(info) - entry.update({'id': f"{info['id']} Part{i+1}"}) + entry.update({'id': "%s Part%s" % (info['id'], i + 1)}) formats_output = [] for video_q in accept_video_quality: durl = video_formats[video_q] @@ -299,7 +301,7 @@ class BiliBiliIE(InfoExtractor): video_format = durl[i] formats_output.append({ "url": video_format['url'], - "format_id": f"{video_q}", + "format_id": str(video_q), "ext": "flv", "http_headers": headers, "filesize": video_format['size'] @@ -338,11 +340,11 @@ class BiliBiliIE(InfoExtractor): elif is_vip > 0: bs = True if new_api: - self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=f"{video_q}") + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=str(video_q)) webpage = self._download_webpage(uri, - f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q)) play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. if play_info is not None: play_info = json.loads(play_info.groups()[0]) @@ -352,18 +354,19 @@ class BiliBiliIE(InfoExtractor): else: new_api = False play_info = self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=%s&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], video_q, bvid), + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q)) if play_info['code'] != 0: self._report_error(play_info) play_info = play_info['data'] else: play_info = self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", - f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=%s&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], video_q, bvid), + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." 
% (video_q)) if play_info['code'] != 0: self._report_error(play_info) play_info = play_info['data'] @@ -388,7 +391,7 @@ class BiliBiliIE(InfoExtractor): {"url": video_format['base_url'], "ext": "mp4", "format_note": video_desc_dict[video_format['id']], - "format_id": f"{i}", + "format_id": str(i), "vcodec": video_format['codecs'], "fps": self._getfps(video_format['frame_rate']), "width": video_format['width'], @@ -404,15 +407,15 @@ class BiliBiliIE(InfoExtractor): audio_format = video_formats[audio_quality] formats_output.append({ "url": audio_format["base_url"], - "format_id": f"{audio_format['id']}", + "format_id": str(audio_format['id']), "ext": "mp4", "acodec": audio_format['codecs'], "http_headers": headers }) entry.update({"formats": formats_output}) if video_count > 1: - entry.update({"title": f"{info['title']} - {part_info['part']}"}) - entry.update({"id": f"{video_id} P{part_info['page']}"}) + entry.update({"title": "%s - %s" % (info['title'], part_info['part'])}) + entry.update({"id": "%s P%s" % (video_id, part_info['page'])}) entries.append(entry) if video_count > 1: From 6532b27c71af7a51e75f46a32e473ad597cea2fe Mon Sep 17 00:00:00 2001 From: lifegpc <g1710431395@gmail.com> Date: Tue, 4 Aug 2020 18:37:29 +0800 Subject: [PATCH 07/11] display format_note when extractor dash streams --- youtube_dl/extractor/bilibili.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 805d7d433..d9f362a74 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -302,6 +302,7 @@ class BiliBiliIE(InfoExtractor): formats_output.append({ "url": video_format['url'], "format_id": str(video_q), + "format_note": video_desc_dict[video_q], "ext": "flv", "http_headers": headers, "filesize": video_format['size'] From ac144ed961010e19f718539767c58593e2027a9f Mon Sep 17 00:00:00 2001 From: lifegpc <g1710431395@gmail.com> Date: Wed, 5 Aug 2020 06:21:38 +0800 Subject: [PATCH 08/11] fix typo and small bugs in code. 
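# Patch 07 surfaces a human-readable rendition name by pairing accept_quality with
# accept_description into video_desc_dict and attaching it as format_note. The sample
# values below are hard-coded for illustration, not taken from a live API response:
accept_quality = [80, 64, 32, 16]
accept_description = ['1080P', '720P', '480P', '360P']
video_desc_dict = dict(zip(accept_quality, accept_description))
print(video_desc_dict[64])  # -> 720P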
---
 youtube_dl/extractor/bilibili.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index d9f362a74..c64013753 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -152,7 +152,7 @@ class BiliBiliIE(InfoExtractor):
         tags_info = self._download_json(
             "https://api.bilibili.com/x/web-interface/view/detail/tag?aid=%s" % (aid), video_id,
-            'Geting video tags.', 'Unable to get Login/User Information.')
+            'Geting video tags.', 'Unable to get video tags.')
         if tags_info['code'] != 0:
             self._report_error(tags_info)
         tags_info = tags_info['data']
@@ -290,6 +290,7 @@ class BiliBiliIE(InfoExtractor):
                            video_formats[play_info["quality"]] = play_info['durl']
                            video_formats_size[play_info["quality"]] = self._calculate_size(play_info['durl'])
                            durl_length.append(len(play_info['durl']))
+            self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120")
             for i in range(max(durl_length)):
                 entry = {}
                 entry.update(info)

From 6b8d4e09c126bd4bd5a471a7fb7141523ee27c6d Mon Sep 17 00:00:00 2001
From: lifegpc <g1710431395@gmail.com>
Date: Wed, 5 Aug 2020 14:56:02 +0800
Subject: [PATCH 09/11] Support extracting bangumi

---
 youtube_dl/extractor/bilibili.py | 478 +++++++++++++++++++++++++++----
 1 file changed, 426 insertions(+), 52 deletions(-)

diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index c64013753..bd379cc20 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -449,70 +449,444 @@ class BiliBiliIE(InfoExtractor):
 
 
 class BiliBiliBangumiIE(InfoExtractor):
-    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:(?:www)\.)?
+                        bilibili\.(?:tv|com)/
+                        (?:
+                            (?:
+                                bangumi/play/[sS][sS]
+                            )(?P<ssid>\d+)|
+                            bangumi/play/[eE][pP](?P<epid>\d+)
+                        )
+                    '''
 
-    IE_NAME = 'bangumi.bilibili.com'
+    IE_NAME = 'bilibili bangumi'
     IE_DESC = 'BiliBili番剧'
 
-    _TESTS = [{
-        'url': 'http://bangumi.bilibili.com/anime/1869',
-        'info_dict': {
-            'id': '1869',
-            'title': '混沌武士',
-            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
-        },
-        'playlist_count': 26,
-    }, {
-        'url': 'http://bangumi.bilibili.com/anime/1869',
-        'info_dict': {
-            'id': '1869',
-            'title': '混沌武士',
-            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
-        },
-        'playlist': [{
-            'md5': '91da8621454dd58316851c27c68b0c13',
-            'info_dict': {
-                'id': '40062',
-                'ext': 'mp4',
-                'title': '混沌武士',
-                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
-                'timestamp': 1414538739,
-                'upload_date': '20141028',
-                'episode': '疾风怒涛 Tempestuous Temperaments',
-                'episode_number': 1,
-            },
-        }],
-        'params': {
-            'playlist_items': '1',
-        },
-    }]
+    _TESTS = []
 
-    @classmethod
+    @ classmethod
     def suitable(cls, url):
         return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
 
+    def _get_episode_list(self, bangumi_info):
+        ep_list = bangumi_info['epList']
+        episode_list = []
+        for i in ep_list:
+            temp = {}
+            temp.update(i)
+            episode_list.append(temp)
+        if 'sections' in bangumi_info:
+            for section in bangumi_info['sections']:
+                for i in section['epList']:
+                    temp = {}
+                    temp.update(i)
+                    temp.update({
+                        "section_title": section['title'],
+                        "section_id": section['id']
+                    })
+                    episode_list.append(temp)
+        return episode_list
+
+    def _report_error(self, error):
+        if 'message' in error:
+            raise ExtractorError(error['message'])
+        elif 'code' in error:
+            raise ExtractorError(str(error['code']))
+        else:
+            raise ExtractorError(str(error))
+
+    def _report_warning(self, warning, video_id=None):
+        if 'message' in warning:
+            self.report_warning(warning['message'], video_id)
+        elif 'code' in warning:
+            self.report_warning(str(warning['code']), video_id)
+        else:
+            self.report_warning(str(warning), video_id)
+
+    def _calculate_size(self, durl):
+        "Calculate total file size."
+        s = 0
+        for i in durl:
+            s = s + i['size']
+        return s
+
+    def _getfps(self, s):
+        "convert fps to int"
+        if s.isnumeric():
+            return int(s)
+        else:
+            r = re.search(r"([0-9]+)/([0-9]+)", s)
+            if r is not None:
+                r = r.groups()
+                return int(r[0]) / int(r[1])
+            else:
+                return 0
+
+    def _extract_episode(self, episode_info):
+        epid = episode_info['id']
+        uri = "https://www.bilibili.com/bangumi/play/ep%s" % (epid)
+        if self._epid is None:
+            video_id = "%s %s" % (self._video_id, episode_info['titleFormat'])
+        if self._first:
+            webpage = self._webpage
+            self._first = False
+        else:
+            webpage = self._download_webpage(uri, video_id)
+        headers = {'referer': uri}
+        if self._new_api:
+            play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I)
+            if play_info is not None:
+                play_info = json.loads(play_info.groups()[0])
+                if play_info['code'] != 0:
+                    self._report_error(play_info)
+                play_info = play_info['data']
+            else:
+                self._new_api = False
+                play_info = self._download_json(
+                    "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=120&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], episode_info['bvid'], epid),
+                    video_id,
+                    "Getting video links.",
+                    "Unable to get video links.",
+                    headers=headers)
+                if play_info['code'] == -10403:  # Need vip or buy
+                    self._new_api = True
+                    self._report_warning(play_info)
+                elif play_info['code'] != 0:
+                    self._report_error(play_info)
+                play_info = play_info['result']
+        else:
+            play_info = self._download_json(
+                "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=120&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], episode_info['bvid'], epid),
+                video_id,
+                "Getting video links.",
+                "Unable to get video links.",
+                headers=headers)
+            if play_info['code'] == -10403:  # Need vip or buy
+                self._report_warning(play_info)
+            elif play_info['code'] != 0:
+                self._report_error(play_info)
+            play_info = play_info['result']
+        if 'durl' in play_info:  # Stream for flv player
+            if self._video_count > 1 and len(play_info['durl']) > 1 and self._epid is None:
+                self._report_warning(
+                    "There are multiple FLV files in this episode. Please input \"%s\" to extract it." % (uri),
+                    video_id)
+                return
+            self._is_durl = True
+            if self._epid is not None:
+                self._info.update({
+                    "title": "%s - %s %s" % (self._info['title'], episode_info['titleFormat'], episode_info['longTitle']),
+                    "id": video_id,
+                    "episode": episode_info['longTitle'],
+                    "episode_id": episode_info['id']
+                })
+            video_quality = play_info['quality']
+            accept_video_quality_desc = play_info['accept_description']
+            accept_video_quality = play_info['accept_quality']
+            video_desc_dict = {}
+            for i in range(len(accept_video_quality)):
+                video_desc_dict.update({
+                    accept_video_quality[i]: accept_video_quality_desc[i]
+                })
+            video_formats = {video_quality: play_info['durl']}
+            video_formats_size = {video_quality: self._calculate_size(play_info['durl'])}
+            durl_length = [len(play_info['durl'])]
+            for video_q in accept_video_quality:
+                if video_q not in video_formats:
+                    if self._new_api:
+                        self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=str(video_q))
+                        webpage = self._download_webpage(
+                            uri,
+                            video_id,
+                            "Getting video links for format id : %s." % (video_q),
+                            "Unable to get video links for format id : %s." % (video_q))
+                        play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I)
+                        if play_info is not None:
+                            play_info = json.loads(play_info.groups()[0])
+                            if play_info['code'] != 0:
+                                self._report_error(play_info)
+                            play_info = play_info['data']
+                        else:
+                            self._new_api = False
+                            play_info = self._download_json(
+                                "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=%s&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], video_q, episode_info['bvid'], epid),
+                                video_id,
+                                "Getting video links for format id : %s." % (video_q),
+                                "Unable to get video links for format id : %s." % (video_q),
+                                headers=headers)
+                            if play_info['code'] == -10403:  # Need vip or buy
+                                self._new_api = True
+                                self._report_warning(play_info)
+                            elif play_info['code'] != 0:
+                                self._report_error(play_info)
+                            play_info = play_info['result']
+                    else:
+                        play_info = self._download_json(
+                            "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=%s&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], video_q, episode_info['bvid'], epid),
+                            video_id,
+                            "Getting video links for format id : %s." % (video_q),
+                            "Unable to get video links for format id : %s." % (video_q),
+                            headers=headers)
+                        if play_info['code'] == -10403:  # Need vip or buy
+                            self._report_warning(play_info)
+                        elif play_info['code'] != 0:
+                            self._report_error(play_info)
+                        play_info = play_info['result']
+                    if 'durl' in play_info:
+                        video_formats[play_info["quality"]] = play_info['durl']
+                        video_formats_size[play_info["quality"]] = self._calculate_size(play_info['durl'])
+                        durl_length.append(len(play_info['durl']))
+            self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120")
+            for i in range(max(durl_length)):
+                entry = {}
+                entry.update(self._info)
+                if self._epid is None:
+                    entry.update({
+                        "title": "%s - %s %s" % (self._info['title'], episode_info['titleFormat'], episode_info['longTitle']),
+                        "id": video_id,
+                        "episode": episode_info['longTitle'],
+                        "episode_id": episode_info['id']
+                    })
+                else:
+                    entry.update({
+                        "id": "%s Part%s" % (video_id, i + 1)
+                    })
+                formats_output = []
+                for video_q in accept_video_quality:
+                    durl = video_formats[video_q]
+                    if i < len(durl):
+                        video_format = durl[i]
+                        formats_output.append({
+                            "url": video_format['url'],
+                            "format_id": str(video_q),
+                            "format_note": video_desc_dict[video_q],
+                            "ext": "flv",
+                            "http_headers": headers,
+                            "filesize": video_format['size']
+                        })
+                entry['formats'] = formats_output
+                self._entries.append(entry)
+        elif 'dash' in play_info:  # Stream for dash player
+            video_quality = play_info['quality']
+            accept_video_quality_desc = play_info['accept_description']
+            accept_video_quality = play_info['accept_quality']
+            accept_audio_quality = []
+            dash = play_info['dash']
+            video_quality_list = []
+            video_desc_dict = {}
+            for i in range(len(accept_video_quality)):
+                video_desc_dict.update({
+                    accept_video_quality[i]: accept_video_quality_desc[i]
+                })
+            video_formats = {}
+            for video_format in dash['video']:
+                if video_format['codecs'].startswith('hev'):
+                    video_quality_list.append(video_format['id'] + 1)
+                    video_formats[video_format['id'] + 1] = video_format
+                else:
+                    video_quality_list.append(video_format['id'])
+                    video_formats[video_format['id']] = video_format
+            bs = True  # Try to get all video formats
+            while bs:
+                bs = False
+                for video_q in accept_video_quality:
+                    if video_q not in video_formats:
+                        if not self._is_login and video_q <= 32:
+                            bs = True
+                        elif self._is_vip < 1 and video_q <= 80 and video_q != 74:
+                            bs = True
+                        elif self._is_vip > 0:
+                            bs = True
+                        if self._new_api:
+                            self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=str(video_q))
+                            webpage = self._download_webpage(
+                                uri,
+                                video_id,
+                                "Getting video links for format id : %s." % (video_q),
+                                "Unable to get video links for format id : %s." % (video_q))
+                            play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I)
+                            if play_info is not None:
+                                play_info = json.loads(play_info.groups()[0])
+                                if play_info['code'] != 0:
+                                    self._report_error(play_info)
+                                play_info = play_info['data']
+                            else:
+                                self._new_api = False
+                                play_info = self._download_json(
+                                    "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=%s&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], video_q, episode_info['bvid'], epid),
+                                    video_id,
+                                    "Getting video links for format id : %s." % (video_q),
+                                    "Unable to get video links for format id : %s." % (video_q),
+                                    headers=headers)
+                                if play_info['code'] == -10403:  # Need vip or buy
+                                    self._new_api = True
+                                    self._report_warning(play_info)
+                                elif play_info['code'] != 0:
+                                    self._report_error(play_info)
+                                play_info = play_info['result']
+                        else:
+                            play_info = self._download_json(
+                                "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=%s&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], video_q, episode_info['bvid'], epid),
+                                video_id,
+                                "Getting video links for format id : %s." % (video_q),
+                                "Unable to get video links for format id : %s." % (video_q),
+                                headers=headers)
+                            if play_info['code'] == -10403:  # Need vip or buy
+                                self._report_warning(play_info)
+                            elif play_info['code'] != 0:
+                                self._report_error(play_info)
+                            play_info = play_info['result']
+                        if 'dash' in play_info:
+                            for video_format in play_info['dash']['video']:
+                                if video_format['codecs'].startswith('hev'):  # Let format id increase 1 to distinguish codec
+                                    video_format_q = video_format['id'] + 1
+                                else:
+                                    video_format_q = video_format['id']
+                                if video_format_q not in video_formats:
+                                    video_quality_list.append(video_format_q)
+                                    video_formats[video_format_q] = video_format
+                            bs = True
+                            break
+            self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120")
+            entry = {}
+            entry.update(self._info)
+            entry.update({
+                "title": "%s - %s %s" % (self._info['title'], episode_info['titleFormat'], episode_info['longTitle']),
+                "id": video_id,
+                "episode": episode_info['longTitle'],
+                "episode_id": episode_info['id']
+            })
+            formats_output = []
+            for i in video_quality_list:
+                video_format = video_formats[i]
+                formats_output.append({
+                    "url": video_format['base_url'],
+                    "ext": "mp4",
+                    "format_note": video_desc_dict[video_format['id']],
+                    "format_id": str(i),
+                    "vcodec": video_format['codecs'],
+                    "fps": self._getfps(video_format['frame_rate']),
+                    "width": video_format['width'],
+                    "height": video_format['height'],
+                    "http_headers": headers
+                })
+            if 'audio' in dash and dash['audio'] is not None:
+                for audio_format in dash['audio']:
+                    accept_audio_quality.append(audio_format['id'])
+                    video_formats[audio_format['id']] = audio_format
+            accept_audio_quality.sort(reverse=True)
+            for audio_quality in accept_audio_quality:
+                audio_format = video_formats[audio_quality]
+                formats_output.append({
+                    "url": audio_format["base_url"],
+                    "format_id": str(audio_format['id']),
+                    "ext": "mp4",
+                    "acodec": audio_format['codecs'],
+                    "http_headers": headers
+                })
+            entry.update({"formats": formats_output})
+            self._entries.append(entry)
+
     def _real_extract(self, url):
-        bangumi_id = self._match_id(url)
+        url, smuggled_data = unsmuggle_url(url, {})
 
-        # Sometimes this API returns a JSONP response
-        season_info = self._download_json(
-            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
-            bangumi_id, transform_source=strip_jsonp)['result']
+        mobj = re.match(self._VALID_URL, url)
+        ssid = mobj.group('ssid')
+        epid = mobj.group('epid')
+        video_id = ssid or epid
+        if ssid is not None:
+            ssid = int(ssid)
+            video_id = "ss" + video_id
+        if epid is not None:
+            epid = int(epid)
+            video_id = "ep" + video_id
 
-        entries = [{
-            '_type': 'url_transparent',
-            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
-            'ie_key': BiliBiliIE.ie_key(),
-            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
-            'episode': episode.get('index_title'),
-            'episode_number': int_or_none(episode.get('index')),
-        } for episode in season_info['episodes']]
+        # Set Cookies need to parse the Links.
+        self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120")  # Set default video quality
+        self._set_cookie(domain=".bilibili.com", name="CURRENT_FNVAL", value="16")
+        self._set_cookie(domain=".bilibili.com", name="laboratory", value="1-1")  # Use new webpage API
+        self._set_cookie(domain=".bilibili.com", name="stardustvideo", value="1")
 
-        entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
+        webpage = self._download_webpage(url, video_id)
 
-        return self.playlist_result(
-            entries, bangumi_id,
-            season_info.get('bangumi_title'), season_info.get('evaluate'))
+        bangumi_info = re.search(r"window\.__INITIAL_STATE__=([^;]+)", webpage, re.I)
+        if bangumi_info is not None:
+            bangumi_info = json.loads(bangumi_info.groups()[0])
+        else:
+            raise ExtractorError("Cannot find the bangumi.")
+        media_info = bangumi_info['mediaInfo']
+        if ssid is None:
+            ssid = int(media_info['ssId'])
+
+        user_info = self._download_json(
+            "https://api.bilibili.com/x/web-interface/nav", video_id,
+            "Getting Login/User Information.", "Unable to get Login/User Information.")
+        if user_info['code'] != 0 and user_info['code'] != -101:
+            self._report_error(user_info)
+        user_info = user_info['data']
+        self._is_login = user_info['isLogin']
+        if self._is_login:
+            self._is_vip = user_info['vipStatus']
+        else:
+            self._is_vip = 0
+        self._is_durl = False  # If return the durl Stream, this will be true
+
+        self._info = {
+            "series": media_info['series'],
+            "title": media_info['title'],
+            "season": media_info['title'],
+            "season_id": media_info['ssId'],
+            "id": video_id,
+            "thumbnail": "https:" + media_info['cover'],
+            "description": media_info['evaluate'],
+            "uploader": media_info['upInfo']['name'],
+            "release_date": media_info['pub']['time'][0:4] + media_info['pub']['time'][5:7] + media_info['pub']['time'][8:10],
+            "uploader_id": media_info['upInfo']['mid'],
+            "view_count": media_info['stat']['views'],
+            "like_count": media_info['stat']['favorites'],
+            "comment_count": media_info['stat']['reply'],
+            "webpage_url": "https://www.bilibili.com/bangumi/play/%s" % (video_id)
+        }
+
+        ep_list = self._get_episode_list(bangumi_info)
+        if epid is not None:
+            ep_info = None
+            for ep in ep_list:
+                if ep['id'] == epid:
+                    ep_info = ep
+                    break
+            if ep_info is None:
+                self._report_error("Cannot find the information of ep%s." % (epid))
+        self._video_count = len(ep_list)
+        self._new_api = True  # Parse video links from webpage first.
+        self._first = True  # First Part
+        self._webpage = webpage
+        self._video_id = video_id
+        self._epid = epid
+        self._entries = []
+        if epid is not None:
+            self._extract_episode(ep_info)
+        else:
+            for ep_info in ep_list:
+                self._extract_episode(ep_info)
+
+        if epid is None:
+            self._info.update({
+                "_type": 'multi_video',
+                'entries': self._entries
+            })
+            return self._info
+        else:
+            if len(self._entries) == 1:
+                return self._entries[0]
+            else:
+                self._info.update({
+                    "_type": 'multi_video',
+                    'entries': self._entries
+                })
+                return self._info
 
 
 class BilibiliAudioBaseIE(InfoExtractor):

From ca4d8aeab31f1257e228bc1f12b3f491703291e8 Mon Sep 17 00:00:00 2001
From: lifegpc <g1710431395@gmail.com>
Date: Wed, 5 Aug 2020 15:02:33 +0800
Subject: [PATCH 10/11] Remove unused imports

---
 youtube_dl/extractor/bilibili.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index bd379cc20..68f95be7f 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -11,10 +11,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     int_or_none,
-    parse_iso8601,
-    smuggle_url,
     str_or_none,
-    strip_jsonp,
     unsmuggle_url,
 )
 

From 650850772504835f54e9dcb8cb1f13703e4955ba Mon Sep 17 00:00:00 2001
From: lifegpc <g1710431395@gmail.com>
Date: Wed, 5 Aug 2020 17:35:40 +0800
Subject: [PATCH 11/11] Complete redirect to Bangumi episode ID. Fix bug

---
 youtube_dl/extractor/bilibili.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index 68f95be7f..6f774999c 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -139,8 +139,20 @@ class BiliBiliIE(InfoExtractor):
         if video_info is not None:
             video_info = json.loads(video_info.groups()[0])
         else:
-            self._report_error("")  # TODO Should redirect to Bangumi episode ID.
-            # https://api.bilibili.com/x/web-interface/view/detail?bvid=&aid=&jsonp=jsonp have redirect links.
+            if mobj.group('id_bv') is not None:
+                uri = "https://api.bilibili.com/x/web-interface/view/detail?bvid=BV%s&aid=&jsonp=jsonp" % (video_id)
+            else:
+                uri = "https://api.bilibili.com/x/web-interface/view/detail?bvid=&aid=%s&jsonp=jsonp" % (video_id)
+            redirect_info = self._download_json(
+                uri, video_id,
+                "Getting redirect information.", "Unable to get redirect information.")
+            if redirect_info['code'] != 0:
+                self._report_error(redirect_info)
+            redirect_info = redirect_info['data']
+            if 'View' in redirect_info and 'redirect_url' in redirect_info['View']:
+                return self.url_result(redirect_info['View']['redirect_url'])
+            else:
+                raise ExtractorError("Cannot find redirect URL.")
         video_data = video_info['videoData']
         uploader_data = video_info['upData']
         aid = video_data['aid']
@@ -526,6 +538,8 @@ class BiliBiliBangumiIE(InfoExtractor):
         uri = "https://www.bilibili.com/bangumi/play/ep%s" % (epid)
         if self._epid is None:
             video_id = "%s %s" % (self._video_id, episode_info['titleFormat'])
+        else:
+            video_id = self._video_id
         if self._first:
             webpage = self._webpage
             self._first = False