diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 5928dcea3..e8bcfd099 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -1,26 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib import re import json from .common import InfoExtractor from ..compat import ( - compat_parse_qs, compat_urlparse, ) from ..utils import ( ExtractorError, int_or_none, - float_or_none, parse_iso8601, smuggle_url, str_or_none, strip_jsonp, - unified_timestamp, unsmuggle_url, - urlencode_postdata, ) @@ -120,23 +115,23 @@ class BiliBiliIE(InfoExtractor): else: raise ExtractorError('Can\'t extract Bangumi episode ID') - def _getfps(self,s:str)->int: + def _getfps(self, s: str) -> int: "convert fps to int" - if s.isnumeric() : + if s.isnumeric(): return int(s) - else : - r=re.search(r"([0-9]+)/([0-9]+)",s) - if r!=None : - r=r.groups() - return int(r[0])/int(r[1]) - else : + else: + r = re.search(r"([0-9]+)/([0-9]+)", s) + if r is not None: + r = r.groups() + return int(r[0]) / int(r[1]) + else: return 0 - - def _calculate_size(self,durl:list) -> int : + + def _calculate_size(self, durl: list) -> int: "Calculate total file size." - s=0 - for i in durl : - s=s+i['size'] + s = 0 + for i in durl: + s = s + i['size'] return s def _real_extract(self, url): @@ -144,320 +139,320 @@ class BiliBiliIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') or mobj.group('id_bv') - query=compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - part=None - if 'p' in query and str(query['p'][0]).isnumeric() : - part=int(query['p'][0]) + query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + part = None + if 'p' in query and str(query['p'][0]).isnumeric(): + part = int(query['p'][0]) - #Set Cookies need to parse the Links. - self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value="120")#Set default video quality - self._set_cookie(domain=".bilibili.com",name="CURRENT_FNVAL",value="16") - self._set_cookie(domain=".bilibili.com",name="laboratory",value="1-1")#Use new webpage API - self._set_cookie(domain=".bilibili.com",name="stardustvideo",value="1") + # Set Cookies need to parse the Links. + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") # Set default video quality + self._set_cookie(domain=".bilibili.com", name="CURRENT_FNVAL", value="16") + self._set_cookie(domain=".bilibili.com", name="laboratory", value="1-1") # Use new webpage API + self._set_cookie(domain=".bilibili.com", name="stardustvideo", value="1") webpage = self._download_webpage(url, video_id) - video_info=re.search(r"window\.__INITIAL_STATE__=([^;]+)",webpage,re.I) - if video_info != None : - video_info=json.loads(video_info.groups()[0]) - else : - self._report_error("") #TODO Should redirect to Bangumi episode ID. - #https://api.bilibili.com/x/web-interface/view/detail?bvid=&aid=&jsonp=jsonp have redirect links. - video_data=video_info['videoData'] - uploader_data=video_info['upData'] - aid=video_data['aid'] - bvid=video_data['bvid'] - video_count=video_data['videos'] + video_info = re.search(r"window\.__INITIAL_STATE__=([^;]+)", webpage, re.I) + if video_info is not None: + video_info = json.loads(video_info.groups()[0]) + else: + self._report_error("") # TODO Should redirect to Bangumi episode ID. + # https://api.bilibili.com/x/web-interface/view/detail?bvid=&aid=&jsonp=jsonp have redirect links. + video_data = video_info['videoData'] + uploader_data = video_info['upData'] + aid = video_data['aid'] + bvid = video_data['bvid'] + video_count = video_data['videos'] - tags_info=self._download_json( - f"https://api.bilibili.com/x/web-interface/view/detail/tag?aid={aid}",video_id, - 'Geting video tags.','Unable to get Login/User Information.') - if tags_info['code']!=0 : + tags_info = self._download_json( + f"https://api.bilibili.com/x/web-interface/view/detail/tag?aid={aid}", video_id, + 'Geting video tags.', 'Unable to get Login/User Information.') + if tags_info['code'] != 0: self._report_error(tags_info) - tags_info=tags_info['data'] - tags=[] - for i in tags_info : + tags_info = tags_info['data'] + tags = [] + for i in tags_info: tags.append(i['tag_name']) - - user_info=self._download_json("https://api.bilibili.com/x/web-interface/nav",video_id, - "Geting Login/User Information.","Unable to get Login/User Information.") - if user_info['code']!=0 and user_info['code']!=-101 : - self._report_error(user_info) - user_info=user_info['data'] - is_login=user_info['isLogin'] - if is_login: - is_vip=user_info['vipStatus'] - else : - is_vip=0 - is_durl=False # If return the durl Stream, this will be true - info={ - 'id':video_id, - 'title':video_data['title'], - 'description':video_data['desc'], - 'timestamp':video_data['ctime'], - 'thumbnail':video_data['pic'], - 'uploader':uploader_data['name'], - 'uploader_id':uploader_data['mid'], - 'duration':video_data['duration'], - 'webpage_url':f'https://www.bilibili.com/video/av{aid}', - 'categories':[video_data['tname']], - 'view_count':video_data['stat']['viewseo'], - 'comment_count':video_data['stat']['reply'], - 'tags':tags + user_info = self._download_json("https://api.bilibili.com/x/web-interface/nav", video_id, + "Geting Login/User Information.", "Unable to get Login/User Information.") + if user_info['code'] != 0 and user_info['code'] != -101: + self._report_error(user_info) + user_info = user_info['data'] + is_login = user_info['isLogin'] + if is_login: + is_vip = user_info['vipStatus'] + else: + is_vip = 0 + is_durl = False # If return the durl Stream, this will be true + + info = { + 'id': video_id, + 'title': video_data['title'], + 'description': video_data['desc'], + 'timestamp': video_data['ctime'], + 'thumbnail': video_data['pic'], + 'uploader': uploader_data['name'], + 'uploader_id': uploader_data['mid'], + 'duration': video_data['duration'], + 'webpage_url': f'https://www.bilibili.com/video/av{aid}', + 'categories': [video_data['tname']], + 'view_count': video_data['stat']['viewseo'], + 'comment_count': video_data['stat']['reply'], + 'tags': tags } - if video_count==1 : - info.update({'alt_title':video_data['pages'][0]['part']}) + if video_count == 1: + info.update({'alt_title': video_data['pages'][0]['part']}) - new_api=True #Parse video links from webpage first. - first=True #First Part of List - entries=[] + new_api = True # Parse video links from webpage first. + first = True # First Part of List + entries = [] - for part_info in video_data['pages'] : - if part != None and part_info["page"]!=part : + for part_info in video_data['pages']: + if part is not None and part_info["page"] != part: continue - uri=f'https://www.bilibili.com/video/av{aid}?p={part_info["page"]}' - if first : - first=False - else : - webpage=self._download_webpage(uri,f"{video_id} Part{part_info['page']}") - headers={'referer':uri} + uri = f'https://www.bilibili.com/video/av{aid}?p={part_info["page"]}' + if first: + first = False + else: + webpage = self._download_webpage(uri, f"{video_id} Part{part_info['page']}") + headers = {'referer': uri} if new_api: - play_info=re.search(r"window\.__playinfo__=([^<]+)",webpage,re.I) #Get video links from webpage. - if play_info != None : - play_info=json.loads(play_info.groups()[0]) - if play_info['code']!=0 : + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - new_api=False - play_info=self._download_json( + play_info = play_info['data'] + else: + new_api = False + play_info = self._download_json( f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn=120&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", "Geting video links.", "Unable to get video links.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - play_info=self._download_json( + play_info = play_info['data'] + else: + play_info = self._download_json( f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn=120&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", "Geting video links.", "Unable to get video links.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - if 'durl' in play_info: # Stream for flv player - if video_count > 1 and len(play_info['durl']) > 1 and part==None : + play_info = play_info['data'] + if 'durl' in play_info: # Stream for flv player + if video_count > 1 and len(play_info['durl']) > 1 and part is None: self.report_warning( f"There are multiply FLV files in this part. Please input \"{uri}\" to extract it.", f"{video_id} Part{part_info['page']}") continue - is_durl=True - if video_count>1: + is_durl = True + if video_count > 1: info.update({ - 'title':f"{info['title']} - {part_info['part']}", - 'id':f"{video_id} P{part_info['page']}" + 'title': f"{info['title']} - {part_info['part']}", + 'id': f"{video_id} P{part_info['page']}" }) - video_quality=play_info['quality'] - accept_video_quality_desc=play_info['accept_description'] - accept_video_quality=play_info['accept_quality'] - video_desc_dict={} - for i in range(len(accept_video_quality)) : + video_quality = play_info['quality'] + accept_video_quality_desc = play_info['accept_description'] + accept_video_quality = play_info['accept_quality'] + video_desc_dict = {} + for i in range(len(accept_video_quality)): video_desc_dict.update({ - accept_video_quality[i]:accept_video_quality_desc[i] + accept_video_quality[i]: accept_video_quality_desc[i] }) - video_formats={video_quality:play_info['durl']} - video_formats_size={video_quality:self._calculate_size(play_info['durl'])} #Total Filesize Dict - durl_length=[len(play_info['durl'])] - for video_q in accept_video_quality : - if video_q not in video_formats : - if new_api : - self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value=f"{video_q}") - webpage=self._download_webpage(uri, - f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") - play_info=re.search(r"window\.__playinfo__=([^<]+)",webpage,re.I) #Get video links from webpage. - if play_info != None : - play_info=json.loads(play_info.groups()[0]) - if play_info['code']!=0 : + video_formats = {video_quality: play_info['durl']} + video_formats_size = {video_quality: self._calculate_size(play_info['durl'])} # Total Filesize Dict + durl_length = [len(play_info['durl'])] + for video_q in accept_video_quality: + if video_q not in video_formats: + if new_api: + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=f"{video_q}") + webpage = self._download_webpage(uri, + f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - new_api=False - play_info=self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16",f"{video_id} Part{part_info['page']}", + play_info = play_info['data'] + else: + new_api = False + play_info = self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", f"Geting video links for format id : {video_q}.", f"Unable to get video links for format id : {video_q}.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - play_info=self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16",f"{video_id} Part{part_info['page']}", + play_info = play_info['data'] + else: + play_info = self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", f"Geting video links for format id : {video_q}.", f"Unable to get video links for format id : {video_q}.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - if 'durl' in play_info : - video_formats[play_info["quality"]]=play_info['durl'] - video_formats_size[play_info["quality"]]=self._calculate_size(play_info['durl']) + play_info = play_info['data'] + if 'durl' in play_info: + video_formats[play_info["quality"]] = play_info['durl'] + video_formats_size[play_info["quality"]] = self._calculate_size(play_info['durl']) durl_length.append(len(play_info['durl'])) - for i in range(max(durl_length)) : - entry={} + for i in range(max(durl_length)): + entry = {} entry.update(info) - entry.update({'id':f"{info['id']} Part{i+1}"}) - formats_output=[] - for video_q in accept_video_quality : - durl=video_formats[video_q] - if i < len(durl) : - video_format=durl[i] + entry.update({'id': f"{info['id']} Part{i+1}"}) + formats_output = [] + for video_q in accept_video_quality: + durl = video_formats[video_q] + if i < len(durl): + video_format = durl[i] formats_output.append({ - "url":video_format['url'], - "format_id":f"{video_q}", - "ext":"flv", - "http_headers":headers, - "filesize":video_format['size'] + "url": video_format['url'], + "format_id": f"{video_q}", + "ext": "flv", + "http_headers": headers, + "filesize": video_format['size'] }) - entry['formats']=formats_output + entry['formats'] = formats_output entries.append(entry) - elif 'dash' in play_info : # Stream for dash player - video_quality=play_info['quality'] - accept_video_quality_desc=play_info['accept_description'] - accept_video_quality=play_info['accept_quality'] - accept_audio_quality=[] - dash=play_info['dash'] - video_quality_list=[] - video_desc_dict={} - for i in range(len(accept_video_quality)) : + elif 'dash' in play_info: # Stream for dash player + video_quality = play_info['quality'] + accept_video_quality_desc = play_info['accept_description'] + accept_video_quality = play_info['accept_quality'] + accept_audio_quality = [] + dash = play_info['dash'] + video_quality_list = [] + video_desc_dict = {} + for i in range(len(accept_video_quality)): video_desc_dict.update({ - accept_video_quality[i]:accept_video_quality_desc[i] + accept_video_quality[i]: accept_video_quality_desc[i] }) - video_formats={} - for video_format in dash['video'] : - if video_format['codecs'].startswith('hev') : #Let format id increase 1 to distinguish codec - video_quality_list.append(video_format['id']+1) - video_formats[video_format['id']+1]=video_format - else : + video_formats = {} + for video_format in dash['video']: + if video_format['codecs'].startswith('hev'): # Let format id increase 1 to distinguish codec + video_quality_list.append(video_format['id'] + 1) + video_formats[video_format['id'] + 1] = video_format + else: video_quality_list.append(video_format['id']) - video_formats[video_format['id']]=video_format - bs=True #Try to get all video formats + video_formats[video_format['id']] = video_format + bs = True # Try to get all video formats while bs: - bs=False - for video_q in accept_video_quality : + bs = False + for video_q in accept_video_quality: if video_q not in video_formats: - if not is_login and video_q <=32 : - bs=True - elif is_vip<1 and video_q<=80 and video_q!=74 : - bs=True - elif is_vip>0: - bs=True - if new_api : - self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value=f"{video_q}") - webpage=self._download_webpage(uri, - f"{video_id} Part{part_info['page']}", - f"Geting video links for format id : {video_q}.", - f"Unable to get video links for format id : {video_q}.") - play_info=re.search(r"window\.__playinfo__=([^<]+)",webpage,re.I) #Get video links from webpage. - if play_info != None : - play_info=json.loads(play_info.groups()[0]) - if play_info['code']!=0 : + if not is_login and video_q <= 32: + bs = True + elif is_vip < 1 and video_q <= 80 and video_q != 74: + bs = True + elif is_vip > 0: + bs = True + if new_api: + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=f"{video_q}") + webpage = self._download_webpage(uri, + f"{video_id} Part{part_info['page']}", + f"Geting video links for format id : {video_q}.", + f"Unable to get video links for format id : {video_q}.") + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - new_api=False - play_info=self._download_json( - f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16",f"{video_id} Part{part_info['page']}", + play_info = play_info['data'] + else: + new_api = False + play_info = self._download_json( + f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", f"Geting video links for format id : {video_q}.", f"Unable to get video links for format id : {video_q}.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] - else : - play_info=self._download_json( + play_info = play_info['data'] + else: + play_info = self._download_json( f"https://api.bilibili.com/x/player/playurl?cid={part_info['cid']}&qn={video_q}&otype=json&bvid={bvid}&fnver=0&fnval=16", f"{video_id} Part{part_info['page']}", f"Geting video links for format id : {video_q}.", f"Unable to get video links for format id : {video_q}.") - if play_info['code']!=0 : + if play_info['code'] != 0: self._report_error(play_info) - play_info=play_info['data'] + play_info = play_info['data'] if 'dash' in play_info: - for video_format in play_info['dash']['video'] : - if video_format['codecs'].startswith('hev') : #Let format id increase 1 to distinguish codec - video_format_q=video_format['id']+1 - else : - video_format_q=video_format['id'] - if video_format_q not in video_formats : + for video_format in play_info['dash']['video']: + if video_format['codecs'].startswith('hev'): # Let format id increase 1 to distinguish codec + video_format_q = video_format['id'] + 1 + else: + video_format_q = video_format['id'] + if video_format_q not in video_formats: video_quality_list.append(video_format_q) - video_formats[video_format_q]=video_format - bs=True + video_formats[video_format_q] = video_format + bs = True break - self._set_cookie(domain=".bilibili.com",name="CURRENT_QUALITY",value="120") - entry={} + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") + entry = {} entry.update(info) - formats_output=[] - for i in video_quality_list : - video_format=video_formats[i] + formats_output = [] + for i in video_quality_list: + video_format = video_formats[i] formats_output.append( - {"url":video_format['base_url'], - "ext":"mp4", - "format_note":video_desc_dict[video_format['id']], - "format_id":f"{i}", - "vcodec":video_format['codecs'], - "fps":self._getfps(video_format['frame_rate']), - "width":video_format['width'], - "height":video_format['height'], - "http_headers":headers - }) - if 'audio' in dash and dash['audio']!=None : - for audio_format in dash['audio'] : + {"url": video_format['base_url'], + "ext": "mp4", + "format_note": video_desc_dict[video_format['id']], + "format_id": f"{i}", + "vcodec": video_format['codecs'], + "fps": self._getfps(video_format['frame_rate']), + "width": video_format['width'], + "height": video_format['height'], + "http_headers": headers + }) + if 'audio' in dash and dash['audio'] is not None: + for audio_format in dash['audio']: accept_audio_quality.append(audio_format['id']) - video_formats[audio_format['id']]=audio_format + video_formats[audio_format['id']] = audio_format accept_audio_quality.sort(reverse=True) - for audio_quality in accept_audio_quality : - audio_format=video_formats[audio_quality] + for audio_quality in accept_audio_quality: + audio_format = video_formats[audio_quality] formats_output.append({ - "url":audio_format["base_url"], - "format_id":f"{audio_format['id']}", - "ext":"mp4", - "acodec":audio_format['codecs'], - "http_headers":headers + "url": audio_format["base_url"], + "format_id": f"{audio_format['id']}", + "ext": "mp4", + "acodec": audio_format['codecs'], + "http_headers": headers }) - entry.update({"formats":formats_output}) - if video_count > 1 : - entry.update({"title":f"{info['title']} - {part_info['part']}"}) - entry.update({"id":f"{video_id} P{part_info['page']}"}) + entry.update({"formats": formats_output}) + if video_count > 1: + entry.update({"title": f"{info['title']} - {part_info['part']}"}) + entry.update({"id": f"{video_id} P{part_info['page']}"}) entries.append(entry) - if video_count > 1 : + if video_count > 1: if len(entries) == 1 and not is_durl: info.update({ - 'formats':entries[0]['formats'], - 'id':entries[0]['id'] + 'formats': entries[0]['formats'], + 'id': entries[0]['id'] }) return info info.update({ - "_type":'multi_video', - "entries":entries + "_type": 'multi_video', + "entries": entries }) return info - else : + else: if not is_durl: return entries[0] - else : - if len(entries)>1 : + else: + if len(entries) > 1: info.update({ - "_type":'multi_video', - "entries":entries + "_type": 'multi_video', + "entries": entries }) - else : + else: info.update({ - "formats":entries[0]['formats'] + "formats": entries[0]['formats'] }) return info