diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 4dc597e16..6f774999c 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -1,37 +1,29 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib import re +import json from .common import InfoExtractor from ..compat import ( - compat_parse_qs, compat_urlparse, ) from ..utils import ( ExtractorError, int_or_none, - float_or_none, - parse_iso8601, - smuggle_url, str_or_none, - strip_jsonp, - unified_timestamp, unsmuggle_url, - urlencode_postdata, ) class BiliBiliIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - (?:(?:www|bangumi)\.)? + (?:(?:www)\.)? bilibili\.(?:tv|com)/ (?: (?: - video/[aA][vV]| - anime/(?P\d+)/play\# + video/[aA][vV] )(?P\d+)| video/[bB][vV](?P[^/?#&]+) ) @@ -52,20 +44,6 @@ class BiliBiliIE(InfoExtractor): 'uploader': '菊子桑', 'uploader_id': '156160', }, - }, { - # Tested in BiliBiliBangumiIE - 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062', - 'only_matching': True, - }, { - 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643', - 'md5': '3f721ad1e75030cc06faf73587cfec57', - 'info_dict': { - 'id': '100643', - 'ext': 'mp4', - 'title': 'CHAOS;CHILD', - 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', - }, - 'skip': 'Geo-restricted to China', }, { # Title with double quotes 'url': 'http://www.bilibili.com/video/av8903802/', @@ -120,208 +98,806 @@ class BiliBiliIE(InfoExtractor): else: raise ExtractorError('Can\'t extract Bangumi episode ID') + def _getfps(self, s): + "convert fps to int" + if s.isnumeric(): + return int(s) + else: + r = re.search(r"([0-9]+)/([0-9]+)", s) + if r is not None: + r = r.groups() + return int(r[0]) / int(r[1]) + else: + return 0 + + def _calculate_size(self, durl): + "Calculate total file size." + s = 0 + for i in durl: + s = s + i['size'] + return s + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') or mobj.group('id_bv') - anime_id = mobj.group('anime_id') + query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + part = None + if 'p' in query and str(query['p'][0]).isnumeric(): + part = int(query['p'][0]) + + # Set Cookies need to parse the Links. + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") # Set default video quality + self._set_cookie(domain=".bilibili.com", name="CURRENT_FNVAL", value="16") + self._set_cookie(domain=".bilibili.com", name="laboratory", value="1-1") # Use new webpage API + self._set_cookie(domain=".bilibili.com", name="stardustvideo", value="1") + webpage = self._download_webpage(url, video_id) - if 'anime/' not in url: - cid = self._search_regex( - r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid', - default=None - ) or compat_parse_qs(self._search_regex( - [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)', - r']+src="https://secure\.bilibili\.com/secure,([^"]+)"'], - webpage, 'player parameters'))['cid'][0] + video_info = re.search(r"window\.__INITIAL_STATE__=([^;]+)", webpage, re.I) + if video_info is not None: + video_info = json.loads(video_info.groups()[0]) else: - if 'no_bangumi_tip' not in smuggled_data: - self.to_screen('Downloading episode %s. 
To download all videos in anime %s, re-run youtube-dl with %s' % ( - video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id))) - headers = { - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Referer': url - } - headers.update(self.geo_verification_headers()) + if mobj.group('id') is not None: # numeric av id + uri = "https://api.bilibili.com/x/web-interface/view/detail?bvid=&aid=%s&jsonp=jsonp" % (video_id) + else: # BV id + uri = "https://api.bilibili.com/x/web-interface/view/detail?bvid=BV%s&aid=&jsonp=jsonp" % (video_id) + redirect_info = self._download_json( + uri, video_id, + "Getting redirect information.", "Unable to get redirect information.") + if redirect_info['code'] != 0: + self._report_error(redirect_info) + redirect_info = redirect_info['data'] + if 'View' in redirect_info and 'redirect_url' in redirect_info['View']: + return self.url_result(redirect_info['View']['redirect_url']) + else: + raise ExtractorError("Cannot find redirect URL.") + video_data = video_info['videoData'] + uploader_data = video_info['upData'] + aid = video_data['aid'] + bvid = video_data['bvid'] + video_count = video_data['videos'] - js = self._download_json( - 'http://bangumi.bilibili.com/web_api/get_source', video_id, - data=urlencode_postdata({'episode_id': video_id}), - headers=headers) - if 'result' not in js: - self._report_error(js) - cid = js['result']['cid'] + tags_info = self._download_json( + "https://api.bilibili.com/x/web-interface/view/detail/tag?aid=%s" % (aid), video_id, + 'Getting video tags.', 'Unable to get video tags.') + if tags_info['code'] != 0: + self._report_error(tags_info) + tags_info = tags_info['data'] + tags = [] + for i in tags_info: + tags.append(i['tag_name']) - headers = { - 'Referer': url - } - headers.update(self.geo_verification_headers()) + user_info = self._download_json("https://api.bilibili.com/x/web-interface/nav", video_id, + "Getting login/user information.", "Unable to get login/user information.") + if user_info['code'] != 0 and user_info['code'] != -101: + self._report_error(user_info) + user_info = user_info['data'] + is_login = user_info['isLogin'] + if is_login: + is_vip = user_info['vipStatus'] + else: + is_vip = 0 + is_durl = False # Set to True if the API returns a durl stream - entries = [] - - RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4') - for num, rendition in enumerate(RENDITIONS, start=1): - payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition) - sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() - - video_info = self._download_json( - 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign), - video_id, note='Downloading video info page', - headers=headers, fatal=num == len(RENDITIONS)) - - if not video_info: - continue - - if 'durl' not in video_info: - if num < len(RENDITIONS): - continue - self._report_error(video_info) - - for idx, durl in enumerate(video_info['durl']): - formats = [{ - 'url': durl['url'], - 'filesize': int_or_none(durl['size']), - }] - for backup_url in durl.get('backup_url', []): - formats.append({ - 'url': backup_url, - # backup URLs have lower priorities - 'preference': -2 if 'hd.mp4' in backup_url else -3, - }) - - for a_format in formats: - a_format.setdefault('http_headers', {}).update({ - 'Referer': url, - }) - - self._sort_formats(formats) - - entries.append({ - 'id': '%s_part%s' % (video_id, idx), - 'duration': float_or_none(durl.get('length'), 1000), - 'formats': formats, - }) - break - - title = 
self._html_search_regex( - (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1', - '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title', - group='title') - description = self._html_search_meta('description', webpage) - timestamp = unified_timestamp(self._html_search_regex( - r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', - default=None) or self._html_search_meta( - 'uploadDate', webpage, 'timestamp', default=None)) - thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) - - # TODO 'view_count' requires deobfuscating Javascript info = { 'id': video_id, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'thumbnail': thumbnail, - 'duration': float_or_none(video_info.get('timelength'), scale=1000), + 'title': video_data['title'], + 'description': video_data['desc'], + 'timestamp': video_data['ctime'], + 'thumbnail': video_data['pic'], + 'uploader': uploader_data['name'], + 'uploader_id': uploader_data['mid'], + 'duration': video_data['duration'], + 'webpage_url': 'https://www.bilibili.com/video/av%s' % (aid), + 'categories': [video_data['tname']], + 'view_count': video_data['stat']['viewseo'], + 'comment_count': video_data['stat']['reply'], + 'tags': tags } - uploader_mobj = re.search( - r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)', - webpage) - if uploader_mobj: + if video_count == 1: + info.update({'alt_title': video_data['pages'][0]['part']}) + + new_api = True # Parse video links from webpage first. + first = True # First part of the list + entries = [] + + for part_info in video_data['pages']: + if part is not None and part_info["page"] != part: + continue + uri = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, part_info["page"]) + if first: + first = False + else: + webpage = self._download_webpage(uri, "%s Part%s" % (video_id, part_info['page'])) + headers = {'Referer': uri} + if new_api: + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + else: + new_api = False + play_info = self._download_json( + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=120&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], bvid), + "%s Part%s" % (video_id, part_info['page']), + "Getting video links.", + "Unable to get video links.") + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + else: + play_info = self._download_json( + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=120&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], bvid), + "%s Part%s" % (video_id, part_info['page']), + "Getting video links.", + "Unable to get video links.") + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + if 'durl' in play_info: # Stream for flv player + if video_count > 1 and len(play_info['durl']) > 1 and part is None: + self.report_warning( + "There are multiple FLV files in this part. Please pass \"%s\" directly to extract it." 
% (uri), + "%s Part%s" % (video_id, part_info['page'])) + continue + is_durl = True + if video_count > 1: + info.update({ + 'title': "%s - %s" % (info['title'], part_info['part']), + 'id': "%s P%s" % (video_id, part_info['page']) + }) + video_quality = play_info['quality'] + accept_video_quality_desc = play_info['accept_description'] + accept_video_quality = play_info['accept_quality'] + video_desc_dict = {} + for i in range(len(accept_video_quality)): + video_desc_dict.update({ + accept_video_quality[i]: accept_video_quality_desc[i] + }) + video_formats = {video_quality: play_info['durl']} + video_formats_size = {video_quality: self._calculate_size(play_info['durl'])} # Total Filesize Dict + durl_length = [len(play_info['durl'])] + for video_q in accept_video_quality: + if video_q not in video_formats: + if new_api: + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=str(video_q)) + webpage = self._download_webpage(uri, + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q)) + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + else: + new_api = False + play_info = self._download_json( + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=%s&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], video_q, bvid), + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q)) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + else: + play_info = self._download_json( + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=%s&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], video_q, bvid), + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." 
% (video_q)) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + if 'durl' in play_info: + video_formats[play_info["quality"]] = play_info['durl'] + video_formats_size[play_info["quality"]] = self._calculate_size(play_info['durl']) + durl_length.append(len(play_info['durl'])) + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") + for i in range(max(durl_length)): + entry = {} + entry.update(info) + entry.update({'id': "%s Part%s" % (info['id'], i + 1)}) + formats_output = [] + for video_q in accept_video_quality: + durl = video_formats[video_q] + if i < len(durl): + video_format = durl[i] + formats_output.append({ + "url": video_format['url'], + "format_id": str(video_q), + "format_note": video_desc_dict[video_q], + "ext": "flv", + "http_headers": headers, + "filesize": video_format['size'] + }) + entry['formats'] = formats_output + entries.append(entry) + elif 'dash' in play_info: # Stream for dash player + video_quality = play_info['quality'] + accept_video_quality_desc = play_info['accept_description'] + accept_video_quality = play_info['accept_quality'] + accept_audio_quality = [] + dash = play_info['dash'] + video_quality_list = [] + video_desc_dict = {} + for i in range(len(accept_video_quality)): + video_desc_dict.update({ + accept_video_quality[i]: accept_video_quality_desc[i] + }) + video_formats = {} + for video_format in dash['video']: + if video_format['codecs'].startswith('hev'): # Let format id increase 1 to distinguish codec + video_quality_list.append(video_format['id'] + 1) + video_formats[video_format['id'] + 1] = video_format + else: + video_quality_list.append(video_format['id']) + video_formats[video_format['id']] = video_format + bs = True # Try to get all video formats + while bs: + bs = False + for video_q in accept_video_quality: + if video_q not in video_formats: + if not is_login and video_q <= 32: + bs = True + elif is_vip < 1 and video_q <= 80 and video_q != 74: + bs = True + elif is_vip > 0: + bs = True + if new_api: + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=str(video_q)) + webpage = self._download_webpage(uri, + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q)) + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) # Get video links from webpage. + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + else: + new_api = False + play_info = self._download_json( + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=%s&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], video_q, bvid), + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q)) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + else: + play_info = self._download_json( + "https://api.bilibili.com/x/player/playurl?cid=%s&qn=%s&otype=json&bvid=%s&fnver=0&fnval=16" % (part_info['cid'], video_q, bvid), + "%s Part%s" % (video_id, part_info['page']), + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." 
% (video_q)) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + if 'dash' in play_info: + for video_format in play_info['dash']['video']: + if video_format['codecs'].startswith('hev'): # Let format id increase 1 to distinguish codec + video_format_q = video_format['id'] + 1 + else: + video_format_q = video_format['id'] + if video_format_q not in video_formats: + video_quality_list.append(video_format_q) + video_formats[video_format_q] = video_format + bs = True + break + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") + entry = {} + entry.update(info) + formats_output = [] + for i in video_quality_list: + video_format = video_formats[i] + formats_output.append( + {"url": video_format['base_url'], + "ext": "mp4", + "format_note": video_desc_dict[video_format['id']], + "format_id": str(i), + "vcodec": video_format['codecs'], + "fps": self._getfps(video_format['frame_rate']), + "width": video_format['width'], + "height": video_format['height'], + "http_headers": headers + }) + if 'audio' in dash and dash['audio'] is not None: + for audio_format in dash['audio']: + accept_audio_quality.append(audio_format['id']) + video_formats[audio_format['id']] = audio_format + accept_audio_quality.sort(reverse=True) + for audio_quality in accept_audio_quality: + audio_format = video_formats[audio_quality] + formats_output.append({ + "url": audio_format["base_url"], + "format_id": str(audio_format['id']), + "ext": "mp4", + "acodec": audio_format['codecs'], + "http_headers": headers + }) + entry.update({"formats": formats_output}) + if video_count > 1: + entry.update({"title": "%s - %s" % (info['title'], part_info['part'])}) + entry.update({"id": "%s P%s" % (video_id, part_info['page'])}) + entries.append(entry) + + if video_count > 1: + if len(entries) == 1 and not is_durl: + info.update({ + 'formats': entries[0]['formats'], + 'id': entries[0]['id'] + }) + return info info.update({ - 'uploader': uploader_mobj.group('name'), - 'uploader_id': uploader_mobj.group('id'), + "_type": 'multi_video', + "entries": entries }) - if not info.get('uploader'): - info['uploader'] = self._html_search_meta( - 'author', webpage, 'uploader', default=None) - - for entry in entries: - entry.update(info) - - if len(entries) == 1: - return entries[0] + return info else: - for idx, entry in enumerate(entries): - entry['id'] = '%s_part%d' % (video_id, (idx + 1)) - - return { - '_type': 'multi_video', - 'id': video_id, - 'title': title, - 'description': description, - 'entries': entries, - } + if not is_durl: + return entries[0] + else: + if len(entries) > 1: + info.update({ + "_type": 'multi_video', + "entries": entries + }) + else: + info.update({ + "formats": entries[0]['formats'] + }) + return info class BiliBiliBangumiIE(InfoExtractor): - _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)' + _VALID_URL = r'''(?x) + https?:// + (?:(?:www)\.)? 
+ bilibili\.(?:tv|com)/ + (?: + (?: + bangumi/play/[sS][sS] + )(?P<ssid>\d+)| + bangumi/play/[eE][pP](?P<epid>\d+) + ) + ''' - IE_NAME = 'bangumi.bilibili.com' + IE_NAME = 'bilibili bangumi' IE_DESC = 'BiliBili番剧' - _TESTS = [{ - 'url': 'http://bangumi.bilibili.com/anime/1869', - 'info_dict': { - 'id': '1869', - 'title': '混沌武士', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', - }, - 'playlist_count': 26, - }, { - 'url': 'http://bangumi.bilibili.com/anime/1869', - 'info_dict': { - 'id': '1869', - 'title': '混沌武士', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', - }, - 'playlist': [{ - 'md5': '91da8621454dd58316851c27c68b0c13', - 'info_dict': { - 'id': '40062', - 'ext': 'mp4', - 'title': '混沌武士', - 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...', - 'timestamp': 1414538739, - 'upload_date': '20141028', - 'episode': '疾风怒涛 Tempestuous Temperaments', - 'episode_number': 1, - }, - }], - 'params': { - 'playlist_items': '1', - }, - }] + _TESTS = [] - @classmethod + @ classmethod def suitable(cls, url): return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url) + def _get_episode_list(self, bangumi_info): + ep_list = bangumi_info['epList'] + episode_list = [] + for i in ep_list: + temp = {} + temp.update(i) + episode_list.append(temp) + if 'sections' in bangumi_info: + for section in bangumi_info['sections']: + for i in section['epList']: + temp = {} + temp.update(i) + temp.update({ + "section_title": section['title'], + "section_id": section['id'] + }) + episode_list.append(temp) + return episode_list + + def _report_error(self, error): + if 'message' in error: + raise ExtractorError(error['message']) + elif 'code' in error: + raise ExtractorError(str(error['code'])) + else: + raise ExtractorError(str(error)) + + def _report_warning(self, warning, video_id=None): + if 'message' in warning: + self.report_warning(warning['message'], video_id) + elif 'code' in warning: + self.report_warning(str(warning['code']), video_id) + else: + self.report_warning(str(warning), video_id) + + def _calculate_size(self, durl): + "Calculate total file size." 
+ s = 0 + for i in durl: + s = s + i['size'] + return s + + def _getfps(self, s): + "convert fps to int" + if s.isnumeric(): + return int(s) + else: + r = re.search(r"([0-9]+)/([0-9]+)", s) + if r is not None: + r = r.groups() + return int(r[0]) / int(r[1]) + else: + return 0 + + def _extract_episode(self, episode_info): + epid = episode_info['id'] + uri = "https://www.bilibili.com/bangumi/play/ep%s" % (epid) + if self._epid is None: + video_id = "%s %s" % (self._video_id, episode_info['titleFormat']) + else: + video_id = self._video_id + if self._first: + webpage = self._webpage + self._first = False + else: + webpage = self._download_webpage(uri, video_id) + headers = {'Referer': uri} + if self._new_api: + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + else: + self._new_api = False + play_info = self._download_json( + "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=120&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], episode_info['bvid'], epid), + video_id, + "Getting video links.", + "Unable to get video links.", + headers=headers) + if play_info['code'] == -10403: # Need vip or buy + self._new_api = True + self._report_warning(play_info) + elif play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['result'] + else: + play_info = self._download_json( + "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=120&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], episode_info['bvid'], epid), + video_id, + "Getting video links.", + "Unable to get video links.", + headers=headers) + if play_info['code'] == -10403: # Need vip or buy + self._report_warning(play_info) + elif play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['result'] + if 'durl' in play_info: # Stream for flv player + if self._video_count > 1 and len(play_info['durl']) > 1 and self._epid is None: + self._report_warning( + "There are multiple FLV files in this episode. Please pass \"%s\" directly to extract it." % (uri), + video_id) + return + self._is_durl = True + if self._epid is not None: + self._info.update({ + "title": "%s - %s %s" % (self._info['title'], episode_info['titleFormat'], episode_info['longTitle']), + "id": video_id, + "episode": episode_info['longTitle'], + "episode_id": episode_info['id'] + }) + video_quality = play_info['quality'] + accept_video_quality_desc = play_info['accept_description'] + accept_video_quality = play_info['accept_quality'] + video_desc_dict = {} + for i in range(len(accept_video_quality)): + video_desc_dict.update({ + accept_video_quality[i]: accept_video_quality_desc[i] + }) + video_formats = {video_quality: play_info['durl']} + video_formats_size = {video_quality: self._calculate_size(play_info['durl'])} + durl_length = [len(play_info['durl'])] + for video_q in accept_video_quality: + if video_q not in video_formats: + if self._new_api: + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=str(video_q)) + webpage = self._download_webpage( + uri, + video_id, + "Getting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." 
% (video_q)) + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + else: + self._new_api = False + play_info = self._download_json( + "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=%s&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], video_q, episode_info['bvid'], epid), + video_id, + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q), + headers=headers) + if play_info['code'] == -10403: # Need vip or buy + self._new_api = True + self._report_warning(play_info) + elif play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['result'] + else: + play_info = self._download_json( + "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=%s&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], video_q, episode_info['bvid'], epid), + video_id, + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q), + headers=headers) + if play_info['code'] == -10403: # Need vip or buy + self._report_warning(play_info) + elif play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['result'] + if 'durl' in play_info: + video_formats[play_info["quality"]] = play_info['durl'] + video_formats_size[play_info["quality"]] = self._calculate_size(play_info['durl']) + durl_length.append(len(play_info['durl'])) + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") + for i in range(max(durl_length)): + entry = {} + entry.update(self._info) + if self._epid is None: + entry.update({ + "title": "%s - %s %s" % (self._info['title'], episode_info['titleFormat'], episode_info['longTitle']), + "id": video_id, + "episode": episode_info['longTitle'], + "episode_id": episode_info['id'] + }) + else: + entry.update({ + "id": "%s Part%s" % (video_id, i + 1) + }) + formats_output = [] + for video_q in accept_video_quality: + durl = video_formats[video_q] + if i < len(durl): + video_format = durl[i] + formats_output.append({ + "url": video_format['url'], + "format_id": str(video_q), + "format_note": video_desc_dict[video_q], + "ext": "flv", + "http_headers": headers, + "filesize": video_format['size'] + }) + entry['formats'] = formats_output + self._entries.append(entry) + elif 'dash' in play_info: # Stream for dash player + video_quality = play_info['quality'] + accept_video_quality_desc = play_info['accept_description'] + accept_video_quality = play_info['accept_quality'] + accept_audio_quality = [] + dash = play_info['dash'] + video_quality_list = [] + video_desc_dict = {} + for i in range(len(accept_video_quality)): + video_desc_dict.update({ + accept_video_quality[i]: accept_video_quality_desc[i] + }) + video_formats = {} + for video_format in dash['video']: + if video_format['codecs'].startswith('hev'): + video_quality_list.append(video_format['id'] + 1) + video_formats[video_format['id'] + 1] = video_format + else: + video_quality_list.append(video_format['id']) + video_formats[video_format['id']] = video_format + bs = True # Try to get all video formats + while bs: + bs = False + for video_q in accept_video_quality: + if video_q not in video_formats: + if not self._is_login and video_q <= 32: + bs = True + elif self._is_vip < 1 and video_q <= 80 and 
video_q != 74: + bs = True + elif self._is_vip > 0: + bs = True + if self._new_api: + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value=str(video_q)) + webpage = self._download_webpage( + uri, + video_id, + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q)) + play_info = re.search(r"window\.__playinfo__=([^<]+)", webpage, re.I) + if play_info is not None: + play_info = json.loads(play_info.groups()[0]) + if play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['data'] + else: + self._new_api = False + play_info = self._download_json( + "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=%s&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], video_q, episode_info['bvid'], epid), + video_id, + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q), + headers=headers) + if play_info['code'] == -10403: # Need vip or buy + self._new_api = True + self._report_warning(play_info) + elif play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['result'] + else: + play_info = self._download_json( + "https://api.bilibili.com/pgc/player/web/playurl?cid=%s&qn=%s&type=&otype=json&fourk=1&bvid=%s&ep_id=%s&fnver=0&fnval=16&session=" % (episode_info['cid'], video_q, episode_info['bvid'], epid), + video_id, + "Geting video links for format id : %s." % (video_q), + "Unable to get video links for format id : %s." % (video_q), + headers=headers) + if play_info['code'] == -10403: # Need vip or buy + self._report_warning(play_info) + elif play_info['code'] != 0: + self._report_error(play_info) + play_info = play_info['result'] + if 'dash' in play_info: + for video_format in play_info['dash']['video']: + if video_format['codecs'].startswith('hev'): # Let format id increase 1 to distinguish codec + video_format_q = video_format['id'] + 1 + else: + video_format_q = video_format['id'] + if video_format_q not in video_formats: + video_quality_list.append(video_format_q) + video_formats[video_format_q] = video_format + bs = True + break + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") + entry = {} + entry.update(self._info) + entry.update({ + "title": "%s - %s %s" % (self._info['title'], episode_info['titleFormat'], episode_info['longTitle']), + "id": video_id, + "episode": episode_info['longTitle'], + "episode_id": episode_info['id'] + }) + formats_output = [] + for i in video_quality_list: + video_format = video_formats[i] + formats_output.append({ + "url": video_format['base_url'], + "ext": "mp4", + "format_note": video_desc_dict[video_format['id']], + "format_id": str(i), + "vcodec": video_format['codecs'], + "fps": self._getfps(video_format['frame_rate']), + "width": video_format['width'], + "height": video_format['height'], + "http_headers": headers + }) + if 'audio' in dash and dash['audio'] is not None: + for audio_format in dash['audio']: + accept_audio_quality.append(audio_format['id']) + video_formats[audio_format['id']] = audio_format + accept_audio_quality.sort(reverse=True) + for audio_quality in accept_audio_quality: + audio_format = video_formats[audio_quality] + formats_output.append({ + "url": audio_format["base_url"], + "format_id": str(audio_format['id']), + "ext": "mp4", + "acodec": audio_format['codecs'], + "http_headers": headers + }) + entry.update({"formats": formats_output}) + self._entries.append(entry) + def 
_real_extract(self, url): - bangumi_id = self._match_id(url) + url, smuggled_data = unsmuggle_url(url, {}) - # Sometimes this API returns a JSONP response - season_info = self._download_json( - 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id, - bangumi_id, transform_source=strip_jsonp)['result'] + mobj = re.match(self._VALID_URL, url) + ssid = mobj.group('ssid') + epid = mobj.group('epid') + video_id = ssid or epid + if ssid is not None: + ssid = int(ssid) + video_id = "ss" + video_id + if epid is not None: + epid = int(epid) + video_id = "ep" + video_id - entries = [{ - '_type': 'url_transparent', - 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}), - 'ie_key': BiliBiliIE.ie_key(), - 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '), - 'episode': episode.get('index_title'), - 'episode_number': int_or_none(episode.get('index')), - } for episode in season_info['episodes']] + # Cookies needed to parse the links. + self._set_cookie(domain=".bilibili.com", name="CURRENT_QUALITY", value="120") # Set default video quality + self._set_cookie(domain=".bilibili.com", name="CURRENT_FNVAL", value="16") + self._set_cookie(domain=".bilibili.com", name="laboratory", value="1-1") # Use new webpage API + self._set_cookie(domain=".bilibili.com", name="stardustvideo", value="1") - entries = sorted(entries, key=lambda entry: entry.get('episode_number')) + webpage = self._download_webpage(url, video_id) - return self.playlist_result( - entries, bangumi_id, - season_info.get('bangumi_title'), season_info.get('evaluate')) + bangumi_info = re.search(r"window\.__INITIAL_STATE__=([^;]+)", webpage, re.I) + if bangumi_info is not None: + bangumi_info = json.loads(bangumi_info.groups()[0]) + else: + raise ExtractorError("Cannot find the bangumi.") + media_info = bangumi_info['mediaInfo'] + if ssid is None: + ssid = int(media_info['ssId']) + + user_info = self._download_json( + "https://api.bilibili.com/x/web-interface/nav", video_id, + "Getting login/user information.", "Unable to get login/user information.") + if user_info['code'] != 0 and user_info['code'] != -101: + self._report_error(user_info) + user_info = user_info['data'] + self._is_login = user_info['isLogin'] + if self._is_login: + self._is_vip = user_info['vipStatus'] + else: + self._is_vip = 0 + self._is_durl = False # Set to True if the API returns a durl stream + + self._info = { + "series": media_info['series'], + "title": media_info['title'], + "season": media_info['title'], + "season_id": media_info['ssId'], + "id": video_id, + "thumbnail": "https:" + media_info['cover'], + "description": media_info['evaluate'], + "uploader": media_info['upInfo']['name'], + "release_date": media_info['pub']['time'][0:4] + media_info['pub']['time'][5:7] + media_info['pub']['time'][8:10], + "uploader_id": media_info['upInfo']['mid'], + "view_count": media_info['stat']['views'], + "like_count": media_info['stat']['favorites'], + "comment_count": media_info['stat']['reply'], + "webpage_url": "https://www.bilibili.com/bangumi/play/%s" % (video_id) + } + + ep_list = self._get_episode_list(bangumi_info) + if epid is not None: + ep_info = None + for ep in ep_list: + if ep['id'] == epid: + ep_info = ep + break + if ep_info is None: + self._report_error("Cannot find the information of ep%s." % (epid)) + self._video_count = len(ep_list) + self._new_api = True # Parse video links from webpage first. 
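+ # Shared state consumed by _extract_episode() for each episode.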
+ self._first = True # First Part + self._webpage = webpage + self._video_id = video_id + self._epid = epid + self._entries = [] + if epid is not None: + self._extract_episode(ep_info) + else: + for ep_info in ep_list: + self._extract_episode(ep_info) + + if epid is None: + self._info.update({ + "_type": 'multi_video', + 'entries': self._entries + }) + return self._info + else: + if len(self._entries) == 1: + return self._entries[0] + else: + self._info.update({ + "_type": 'multi_video', + 'entries': self._entries + }) + return self._info class BilibiliAudioBaseIE(InfoExtractor):