# coding: utf-8
#
# NOTE: this section was originally a unified diff whose line breaks had been
# lost.  The definitions that the diff adds to
# youtube_dl/extractor/bilibili.py are reproduced below as plain Python.
# The remaining hunks of that diff only
#   * add `try_get` to the `from ..utils import (...)` list of bilibili.py,
#   * put the comment "this extractor doesn't work now" above
#     `class BiliBiliBangumiIE(InfoExtractor)`, and
#   * add `BilibiliNewBangumiIE` to the `from .bilibili import (...)` list of
#     youtube_dl/extractor/extractors.py (between `BilibiliAudioAlbumIE` and
#     `BiliBiliPlayerIE`).
# Names used below but not defined here (hashlib, InfoExtractor,
# ExtractorError, try_get, unsmuggle_url) are presumably provided by the
# imports at the top of bilibili.py -- confirm when applying.


def md5(s):
    """Return the hexadecimal MD5 digest of the text *s*, encoded as UTF-8."""
    return hashlib.md5(s.encode('utf-8')).hexdigest()


class BilibiliNewBangumiIE(InfoExtractor):
    """Extract a single bangumi episode from bilibili.com/bangumi/play/ep<id>."""

    # The scheme is mandatory and only the "www." host prefix is optional, so
    # that the pattern is anchored correctly for re.match() and never accepts
    # a scheme-less URL that could not be downloaded afterwards.
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ep307448',
        'md5': '084ae96f913cf13ab626326d86190ddf',
        'info_dict': {
            'id': '307448',
            'ext': 'flv',
            'title': '异度侵入 ID:INVADED:第3话 SNIPED 瀑布世界',
            # 'thumbnail': r're:^https?://.*\.jpg$',
        }
    }]
    _APP_KEY = 'iVGUTjsxvpLeuDCf'
    # Secret appended to the query string before hashing it into `sign`.
    _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
    # Don't forget to add "?" after the URL when appending the query string.
    _BILIBILI_API = 'https://bangumi.bilibili.com/player/web_api/v2/playurl'
    # Currently unused by _real_extract; kept for a possible token-based flow.
    _BILIBILI_TOKEN_API = 'https://api.bilibili.com/x/player/playurl/token'

    def _real_extract(self, url):
        """Return the info dict for the episode at *url*.

        Raises ExtractorError when the page state carries no cid or when the
        play-URL API response carries no downloadable URL.
        """
        url, _ = unsmuggle_url(url)
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The first pattern assumes the JSON object is directly followed by
        # ";(function" (TODO confirm against the live page); it avoids
        # cutting the JSON at a ';' that occurs inside a string value.  The
        # second pattern is the original, simpler fallback.
        jsondata = self._search_regex(
            (r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*\(function',
             r'window\.__INITIAL_STATE__\s*=\s*(.+?);'),
            webpage, 'jsondata', group=1)
        bgmdata = self._parse_json(jsondata, video_id)  # may throw exception

        # try_get also covers a null/non-dict 'epInfo', which a bare
        # KeyError handler would let through as a TypeError.
        cid = try_get(bgmdata, lambda x: x['epInfo']['cid'])
        if cid is None:
            raise ExtractorError('Failed to extract cid', video_id=video_id)

        title = (try_get(bgmdata, lambda x: x['h1Title'])
                 or self._og_search_title(webpage))

        season = self._search_regex(
            r'"ssType":(\d+)', webpage, 'season', group=1)

        # Quality is hard-coded to 80.  The signature is the MD5 of the query
        # string (in exactly this parameter order) followed by the secret
        # key, so the string is built by hand rather than with urlencode.
        params = (
            'appkey=%s&cid=%s&module=bangumi&otype=json&qn=80&quality=80'
            '&season_type=%s&type=' % (self._APP_KEY, cid, season))
        urls = self._download_json(
            '%s?%s&sign=%s' % (
                self._BILIBILI_API, params,
                md5(params + self._BILIBILI_KEY)),
            video_id)  # may throw exception

        # NOTE(review): only the first 'durl' entry is used; if the API can
        # return several segments, the later ones are ignored here.
        first_part = try_get(urls, lambda x: x['durl'][0], dict) or {}
        real_url = first_part.get('url')
        if not real_url:
            # Surface the API's own message when the response has one.
            api_message = try_get(urls, lambda x: x['message'])
            raise ExtractorError(
                'Unable to extract video URL%s' % (
                    ': %s' % api_message if api_message else ''),
                video_id=video_id)

        return {
            'title': title,
            'id': video_id,
            'url': real_url,
            'ext': urls.get('format'),
            # 'filesize' is the info-dict field for the size in bytes;
            # the previous 'size' key is not a recognised field.
            'filesize': first_part.get('size'),
            'http_headers': {
                'Referer': url,
            }
        }