From dccbdb63a6d1e921f3796ca7b49c7ac165787644 Mon Sep 17 00:00:00 2001 From: snylonue Date: Wed, 11 Mar 2020 14:05:16 +0800 Subject: [PATCH 1/5] [bilibili] Fix animation extractor The code is ugly and unstable, but it at least works with recent animation links. Fixes #22012 --- youtube_dl/extractor/bilibili.py | 72 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 73 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 80bd696e2..432203273 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -22,6 +22,8 @@ from ..utils import ( urlencode_postdata, ) +md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() + class BiliBiliIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)' @@ -242,6 +244,7 @@ class BiliBiliIE(InfoExtractor): } +# this extractor doesn't work now class BiliBiliBangumiIE(InfoExtractor): _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)' @@ -419,3 +422,72 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): entries, am_id, album_title, album_data.get('intro')) return self.playlist_result(entries, am_id) + + +class BilibiliNewBangumiIE(InfoExtractor): + _VALID_URL = r'(https?://www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.bilibili.com/bangumi/play/ep307448', + 'md5': '084ae96f913cf13ab626326d86190ddf', + 'info_dict': { + 'id': '307448', + 'ext': 'flv', + 'title': '异度侵入 ID:INVADED:第3话 SNIPED 瀑布世界', + # 'thumbnail': r're:^https?://.*\.jpg$', + } + }] + _APP_KEY = 'iVGUTjsxvpLeuDCf' + _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt' + # Don't forget to add ? 
after url + _BILIBILI_API = 'https://bangumi.bilibili.com/player/web_api/v2/playurl' + _BILIBILI_TOKEN_API = 'https://api.bilibili.com/x/player/playurl/token' + + def _real_extract(self, url): + url, _ = unsmuggle_url(url) + match_url = re.match(self._VALID_URL, url) + video_id = match_url.group('id') + webpage = self._download_webpage(url, video_id) + + jsondata = re.search(r'window.__INITIAL_STATE__=(.+?);', webpage) # may return None + bgmdata = self._parse_json(jsondata.group(1), video_id) # may throw exception + try: + cid = bgmdata['epInfo']['cid'] + # aid = bgmdata['epInfo']['aid'] + title = bgmdata['h1Title'] + except KeyError as e: + raise ExtractorError(f'{video_id}: Failed to read JSON', cause=e) + + jsondata = re.search(r'"ssType":(\d+)', webpage) # may return None + season = jsondata.group(1) + + ''' + tokens = self._download_json(f'{self._BILIBILI_TOKEN_API}?aid={aid}&cid={cid}', video_id) + try: + token = tokens['data']['token'] + except KeyError as e: + raise ExtractorError(f'{video_id}: Failed to read JSON', cause = e) + ''' + + # quality is hardcoded to 80 + # params = f'appkey={self._APP_KEY}&cid={cid}&module=bangumi&otype=json&qn=80&quality=80&season_type={season}&type=' + params = 'appkey=%s&cid=%s&module=bangumi&otype=json&qn=80&quality=80&season_type=%s&type=' % (self._APP_KEY, cid, season) + # may throw exception + # urls = self._download_json(f'{self._BILIBILI_API}?{params}&sign={md5(params + self._BILIBILI_KEY)}', video_id) + urls = self._download_json('%s?%s&sign=%s' % (self._BILIBILI_API, params, md5(params + self._BILIBILI_KEY)), video_id) + try: + # The url can be played in 1080p + rurl = urls['durl'][0]['url'] + ext = urls['format'] + size = urls['durl'][0]['size'] + except (KeyError, IndexError) as e: + raise ExtractorError(f'{video_id}: Failed to read JSON', cause=e) + return { + 'title': title, + 'id': video_id, + 'url': rurl, + 'ext': ext, + 'size': size, + 'http_headers': { + 'Referer': url, + } + } diff --git 
a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 64d1fa251..a6a4bd5f6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -105,6 +105,7 @@ from .bilibili import ( BiliBiliBangumiIE, BilibiliAudioIE, BilibiliAudioAlbumIE, + BilibiliNewBangumiIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( From e59daf797cafc61296407634338312e2279bd6bf Mon Sep 17 00:00:00 2001 From: snylonue Date: Wed, 11 Mar 2020 15:27:09 +0800 Subject: [PATCH 2/5] [bilibili] Follow youtube-dl coding conventions --- youtube_dl/extractor/bilibili.py | 35 +++++++++++++++----------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 432203273..dd28246de 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -20,6 +20,7 @@ from ..utils import ( unified_timestamp, unsmuggle_url, urlencode_postdata, + try_get, ) md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() @@ -425,7 +426,7 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): class BilibiliNewBangumiIE(InfoExtractor): - _VALID_URL = r'(https?://www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)' + _VALID_URL = r'(?:https?://www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.bilibili.com/bangumi/play/ep307448', 'md5': '084ae96f913cf13ab626326d86190ddf', @@ -444,21 +445,21 @@ class BilibiliNewBangumiIE(InfoExtractor): def _real_extract(self, url): url, _ = unsmuggle_url(url) - match_url = re.match(self._VALID_URL, url) - video_id = match_url.group('id') + video_id = self._search_regex(self._VALID_URL, url, 'video_id', group='id') webpage = self._download_webpage(url, video_id) - jsondata = re.search(r'window.__INITIAL_STATE__=(.+?);', webpage) # may return None - bgmdata = self._parse_json(jsondata.group(1), video_id) # may throw exception + jsondata = self._search_regex(r'window.__INITIAL_STATE__=(.+?);', 
webpage, 'jsondata', group=1) + bgmdata = self._parse_json(jsondata, video_id) # may throw exception + try: cid = bgmdata['epInfo']['cid'] # aid = bgmdata['epInfo']['aid'] - title = bgmdata['h1Title'] except KeyError as e: - raise ExtractorError(f'{video_id}: Failed to read JSON', cause=e) + raise ExtractorError(f'{video_id}: Failed to extract cid', cause=e) - jsondata = re.search(r'"ssType":(\d+)', webpage) # may return None - season = jsondata.group(1) + title = bgmdata.get('h1Title') or self._og_search_title(webpage) + + season = self._search_regex(r'"ssType":(\d+)', webpage, 'season', group=1) ''' tokens = self._download_json(f'{self._BILIBILI_TOKEN_API}?aid={aid}&cid={cid}', video_id) @@ -474,19 +475,15 @@ class BilibiliNewBangumiIE(InfoExtractor): # may throw exception # urls = self._download_json(f'{self._BILIBILI_API}?{params}&sign={md5(params + self._BILIBILI_KEY)}', video_id) urls = self._download_json('%s?%s&sign=%s' % (self._BILIBILI_API, params, md5(params + self._BILIBILI_KEY)), video_id) - try: - # The url can be played in 1080p - rurl = urls['durl'][0]['url'] - ext = urls['format'] - size = urls['durl'][0]['size'] - except (KeyError, IndexError) as e: - raise ExtractorError(f'{video_id}: Failed to read JSON', cause=e) + + real_url = try_get(urls, lambda x: x['durl'][0]['url']) + return { 'title': title, 'id': video_id, - 'url': rurl, - 'ext': ext, - 'size': size, + 'url': real_url, + 'ext': urls.get('format'), + 'size': try_get(urls, lambda x: x['durl'][0]['size']), 'http_headers': { 'Referer': url, } From 3988b7bf085d1e171fa5ab22a424e1ef8dbe5d9e Mon Sep 17 00:00:00 2001 From: snylonue Date: Wed, 11 Mar 2020 16:10:07 +0800 Subject: [PATCH 3/5] [bilibili] try to support python 2.6+ and fix ci --- youtube_dl/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index dd28246de..e7831a7e2 100644 --- a/youtube_dl/extractor/bilibili.py +++ 
b/youtube_dl/extractor/bilibili.py @@ -455,7 +455,7 @@ class BilibiliNewBangumiIE(InfoExtractor): cid = bgmdata['epInfo']['cid'] # aid = bgmdata['epInfo']['aid'] except KeyError as e: - raise ExtractorError(f'{video_id}: Failed to extract cid', cause=e) + raise ExtractorError('Failed to extract cid', cause=e, video_id=video_id) title = bgmdata.get('h1Title') or self._og_search_title(webpage) From d24e286f5392447817f5212d0805b866f9ae83dd Mon Sep 17 00:00:00 2001 From: snylonue Date: Wed, 25 Mar 2020 12:56:46 +0800 Subject: [PATCH 4/5] fix ci --- youtube_dl/extractor/bilibili.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 2959038ed..dbec223b1 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -504,6 +504,7 @@ class BilibiliNewBangumiIE(InfoExtractor): } } + class BiliBiliPlayerIE(InfoExtractor): _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)' _TEST = { From ab71a2b1087ec021e05c657cbfb8d95f92138f70 Mon Sep 17 00:00:00 2001 From: snylonue Date: Wed, 25 Mar 2020 12:56:46 +0800 Subject: [PATCH 5/5] [bilibili] fix ci --- youtube_dl/extractor/bilibili.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 2959038ed..dbec223b1 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -504,6 +504,7 @@ class BilibiliNewBangumiIE(InfoExtractor): } } + class BiliBiliPlayerIE(InfoExtractor): _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)' _TEST = {