diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py index cae8bbe8b..bf4945ecc 100644 --- a/youtube_dl/extractor/acfun.py +++ b/youtube_dl/extractor/acfun.py @@ -5,7 +5,6 @@ import json from .common import InfoExtractor from ..compat import ( - compat_cookiejar, compat_urllib_parse_urlencode, compat_urllib_request, ) @@ -18,203 +17,223 @@ from ..utils import ( ExtractorError, ) + class BasicAcfunInfoExtractor(InfoExtractor): _FAKE_HEADERS = { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', # noqa - 'Accept-Charset': 'UTF-8,*;q=0.5', - 'Accept-Encoding': 'gzip,deflate,sdch', - 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0', # noqa - } + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", # noqa + "Accept-Charset": "UTF-8,*;q=0.5", + "Accept-Encoding": "gzip,deflate,sdch", + "Accept-Language": "en-US,en;q=0.8", + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0", # noqa + } + def _extract_formats(self, currentVideoInfo): - durationMillis = currentVideoInfo.get('durationMillis') - if 'ksPlayJson' in currentVideoInfo: - ksPlayJson = ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] ) - representation = ksPlayJson.get('adaptationSet')[0].get('representation') + durationMillis = currentVideoInfo.get("durationMillis") + if "ksPlayJson" in currentVideoInfo: + ksPlayJson = ksPlayJson = json.loads(currentVideoInfo["ksPlayJson"]) + representation = ksPlayJson.get("adaptationSet")[0].get("representation") formats = [] for stream in representation: size = float_or_none(durationMillis) * stream["avgBitrate"] / 8 - stream_id = stream["qualityLabel"] - quality = stream["qualityType"] - formats += [{ - 'url': stream["url"], - 'ext': 'mp4', - 'width': stream.get('width'), - 'height': stream.get('height'), - 'filesize': size, - }] - formats = formats[::-1] + formats += [ + { + "url": stream["url"], + "ext": "mp4", + "width": stream.get("width"), + "height": stream.get("height"), + "filesize": size, + } + ] + formats = formats[::-1] self._sort_formats(formats) - return formats + return formats + class AcfunIE(BasicAcfunInfoExtractor): - _VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P[_\d]+)' + _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P[_\d]+)" _TESTS = [ { - 'url': 'https://www.acfun.cn/v/ac18184362', - 'info_dict': { - 'id': '18184362', - 'ext': 'mp4', - 'duration': 192.042, - 'title': '【AC娘】魔性新单《极乐857》上线!来和AC娘一起云蹦迪吧!', - 'uploader': 'AC娘本体', - 'uploader_id': 23682490 - } + "url": "https://www.acfun.cn/v/ac18184362", + "info_dict": { + "id": "18184362", + "ext": "mp4", + "duration": 192.042, + "title": "【AC娘】魔性新单《极乐857》上线!来和AC娘一起云蹦迪吧!", + "uploader": "AC娘本体", + "uploader_id": 23682490, + }, }, { - 'url': 'https://www.acfun.cn/v/ac17532274_3', - 'info_dict': { - 'id': '17532274_3', - 'ext': 'mp4', - 'duration': 233.770, - 'title': '【AC娘x竾颜音】【周六狂欢24小时】TRAP:七夕恋歌!落入本娘爱的陷阱! - TRAP 阿婵', - 'uploader': 'AC娘本体', - 'uploader_id': 23682490 - } - } - ] + "url": "https://www.acfun.cn/v/ac17532274_3", + "info_dict": { + "id": "17532274_3", + "ext": "mp4", + "duration": 233.770, + "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP:七夕恋歌!落入本娘爱的陷阱! - TRAP 阿婵", + "uploader": "AC娘本体", + "uploader_id": 23682490, + }, + }, + ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS) - json_text = self._html_search_regex(r'(?s)videoInfo\s*=\s*(\{.*?\});', webpage, 'json_text') + json_text = self._html_search_regex( + r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text" + ) json_data = json.loads(json_text) - title = json_data['title'] - p_title = self._html_search_regex(r"(.*?)", webpage, 'p_title', default=None) + title = json_data["title"] + p_title = self._html_search_regex( + r"(.*?)", + webpage, + "p_title", + default=None, + ) if p_title: - title = '%s - %s' % (title, p_title) + title = "%s - %s" % (title, p_title) - uploader = json_data.get('user').get('name') - uploader_id = json_data.get('user').get('id') + uploader = json_data.get("user").get("name") + uploader_id = json_data.get("user").get("id") + + currentVideoInfo = json_data.get("currentVideoInfo") + durationMillis = currentVideoInfo.get("durationMillis") + duration = durationMillis / 1000 - currentVideoInfo = json_data.get('currentVideoInfo') - durationMillis = currentVideoInfo.get('durationMillis') - duration = durationMillis / 1000 - formats = self._extract_formats(currentVideoInfo) return { - 'id': video_id, - 'uploader_id': str_to_int(uploader_id), - 'title': title, - 'uploader': str_or_none(uploader), - 'duration': float_or_none(duration), - 'formats': formats + "id": video_id, + "uploader_id": str_to_int(uploader_id), + "title": title, + "uploader": str_or_none(uploader), + "duration": float_or_none(duration), + "formats": formats, } class AcfunBangumiIE(BasicAcfunInfoExtractor): - _VALID_URL = r'https?://www\.acfun\.cn/bangumi/aa(?P[_\d]+)' + _VALID_URL = r"https?://www\.acfun\.cn/bangumi/aa(?P[_\d]+)" _TEST = { - 'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1748679', - 'info_dict': { - 'id': '6002917_36188_1748679', - 'ext': 'mp4', - 'duration': 1437.076, - 'title': '租借女友 第12话 告白和女友', - } + "url": "https://www.acfun.cn/bangumi/aa6002917_36188_1748679", + "info_dict": { + "id": "6002917_36188_1748679", + "ext": "mp4", + "duration": 1437.076, + "title": "租借女友 第12话 告白和女友", + }, } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS) - json_text = self._html_search_regex(r'(?s)bangumiData\s*=\s*(\{.*?\});', webpage, 'json_text') + json_text = self._html_search_regex( + r"(?s)bangumiData\s*=\s*(\{.*?\});", webpage, "json_text" + ) json_data = json.loads(json_text) - title = json_data.get('showTitle') or json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title'] + title = ( + json_data.get("showTitle") + or json_data["bangumiTitle"] + + " " + + json_data["episodeName"] + + " " + + json_data["title"] + ) - currentVideoInfo = json_data.get('currentVideoInfo') - durationMillis = currentVideoInfo.get('durationMillis') - duration = durationMillis / 1000 + currentVideoInfo = json_data.get("currentVideoInfo") + durationMillis = currentVideoInfo.get("durationMillis") + duration = durationMillis / 1000 formats = self._extract_formats(currentVideoInfo) return { - 'id': video_id, - 'title': title, - 'duration': float_or_none(duration), - 'formats': formats + "id": video_id, + "title": title, + "duration": float_or_none(duration), + "formats": formats, } + class AcfunLiveIE(BasicAcfunInfoExtractor): - _VALID_URL = r'https?://live\.acfun\.cn/live/(?P\d+)' + _VALID_URL = r"https?://live\.acfun\.cn/live/(?P\d+)" _TEST = { - 'url': 'https://live.acfun.cn/live/36782183', - 'only_matching': True, - 'info_dict': { - 'id': '36782183', - 'ext': 'mp4', + "url": "https://live.acfun.cn/live/36782183", + "only_matching": True, + "info_dict": { + "id": "36782183", + "ext": "mp4", # 'title': '看见兔兔就烦!', - 'is_live': True, - } + "is_live": True, + }, } def _real_extract(self, url): live_id = self._match_id(url) - self._FAKE_HEADERS.update({ - 'Referer': url - }) + self._FAKE_HEADERS.update({"Referer": url}) # Firstly get _did cookie fisrt_req = sanitized_Request(url, headers=self._FAKE_HEADERS) first_res = compat_urllib_request.urlopen(fisrt_req) for header_name, header_value in first_res.info().items(): - if header_name.lower() == 'set-cookie': + if header_name.lower() == "set-cookie": cookies = header_value if not cookies: - raise ExtractorError('Fail to fetch cookies') + raise ExtractorError("Fail to fetch cookies") - cookies_dict = dict(c.strip(' ,').split('=', 1) for c in cookies.split(';')) - did_cookie = cookies_dict['_did'] + cookies_dict = dict(c.strip(" ,").split("=", 1) for c in cookies.split(";")) + did_cookie = cookies_dict["_did"] - self._FAKE_HEADERS.update({ - 'Cookie': '_did=%s' % did_cookie - }) + self._FAKE_HEADERS.update({"Cookie": "_did=%s" % did_cookie}) # Login to get userId and acfun.api.visitor_st - login_data = compat_urllib_parse_urlencode({'sid': 'acfun.api.visitor'}).encode('ascii') + login_data = compat_urllib_parse_urlencode({"sid": "acfun.api.visitor"}).encode( + "ascii" + ) login_json = self._download_json( - 'https://id.app.acfun.cn/rest/app/visitor/login', - live_id, - data=login_data, - headers=self._FAKE_HEADERS) + "https://id.app.acfun.cn/rest/app/visitor/login", + live_id, + data=login_data, + headers=self._FAKE_HEADERS, + ) - streams_url = "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s" % ( - login_json['userId'], - did_cookie, login_json['acfun.api.visitor_st']) + streams_url = ( + "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s" + % (login_json["userId"], did_cookie, login_json["acfun.api.visitor_st"]) + ) # Fetch stream lists - fetch_streams_data = compat_urllib_parse_urlencode({ - 'authorId': int_or_none(live_id), - 'pullStreamType': 'FLV' - }).encode('ascii') + fetch_streams_data = compat_urllib_parse_urlencode( + {"authorId": int_or_none(live_id), "pullStreamType": "FLV"} + ).encode("ascii") streams_json = self._download_json( - streams_url, - live_id, - data=fetch_streams_data, - headers=self._FAKE_HEADERS) + streams_url, live_id, data=fetch_streams_data, headers=self._FAKE_HEADERS + ) # print(streams_json) - title = streams_json['data']['caption'] - streams_info = json.loads(streams_json['data']['videoPlayRes']) # streams info - representation = streams_info['liveAdaptiveManifest'][0]['adaptationSet']['representation'] - + title = streams_json["data"]["caption"] + streams_info = json.loads(streams_json["data"]["videoPlayRes"]) # streams info + representation = streams_info["liveAdaptiveManifest"][0]["adaptationSet"][ + "representation" + ] + formats = [] for stream in representation: - quality = stream["qualityType"] - formats += [{ - 'url': stream["url"], - 'ext': 'mp4', - 'tbr': stream.get('bitrate'), - }] + formats += [ + { + "url": stream["url"], + "ext": "mp4", + "tbr": stream.get("bitrate"), + } + ] self._sort_formats(formats) return { - 'id': live_id, - 'title': self._live_title(title), - 'formats': formats, - 'is_live': True - } \ No newline at end of file + "id": live_id, + "title": self._live_title(title), + "formats": formats, + "is_live": True, + }