2020-10-05 06:38:25 +02:00
|
|
|
|
# coding: utf-8
|
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
from .common import InfoExtractor
|
|
|
|
|
from ..compat import (
|
|
|
|
|
compat_urllib_parse_urlencode,
|
|
|
|
|
compat_urllib_request,
|
|
|
|
|
)
|
|
|
|
|
from ..utils import (
|
|
|
|
|
int_or_none,
|
|
|
|
|
float_or_none,
|
|
|
|
|
str_or_none,
|
|
|
|
|
str_to_int,
|
|
|
|
|
sanitized_Request,
|
|
|
|
|
ExtractorError,
|
|
|
|
|
)
|
|
|
|
|
|
2020-10-05 12:14:42 +02:00
|
|
|
|
|
2020-10-05 06:38:25 +02:00
|
|
|
|
class BasicAcfunInfoExtractor(InfoExtractor):
    """Common base for the AcFun extractors.

    Provides the browser-like request headers and the parsing of the
    ``ksPlayJson`` payload into a list of format dicts.
    """

    # Browser-like headers sent with page requests.  Treat this dict as
    # read-only: it lives on the class and is shared by every subclass.
    _FAKE_HEADERS = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",  # noqa
        "Accept-Charset": "UTF-8,*;q=0.5",
        "Accept-Encoding": "gzip,deflate,sdch",
        "Accept-Language": "en-US,en;q=0.8",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0",  # noqa
    }

    def _extract_formats(self, currentVideoInfo):
        """Build the sorted format list from a ``currentVideoInfo`` dict.

        ``currentVideoInfo`` must carry a ``ksPlayJson`` entry holding a
        JSON-encoded string; the streams are read from the ``representation``
        list of its first ``adaptationSet`` element.

        Returns the list of format dicts after ``self._sort_formats``.
        Raises ExtractorError when ``ksPlayJson`` is not available.
        """
        if not currentVideoInfo or "ksPlayJson" not in currentVideoInfo:
            # Previously this path fell through to an unbound local variable.
            raise ExtractorError("Unable to find ksPlayJson in video info")

        ks_play_json = json.loads(currentVideoInfo["ksPlayJson"])
        adaptation_sets = ks_play_json.get("adaptationSet") or [{}]
        representation = adaptation_sets[0].get("representation") or []
        duration_ms = float_or_none(currentVideoInfo.get("durationMillis"))

        formats = []
        for stream in representation:
            stream_url = stream.get("url")
            if not stream_url:
                # A format without a URL cannot be downloaded; skip it.
                continue
            avg_bitrate = float_or_none(stream.get("avgBitrate"))
            size = None
            if duration_ms is not None and avg_bitrate is not None:
                # Estimated size; presumably avgBitrate is in kbit/s so that
                # ms * kbit/s / 8 yields bytes -- TODO confirm the unit.
                size = duration_ms * avg_bitrate / 8
            formats.append(
                {
                    "url": stream_url,
                    "ext": "mp4",
                    "width": stream.get("width"),
                    "height": stream.get("height"),
                    "filesize": size,
                }
            )

        # Keep the original pre-sort reversal: it only influences the order
        # of formats that the sort considers equal.
        formats.reverse()
        self._sort_formats(formats)
        return formats
|
|
|
|
|
|
2020-10-05 06:38:25 +02:00
|
|
|
|
|
|
|
|
|
class AcfunIE(BasicAcfunInfoExtractor):
    """Extractor for regular AcFun video pages (``/v/ac<id>``)."""

    _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)"
    _TESTS = [
        {
            "url": "https://www.acfun.cn/v/ac18184362",
            "info_dict": {
                "id": "18184362",
                "ext": "mp4",
                "duration": 192.042,
                "title": "【AC娘】魔性新单《极乐857》上线!来和AC娘一起云蹦迪吧!",
                "uploader": "AC娘本体",
                "uploader_id": 23682490,
            },
        },
        {
            "url": "https://www.acfun.cn/v/ac17532274_3",
            "info_dict": {
                "id": "17532274_3",
                "ext": "mp4",
                "duration": 233.770,
                "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP:七夕恋歌!落入本娘爱的陷阱! - TRAP 阿婵",
                "uploader": "AC娘本体",
                "uploader_id": 23682490,
            },
        },
    ]

    def _real_extract(self, url):
        """Download the video page and build the info dict.

        Metadata comes from the ``videoInfo = {...};`` JSON object embedded
        in the page.  When the page has an ``active`` ``<li>`` entry, its
        text is appended to the title.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)

        json_text = self._html_search_regex(
            r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text"
        )
        json_data = json.loads(json_text)

        title = json_data["title"]
        p_title = self._html_search_regex(
            r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>",
            webpage,
            "p_title",
            default=None,
        )
        if p_title:
            title = "%s - %s" % (title, p_title)

        # Optional objects: fall back to empty dicts so that missing metadata
        # yields None fields instead of an AttributeError.
        user = json_data.get("user") or {}
        current_video_info = json_data.get("currentVideoInfo") or {}

        # scale=1000 converts milliseconds to seconds using true division on
        # both Python 2 and 3 (plain "/" truncates ints on Python 2) and
        # tolerates a missing value.
        duration = float_or_none(current_video_info.get("durationMillis"), scale=1000)

        formats = self._extract_formats(current_video_info)

        return {
            "id": video_id,
            "uploader_id": str_to_int(user.get("id")),
            "title": title,
            "uploader": str_or_none(user.get("name")),
            "duration": duration,
            "formats": formats,
        }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AcfunBangumiIE(BasicAcfunInfoExtractor):
    """Extractor for AcFun bangumi pages (``/bangumi/aa<id>``)."""

    _VALID_URL = r"https?://www\.acfun\.cn/bangumi/aa(?P<id>[_\d]+)"
    _TEST = {
        "url": "https://www.acfun.cn/bangumi/aa6002917_36188_1748679",
        "info_dict": {
            "id": "6002917_36188_1748679",
            "ext": "mp4",
            "duration": 1437.076,
            "title": "租借女友 第12话 告白和女友",
        },
    }

    def _real_extract(self, url):
        """Download the bangumi page and build the info dict.

        Metadata comes from the ``bangumiData = {...};`` JSON object embedded
        in the page.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)

        json_text = self._html_search_regex(
            r"(?s)bangumiData\s*=\s*(\{.*?\});", webpage, "json_text"
        )
        json_data = json.loads(json_text)

        # Prefer the ready-made title; otherwise assemble it from its parts.
        # (The original relied on "+" binding tighter than "or" to get the
        # same result; the fallback is now spelled out explicitly.)
        title = json_data.get("showTitle") or " ".join(
            (
                json_data["bangumiTitle"],
                json_data["episodeName"],
                json_data["title"],
            )
        )

        # Fall back to an empty dict so a missing object does not raise
        # AttributeError on the lookups below.
        current_video_info = json_data.get("currentVideoInfo") or {}

        # scale=1000 converts milliseconds to seconds using true division on
        # both Python 2 and 3 (plain "/" truncates ints on Python 2) and
        # tolerates a missing value.
        duration = float_or_none(current_video_info.get("durationMillis"), scale=1000)

        formats = self._extract_formats(current_video_info)

        return {
            "id": video_id,
            "title": title,
            "duration": duration,
            "formats": formats,
        }
|
|
|
|
|
|
2020-10-05 12:14:42 +02:00
|
|
|
|
|
2020-10-05 06:38:25 +02:00
|
|
|
|
class AcfunLiveIE(BasicAcfunInfoExtractor):
    """Extractor for AcFun live rooms (``live.acfun.cn/live/<id>``)."""

    _VALID_URL = r"https?://live\.acfun\.cn/live/(?P<id>\d+)"
    _TEST = {
        "url": "https://live.acfun.cn/live/36782183",
        "only_matching": True,
        "info_dict": {
            "id": "36782183",
            "ext": "mp4",
            # 'title': '看见兔兔就烦!',
            "is_live": True,
        },
    }

    @staticmethod
    def _find_cookie_value(set_cookie_headers, name):
        """Return the value of cookie ``name`` from Set-Cookie header values.

        Each header value is split on both ";" and "," so that several
        cookies folded into one comma-joined header are still found, and
        valueless attributes such as ``HttpOnly`` are simply skipped.
        Returns None when the cookie is not present.
        """
        prefix = name + "="
        for header_value in set_cookie_headers:
            for chunk in header_value.replace(",", ";").split(";"):
                chunk = chunk.strip()
                if chunk.startswith(prefix):
                    return chunk[len(prefix):]
        return None

    def _real_extract(self, url):
        """Resolve the live room into playable stream formats.

        Steps: fetch the room page to obtain the ``_did`` cookie, perform a
        visitor login to get ``userId`` and ``acfun.api.visitor_st``, then
        request the stream list from the ``startPlay`` endpoint.

        Raises ExtractorError when the ``_did`` cookie or the stream data
        cannot be obtained.
        """
        live_id = self._match_id(url)

        # Work on a private copy: _FAKE_HEADERS is a class attribute shared
        # by all Acfun extractors, so updating it in place would leak this
        # room's Referer/Cookie into unrelated requests.
        headers = dict(self._FAKE_HEADERS)
        headers["Referer"] = url

        # Firstly get _did cookie
        first_req = sanitized_Request(url, headers=headers)
        first_res = compat_urllib_request.urlopen(first_req)
        try:
            set_cookie_headers = [
                header_value
                for header_name, header_value in first_res.info().items()
                if header_name.lower() == "set-cookie"
            ]
        finally:
            first_res.close()

        did_cookie = self._find_cookie_value(set_cookie_headers, "_did")
        if not did_cookie:
            raise ExtractorError("Fail to fetch cookies")
        headers["Cookie"] = "_did=%s" % did_cookie

        # Login to get userId and acfun.api.visitor_st
        login_data = compat_urllib_parse_urlencode(
            {"sid": "acfun.api.visitor"}
        ).encode("ascii")
        login_json = self._download_json(
            "https://id.app.acfun.cn/rest/app/visitor/login",
            live_id,
            data=login_data,
            headers=headers,
        )

        streams_url = (
            "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s"
            % (login_json["userId"], did_cookie, login_json["acfun.api.visitor_st"])
        )

        # Fetch stream lists
        fetch_streams_data = compat_urllib_parse_urlencode(
            {"authorId": int_or_none(live_id), "pullStreamType": "FLV"}
        ).encode("ascii")
        streams_json = self._download_json(
            streams_url, live_id, data=fetch_streams_data, headers=headers
        )

        live_data = streams_json.get("data")
        if not live_data:
            # NOTE(review): presumably the API omits "data" when the room is
            # not broadcasting -- confirm against a real offline response.
            raise ExtractorError("Unable to get live stream data")

        title = live_data["caption"]
        # videoPlayRes is itself a JSON-encoded string describing the streams.
        streams_info = json.loads(live_data["videoPlayRes"])
        representation = streams_info["liveAdaptiveManifest"][0]["adaptationSet"][
            "representation"
        ]

        formats = [
            {
                "url": stream["url"],
                "ext": "mp4",
                "tbr": stream.get("bitrate"),
            }
            for stream in representation
        ]
        self._sort_formats(formats)

        return {
            "id": live_id,
            "title": self._live_title(title),
            "formats": formats,
            "is_live": True,
        }
|