1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-22 16:44:32 +01:00

[acfun] Add playlist support for bangumi

This commit is contained in:
johnsmith2077 2020-10-07 14:38:35 +08:00
parent d96564c350
commit 165ff0aab2

View File

@@ -1,20 +1,18 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import time
import json import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_urllib_request,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
float_or_none, float_or_none,
str_or_none, str_or_none,
str_to_int, str_to_int,
sanitized_Request, sanitized_Request,
urlencode_postdata,
ExtractorError, ExtractorError,
) )
@@ -36,7 +34,8 @@ class BasicAcfunInfoExtractor(InfoExtractor):
formats = [] formats = []
for stream in representation: for stream in representation:
size = float_or_none(durationMillis) * stream["avgBitrate"] / 8 avgByterate = float_or_none(stream.get("avgBitrate"), 8)
size = float_or_none(durationMillis, invscale=avgByterate)
formats += [ formats += [
{ {
"url": stream["url"], "url": stream["url"],
@@ -79,7 +78,7 @@ class AcfunIE(BasicAcfunInfoExtractor):
}, },
}, },
{ {
"note": "multiple video with playlist", "note": "multiple video within playlist",
"url": "https://www.acfun.cn/v/ac17532274", "url": "https://www.acfun.cn/v/ac17532274",
"info_dict": { "info_dict": {
"id": "17532274", "id": "17532274",
@@ -113,8 +112,8 @@ class AcfunIE(BasicAcfunInfoExtractor):
if not page_id and video_num and video_num > 1: if not page_id and video_num and video_num > 1:
if not self._downloader.params.get("noplaylist"): if not self._downloader.params.get("noplaylist"):
self.to_screen( self.to_screen(
"Downloading all pages %s - add --no-playlist to just download video" "Downloading all pages of %s(ac%s) - add --no-playlist to just download video"
% video_id % (title, video_id)
) )
entries = [ entries = [
self.url_result( self.url_result(
@@ -134,7 +133,8 @@ class AcfunIE(BasicAcfunInfoExtractor):
return playlist return playlist
self.to_screen( self.to_screen(
"Downloading just video %s because of --no-playlist" % video_id "Downloading just video %s(ac%s) because of --no-playlist"
% (title, video_id)
) )
p_title = self._html_search_regex( p_title = self._html_search_regex(
@@ -152,7 +152,7 @@ class AcfunIE(BasicAcfunInfoExtractor):
currentVideoInfo = json_data.get("currentVideoInfo") currentVideoInfo = json_data.get("currentVideoInfo")
durationMillis = currentVideoInfo.get("durationMillis") durationMillis = currentVideoInfo.get("durationMillis")
duration = float_or_none(durationMillis) / 1000.0 duration = float_or_none(durationMillis, 1000)
formats = self._extract_formats(currentVideoInfo) formats = self._extract_formats(currentVideoInfo)
return { return {
@@ -166,8 +166,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
class AcfunBangumiIE(BasicAcfunInfoExtractor): class AcfunBangumiIE(BasicAcfunInfoExtractor):
_VALID_URL = r"https?://www\.acfun\.cn/bangumi/aa(?P<id>[_\d]+)" _VALID_URL = r"https?://www\.acfun\.cn/bangumi/aa(?P<id>\d+)(?P<episode_id>[_\d]+)?"
_TEST = { _TESTS = [
{
"note": "single episode",
"url": "https://www.acfun.cn/bangumi/aa6002917_36188_1748679", "url": "https://www.acfun.cn/bangumi/aa6002917_36188_1748679",
"info_dict": { "info_dict": {
"id": "6002917_36188_1748679", "id": "6002917_36188_1748679",
@@ -175,35 +177,82 @@ class AcfunBangumiIE(BasicAcfunInfoExtractor):
"duration": 1437.076, "duration": 1437.076,
"title": "租借女友 第12话 告白和女友", "title": "租借女友 第12话 告白和女友",
}, },
} },
{
"note": "all episodes of bangumi",
"url": "https://www.acfun.cn/bangumi/aa6002917",
"info_dict": {
"id": "6002917",
"title": "租借女友",
},
"playlist_count": 12,
},
]
_TEMPLATE_URL = "https://www.acfun.cn/bangumi/aa%s%s"
_FETCH_EPISODES_URL = "https://www.acfun.cn/bangumi/aa%s?pagelets=pagelet_partlist&reqID=0&ajaxpipe=1&t=%d"
def _all_episodes(self, bangumi_id):
timestamp = int_or_none(float_or_none(time.time(), invscale=1000))
print("Timestamp: ", timestamp)
webpage = self._download_webpage(
self._FETCH_EPISODES_URL % (bangumi_id, timestamp),
bangumi_id,
headers=self._FAKE_HEADERS,
)
entries = [
self.url_result(self._TEMPLATE_URL % (bangumi_id, eid), self.IE_NAME, eid)
for eid in re.findall(
r"data-href=./bangumi/aa%s([_\d]+)." % bangumi_id, webpage
)
]
return entries
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) bangumi_id, episode_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS) webpage = self._download_webpage(url, bangumi_id, headers=self._FAKE_HEADERS)
json_text = self._html_search_regex( json_text = self._html_search_regex(
r"(?s)bangumiData\s*=\s*(\{.*?\});", webpage, "json_text" r"(?s)bangumiData\s*=\s*(\{.*?\});", webpage, "json_text"
) )
json_data = json.loads(json_text) json_data = json.loads(json_text)
title = ( bangumiTitle = json_data["bangumiTitle"]
json_data.get("showTitle")
or json_data["bangumiTitle"] if not episode_id:
+ " " if not self._downloader.params.get("noplaylist"):
+ json_data["episodeName"] self.to_screen(
+ " " "Downloading all episodes of %s(aa%s) - add --no-playlist to just download first episode"
+ json_data["title"] % (bangumiTitle, bangumi_id)
)
playlist = self.playlist_result(
self._all_episodes(bangumi_id), bangumi_id, bangumiTitle
)
return playlist
self.to_screen(
"Downloading just first episode %s(aa%s) because of --no-playlist"
% (bangumiTitle, bangumi_id)
)
title = json_data.get("showTitle") or "%s %s %s" % (
json_data["bangumiTitle"],
json_data["episodeName"],
json_data["title"],
) )
currentVideoInfo = json_data.get("currentVideoInfo") currentVideoInfo = json_data.get("currentVideoInfo")
durationMillis = currentVideoInfo.get("durationMillis") durationMillis = currentVideoInfo.get("durationMillis")
duration = float_or_none(durationMillis) / 1000.0 duration = float_or_none(durationMillis, 1000)
if episode_id:
bangumi_id += episode_id
formats = self._extract_formats(currentVideoInfo) formats = self._extract_formats(currentVideoInfo)
return { return {
"id": video_id, "id": bangumi_id,
"title": title, "title": title,
"duration": float_or_none(duration), "duration": duration,
"formats": formats, "formats": formats,
} }
@@ -211,23 +260,31 @@ class AcfunBangumiIE(BasicAcfunInfoExtractor):
class AcfunLiveIE(BasicAcfunInfoExtractor): class AcfunLiveIE(BasicAcfunInfoExtractor):
_VALID_URL = r"https?://live\.acfun\.cn/live/(?P<id>\d+)" _VALID_URL = r"https?://live\.acfun\.cn/live/(?P<id>\d+)"
_TEST = { _TEST = {
"url": "https://live.acfun.cn/live/36782183", "url": "https://live.acfun.cn/live/34195163",
"only_matching": True,
"info_dict": { "info_dict": {
"id": "36782183", "id": "34195163",
"ext": "mp4", "ext": "mp4",
# 'title': '看见兔兔就烦!', "title": r"re:^晴心Haruko \d{4}-\d{2}-\d{2} \d{2}:\d{2}$",
"is_live": True, "is_live": True,
}, },
"only_matching": True,
} }
_LOGIN_URL = "https://id.app.acfun.cn/rest/app/visitor/login"
_STREAMS_URL = "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s"
def _real_extract(self, url): def _real_extract(self, url):
live_id = self._match_id(url) live_id = self._match_id(url)
self._FAKE_HEADERS.update({"Referer": url}) self._FAKE_HEADERS.update({"Referer": url})
# Firstly get _did cookie # Firstly fetch _did cookie and streamer name(use for title)
fisrt_req = sanitized_Request(url, headers=self._FAKE_HEADERS) first_req = sanitized_Request(url, headers=self._FAKE_HEADERS)
first_res = compat_urllib_request.urlopen(fisrt_req) webpage, first_res = self._download_webpage_handle(first_req, live_id)
live_up_name = self._html_search_regex(
r"<a [^>]*?class[^>]*?up-name[^>]*?>([^<]*?)</a>",
webpage,
"live_up_name",
)
for header_name, header_value in first_res.info().items(): for header_name, header_value in first_res.info().items():
if header_name.lower() == "set-cookie": if header_name.lower() == "set-cookie":
@@ -241,25 +298,24 @@ class AcfunLiveIE(BasicAcfunInfoExtractor):
self._FAKE_HEADERS.update({"Cookie": "_did=%s" % did_cookie}) self._FAKE_HEADERS.update({"Cookie": "_did=%s" % did_cookie})
# Login to get userId and acfun.api.visitor_st # Login to get userId and acfun.api.visitor_st
login_data = compat_urllib_parse_urlencode({"sid": "acfun.api.visitor"}).encode( login_data = urlencode_postdata({"sid": "acfun.api.visitor"})
"ascii"
)
login_json = self._download_json( login_json = self._download_json(
"https://id.app.acfun.cn/rest/app/visitor/login", self._LOGIN_URL,
live_id, live_id,
data=login_data, data=login_data,
headers=self._FAKE_HEADERS, headers=self._FAKE_HEADERS,
) )
streams_url = ( streams_url = self._STREAMS_URL % (
"https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s" login_json["userId"],
% (login_json["userId"], did_cookie, login_json["acfun.api.visitor_st"]) did_cookie,
login_json["acfun.api.visitor_st"],
) )
# Fetch stream lists # Fetch stream lists
fetch_streams_data = compat_urllib_parse_urlencode( fetch_streams_data = urlencode_postdata(
{"authorId": int_or_none(live_id), "pullStreamType": "FLV"} {"authorId": int_or_none(live_id), "pullStreamType": "FLV"}
).encode("ascii") )
streams_json = self._download_json( streams_json = self._download_json(
streams_url, live_id, data=fetch_streams_data, headers=self._FAKE_HEADERS streams_url, live_id, data=fetch_streams_data, headers=self._FAKE_HEADERS
@@ -270,8 +326,7 @@ class AcfunLiveIE(BasicAcfunInfoExtractor):
except AssertionError: except AssertionError:
raise ExtractorError("This live room is currently closed") raise ExtractorError("This live room is currently closed")
title = streams_json["data"]["caption"] streams_info = json.loads(streams_json["data"]["videoPlayRes"])
streams_info = json.loads(streams_json["data"]["videoPlayRes"]) # streams info
representation = streams_info["liveAdaptiveManifest"][0]["adaptationSet"][ representation = streams_info["liveAdaptiveManifest"][0]["adaptationSet"][
"representation" "representation"
] ]
@@ -288,7 +343,7 @@ class AcfunLiveIE(BasicAcfunInfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
return { return {
"id": live_id, "id": live_id,
"title": self._live_title(title), "title": self._live_title(live_up_name),
"formats": formats, "formats": formats,
"is_live": True, "is_live": True,
} }