1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-29 19:47:54 +01:00

[acfun] Add playlist support for normal video

This commit is contained in:
johnsmith2077 2020-10-07 02:28:01 +08:00
parent 1758030b08
commit 345bd3b026

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -51,9 +52,10 @@ class BasicAcfunInfoExtractor(InfoExtractor):
class AcfunIE(BasicAcfunInfoExtractor): class AcfunIE(BasicAcfunInfoExtractor):
_VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)" _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>\d+)(?P<page_id>[_\d]+)?"
_TESTS = [ _TESTS = [
{ {
"note": "single video without playlist",
"url": "https://www.acfun.cn/v/ac18184362", "url": "https://www.acfun.cn/v/ac18184362",
"info_dict": { "info_dict": {
"id": "18184362", "id": "18184362",
@ -65,9 +67,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
}, },
}, },
{ {
"note": "single video in playlist",
"url": "https://www.acfun.cn/v/ac17532274_3", "url": "https://www.acfun.cn/v/ac17532274_3",
"info_dict": { "info_dict": {
"id": "17532274_3", "id": "17532274",
"ext": "mp4", "ext": "mp4",
"duration": 233.770, "duration": 233.770,
"title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP七夕恋歌落入本娘爱的陷阱 - TRAP 阿婵", "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP七夕恋歌落入本娘爱的陷阱 - TRAP 阿婵",
@ -75,30 +78,68 @@ class AcfunIE(BasicAcfunInfoExtractor):
"uploader_id": 23682490, "uploader_id": 23682490,
}, },
}, },
{
"note": "multiple video with playlist",
"url": "https://www.acfun.cn/v/ac17532274",
"info_dict": {
"id": "17532274",
"title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP七夕恋歌落入本娘爱的陷阱",
"uploader": "AC娘本体",
"uploader_id": 23682490,
},
"playlist_count": 5
}
] ]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, page_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS) webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
json_text = self._html_search_regex( json_text = self._html_search_regex(
r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text" r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text"
) )
json_data = json.loads(json_text) json_data = json.loads(json_text)
title = json_data["title"] title = json_data["title"]
uploader = str_or_none(json_data.get("user").get("name"))
uploader_id = str_to_int(json_data.get("user").get("id"))
videoList = json_data.get('videoList')
if videoList:
video_num = len(videoList)
if not page_id and video_num and video_num > 1:
if not self._downloader.params.get('noplaylist'):
self.to_screen('Downloading all pages %s - add --no-playlist to just download video' % video_id)
entries = [self.url_result(
'%s_%d' % (url, pid),
self.IE_NAME,
video_id='%s_%d' % (video_id, pid))
for pid in range(1, video_num+1)]
playlist = self.playlist_result(entries, video_id, title)
playlist.update({
'uploader': uploader,
'uploader_id': uploader_id,
})
return playlist
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
p_title = self._html_search_regex( p_title = self._html_search_regex(
r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>", r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>",
webpage, webpage,
"p_title", "p_title",
default=None, default=None,
) )
if p_title: if p_title:
title = "%s - %s" % (title, p_title) title = "%s-%s" % (title, p_title)
uploader = json_data.get("user").get("name")
uploader_id = json_data.get("user").get("id")
if page_id:
video_id += page_id
currentVideoInfo = json_data.get("currentVideoInfo") currentVideoInfo = json_data.get("currentVideoInfo")
durationMillis = currentVideoInfo.get("durationMillis") durationMillis = currentVideoInfo.get("durationMillis")
duration = float_or_none(durationMillis) / 1000.0 duration = float_or_none(durationMillis) / 1000.0
@ -106,10 +147,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
formats = self._extract_formats(currentVideoInfo) formats = self._extract_formats(currentVideoInfo)
return { return {
"id": video_id, "id": video_id,
"uploader_id": str_to_int(uploader_id), "uploader_id": uploader_id,
"title": title, "title": title,
"uploader": str_or_none(uploader), "uploader": uploader,
"duration": float_or_none(duration), "duration": duration,
"formats": formats, "formats": formats,
} }