From 345bd3b0264ad82f5a91c3d027bb49b6c1d6f7ee Mon Sep 17 00:00:00 2001 From: johnsmith2077 Date: Wed, 7 Oct 2020 02:28:01 +0800 Subject: [PATCH] [acfun] Add playlist support for normal video --- youtube_dl/extractor/acfun.py | 65 ++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py index fab83a4e3..004898990 100644 --- a/youtube_dl/extractor/acfun.py +++ b/youtube_dl/extractor/acfun.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..compat import ( @@ -51,9 +52,10 @@ class BasicAcfunInfoExtractor(InfoExtractor): class AcfunIE(BasicAcfunInfoExtractor): - _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P[_\d]+)" + _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P\d+)(?P[_\d]+)?" _TESTS = [ { + "note": "single video without playlist", "url": "https://www.acfun.cn/v/ac18184362", "info_dict": { "id": "18184362", @@ -65,9 +67,10 @@ class AcfunIE(BasicAcfunInfoExtractor): }, }, { + "note": "single video in playlist", "url": "https://www.acfun.cn/v/ac17532274_3", "info_dict": { - "id": "17532274_3", + "id": "17532274", "ext": "mp4", "duration": 233.770, "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP:七夕恋歌!落入本娘爱的陷阱! - TRAP 阿婵", @@ -75,30 +78,68 @@ class AcfunIE(BasicAcfunInfoExtractor): "uploader_id": 23682490, }, }, + { + "note": "multiple video with playlist", + "url": "https://www.acfun.cn/v/ac17532274", + "info_dict": { + "id": "17532274", + "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP:七夕恋歌!落入本娘爱的陷阱!", + "uploader": "AC娘本体", + "uploader_id": 23682490, + }, + "playlist_count": 5 + } ] def _real_extract(self, url): - video_id = self._match_id(url) + video_id, page_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS) json_text = self._html_search_regex( r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text" ) - json_data = json.loads(json_text) + json_data = json.loads(json_text) title = json_data["title"] + + uploader = str_or_none(json_data.get("user").get("name")) + uploader_id = str_to_int(json_data.get("user").get("id")) + + videoList = json_data.get('videoList') + if videoList: + video_num = len(videoList) + + if not page_id and video_num and video_num > 1: + if not self._downloader.params.get('noplaylist'): + self.to_screen('Downloading all pages %s - add --no-playlist to just download video' % video_id) + entries = [self.url_result( + '%s_%d' % (url, pid), + self.IE_NAME, + video_id='%s_%d' % (video_id, pid)) + for pid in range(1, video_num+1)] + playlist = self.playlist_result(entries, video_id, title) + playlist.update({ + 'uploader': uploader, + 'uploader_id': uploader_id, + }) + return playlist + + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + p_title = self._html_search_regex( r"(.*?)", webpage, "p_title", default=None, - ) + ) + if p_title: - title = "%s - %s" % (title, p_title) - - uploader = json_data.get("user").get("name") - uploader_id = json_data.get("user").get("id") + title = "%s-%s" % (title, p_title) + if page_id: + video_id += page_id + currentVideoInfo = json_data.get("currentVideoInfo") durationMillis = currentVideoInfo.get("durationMillis") duration = float_or_none(durationMillis) / 1000.0 @@ -106,10 +147,10 @@ class AcfunIE(BasicAcfunInfoExtractor): formats = self._extract_formats(currentVideoInfo) return { "id": video_id, - "uploader_id": str_to_int(uploader_id), + "uploader_id": uploader_id, "title": title, - "uploader": str_or_none(uploader), - "duration": float_or_none(duration), + "uploader": uploader, + "duration": duration, "formats": formats, }