[acfun] Add playlist support for normal video

2025-02-18 18:17:55 +01:00 · 2020-10-07 02:28:01 +08:00 · 2020-10-07 02:28:01 +08:00 · 345bd3b026
commit 345bd3b026
parent 1758030b08
1 changed files with 53 additions and 12 deletions
--- a/youtube_dl/extractor/acfun.py
+++ b/youtube_dl/extractor/acfun.py
@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..compat import (
@ -51,9 +52,10 @@ class BasicAcfunInfoExtractor(InfoExtractor):
 class AcfunIE(BasicAcfunInfoExtractor):
-    _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)"
+    _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>\d+)(?P<page_id>[_\d]+)?"
    _TESTS = [
        {
            "note": "single video without playlist",
            "url": "https://www.acfun.cn/v/ac18184362",
            "info_dict": {
                "id": "18184362",
@ -65,9 +67,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
            },
        },
        {
            "note": "single video in playlist",
            "url": "https://www.acfun.cn/v/ac17532274_3",
            "info_dict": {
-                "id": "17532274_3",
+                "id": "17532274",
                "ext": "mp4",
                "duration": 233.770,
                "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！ - TRAP 阿婵",
@ -75,30 +78,68 @@ class AcfunIE(BasicAcfunInfoExtractor):
                "uploader_id": 23682490,
            },
        },
        {
            "note": "multiple video with playlist",
            "url": "https://www.acfun.cn/v/ac17532274",
            "info_dict": {
                "id": "17532274",
                "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！",
                "uploader": "AC娘本体",
                "uploader_id": 23682490,
            },
            "playlist_count": 5
        }
    ]
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        video_id, page_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
        json_text = self._html_search_regex(
            r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text"
        )
-        json_data = json.loads(json_text)
+        json_data = json.loads(json_text)   
        title = json_data["title"]
        uploader = str_or_none(json_data.get("user").get("name"))
        uploader_id = str_to_int(json_data.get("user").get("id"))       
        videoList = json_data.get('videoList')
        if videoList:
            video_num = len(videoList)
        if not page_id and video_num and video_num > 1:
            if not self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading all pages %s - add --no-playlist to just download video' % video_id)
                entries = [self.url_result( 
                    '%s_%d' % (url, pid), 
                    self.IE_NAME, 
                    video_id='%s_%d' % (video_id, pid)) 
                    for pid in range(1, video_num+1)]
                playlist = self.playlist_result(entries, video_id, title)
                playlist.update({
                    'uploader': uploader,
                    'uploader_id': uploader_id,
                })
                return playlist
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id) 
        p_title = self._html_search_regex(
            r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>",
            webpage,
            "p_title",
            default=None,
-        )
+        )        
        if p_title:
-            title = "%s - %s" % (title, p_title)
+            title = "%s-%s" % (title, p_title)     
        uploader = json_data.get("user").get("name")
        uploader_id = json_data.get("user").get("id")
        if page_id:
            video_id += page_id             
        currentVideoInfo = json_data.get("currentVideoInfo")
        durationMillis = currentVideoInfo.get("durationMillis")
        duration = float_or_none(durationMillis) / 1000.0
@ -106,10 +147,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
        formats = self._extract_formats(currentVideoInfo)
        return {
            "id": video_id,
-            "uploader_id": str_to_int(uploader_id),
+            "uploader_id": uploader_id,
            "title": title,
-            "uploader": str_or_none(uploader),
+            "uploader": uploader,
-            "duration": float_or_none(duration),
+            "duration": duration,
            "formats": formats,
        }