[acfun] Add playlist support for normal video

2025-01-07 13:47:54 +01:00 · 2020-10-07 02:28:01 +08:00 · 2020-10-07 02:28:01 +08:00 · 345bd3b026
commit 345bd3b026
parent 1758030b08
1 changed files with 53 additions and 12 deletions
--- a/youtube_dl/extractor/acfun.py
+++ b/youtube_dl/extractor/acfun.py
@ -2,6 +2,7 @@
 from __future__ import unicode_literals

 import json
+import re

 from .common import InfoExtractor
 from ..compat import (
@ -51,9 +52,10 @@ class BasicAcfunInfoExtractor(InfoExtractor):


 class AcfunIE(BasicAcfunInfoExtractor):
-    _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)"
+    _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>\d+)(?P<page_id>[_\d]+)?"
    _TESTS = [
        {
+            "note": "single video without playlist",
            "url": "https://www.acfun.cn/v/ac18184362",
            "info_dict": {
                "id": "18184362",
@ -65,9 +67,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
            },
        },
        {
+            "note": "single video in playlist",
            "url": "https://www.acfun.cn/v/ac17532274_3",
            "info_dict": {
-                "id": "17532274_3",
+                "id": "17532274",
                "ext": "mp4",
                "duration": 233.770,
                "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！ - TRAP 阿婵",
@ -75,30 +78,68 @@ class AcfunIE(BasicAcfunInfoExtractor):
                "uploader_id": 23682490,
            },
        },
+        {
+            "note": "multiple video with playlist",
+            "url": "https://www.acfun.cn/v/ac17532274",
+            "info_dict": {
+                "id": "17532274",
+                "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！",
+                "uploader": "AC娘本体",
+                "uploader_id": 23682490,
+            },
+            "playlist_count": 5
+        }
    ]

    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        video_id, page_id = re.match(self._VALID_URL, url).groups()
+
        webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)

        json_text = self._html_search_regex(
            r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text"
        )
-        json_data = json.loads(json_text)
+        json_data = json.loads(json_text)   

        title = json_data["title"]
+
+        uploader = str_or_none(json_data.get("user").get("name"))
+        uploader_id = str_to_int(json_data.get("user").get("id"))       
+
+        videoList = json_data.get('videoList')
+        if videoList:
+            video_num = len(videoList)
+        
+        if not page_id and video_num and video_num > 1:
+            if not self._downloader.params.get('noplaylist'):
+                self.to_screen('Downloading all pages %s - add --no-playlist to just download video' % video_id)
+                entries = [self.url_result( 
+                    '%s_%d' % (url, pid), 
+                    self.IE_NAME, 
+                    video_id='%s_%d' % (video_id, pid)) 
+                    for pid in range(1, video_num+1)]
+                playlist = self.playlist_result(entries, video_id, title)
+                playlist.update({
+                    'uploader': uploader,
+                    'uploader_id': uploader_id,
+                })
+                return playlist
+                          
+            self.to_screen('Downloading just video %s because of --no-playlist' % video_id) 
+
        p_title = self._html_search_regex(
            r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>",
            webpage,
            "p_title",
            default=None,
-        )
+        )        
+
        if p_title:
-            title = "%s - %s" % (title, p_title)
-
-        uploader = json_data.get("user").get("name")
-        uploader_id = json_data.get("user").get("id")
+            title = "%s-%s" % (title, p_title)     

+        if page_id:
+            video_id += page_id             
+            
        currentVideoInfo = json_data.get("currentVideoInfo")
        durationMillis = currentVideoInfo.get("durationMillis")
        duration = float_or_none(durationMillis) / 1000.0
@ -106,10 +147,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
        formats = self._extract_formats(currentVideoInfo)
        return {
            "id": video_id,
-            "uploader_id": str_to_int(uploader_id),
+            "uploader_id": uploader_id,
            "title": title,
-            "uploader": str_or_none(uploader),
-            "duration": float_or_none(duration),
+            "uploader": uploader,
+            "duration": duration,
            "formats": formats,
        }