From cef5fe698a0d571f4b6c3bd35367abc5c759196c Mon Sep 17 00:00:00 2001
From: johnsmith2077 <johnsmith2077@foxmail.com>
Date: Mon, 5 Oct 2020 12:38:25 +0800
Subject: [PATCH 1/7] [acfun] Add new extractor

---
 youtube_dl/extractor/acfun.py      | 220 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   5 +
 2 files changed, 225 insertions(+)
 create mode 100644 youtube_dl/extractor/acfun.py
diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py
new file mode 100644
index 000000000..cae8bbe8b
--- /dev/null
+++ b/youtube_dl/extractor/acfun.py
@@ -0,0 +1,220 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_cookiejar,
+    compat_urllib_parse_urlencode,
+    compat_urllib_request,
+)
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    str_or_none,
+    str_to_int,
+    sanitized_Request,
+    ExtractorError,
+)
+
+class BasicAcfunInfoExtractor(InfoExtractor):
+    _FAKE_HEADERS = {
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',  # noqa
+        'Accept-Charset': 'UTF-8,*;q=0.5',
+        'Accept-Encoding': 'gzip,deflate,sdch',
+        'Accept-Language': 'en-US,en;q=0.8',
+        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',  # noqa
+    }    
+    def _extract_formats(self, currentVideoInfo):
+        durationMillis = currentVideoInfo.get('durationMillis')
+        if 'ksPlayJson' in currentVideoInfo:
+            ksPlayJson = ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
+            representation = ksPlayJson.get('adaptationSet')[0].get('representation')
+
+        formats = []
+        for stream in representation:
+            size = float_or_none(durationMillis) * stream["avgBitrate"] / 8
+            stream_id = stream["qualityLabel"]
+            quality = stream["qualityType"]
+            formats += [{
+                'url': stream["url"],
+                'ext': 'mp4',
+                'width': stream.get('width'),
+                'height': stream.get('height'),
+                'filesize': size,
+            }]
+        formats = formats[::-1]  
+        self._sort_formats(formats)
+        return formats        
+
+class AcfunIE(BasicAcfunInfoExtractor):
+    _VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'
+    _TESTS = [
+        {
+            'url': 'https://www.acfun.cn/v/ac18184362',
+            'info_dict': {
+                'id': '18184362',
+                'ext': 'mp4',
+                'duration': 192.042,
+                'title': '【AC娘】魔性新单《极乐857》上线！来和AC娘一起云蹦迪吧！',
+                'uploader': 'AC娘本体',
+                'uploader_id': 23682490
+            }
+        },
+        {
+            'url': 'https://www.acfun.cn/v/ac17532274_3',
+            'info_dict': {
+                'id': '17532274_3',
+                'ext': 'mp4',
+                'duration': 233.770,
+                'title': '【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！ - TRAP 阿婵',
+                'uploader': 'AC娘本体',
+                'uploader_id': 23682490
+            }
+        }
+    ]  
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
+
+        json_text = self._html_search_regex(r'(?s)videoInfo\s*=\s*(\{.*?\});', webpage, 'json_text')
+        json_data = json.loads(json_text)
+
+        title = json_data['title']
+        p_title = self._html_search_regex(r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>", webpage, 'p_title', default=None)
+        if p_title:
+            title = '%s - %s' % (title, p_title)                  
+
+        uploader = json_data.get('user').get('name')
+        uploader_id = json_data.get('user').get('id')
+
+        currentVideoInfo = json_data.get('currentVideoInfo')
+        durationMillis = currentVideoInfo.get('durationMillis')
+        duration = durationMillis / 1000    
+    
+        formats = self._extract_formats(currentVideoInfo)
+        return {
+            'id': video_id,
+            'uploader_id': str_to_int(uploader_id),
+            'title': title,
+            'uploader': str_or_none(uploader),
+            'duration': float_or_none(duration),
+            'formats': formats
+        }
+
+
+class AcfunBangumiIE(BasicAcfunInfoExtractor):
+    _VALID_URL = r'https?://www\.acfun\.cn/bangumi/aa(?P<id>[_\d]+)'
+    _TEST = {
+        'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1748679',
+        'info_dict': {
+            'id': '6002917_36188_1748679',
+            'ext': 'mp4',
+            'duration': 1437.076,
+            'title': '租借女友 第12话 告白和女友',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
+
+        json_text = self._html_search_regex(r'(?s)bangumiData\s*=\s*(\{.*?\});', webpage, 'json_text')
+        json_data = json.loads(json_text)
+
+        title = json_data.get('showTitle') or json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
+
+        currentVideoInfo = json_data.get('currentVideoInfo')
+        durationMillis = currentVideoInfo.get('durationMillis')
+        duration = durationMillis / 1000    
+
+        formats = self._extract_formats(currentVideoInfo)
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': float_or_none(duration),
+            'formats': formats
+        }
+
+class AcfunLiveIE(BasicAcfunInfoExtractor):
+    _VALID_URL = r'https?://live\.acfun\.cn/live/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://live.acfun.cn/live/36782183',
+        'only_matching': True,
+        'info_dict': {
+            'id': '36782183',
+            'ext': 'mp4',
+            # 'title': '看见兔兔就烦！',
+            'is_live': True,
+        } 
+    }
+
+    def _real_extract(self, url):
+        live_id = self._match_id(url)
+        self._FAKE_HEADERS.update({
+            'Referer': url
+        })
+
+        # Firstly get _did cookie
+        fisrt_req = sanitized_Request(url, headers=self._FAKE_HEADERS)
+        first_res = compat_urllib_request.urlopen(fisrt_req)
+
+        for header_name, header_value in first_res.info().items():
+            if header_name.lower() == 'set-cookie':
+                cookies = header_value
+        if not cookies:
+            raise ExtractorError('Fail to fetch cookies')
+
+        cookies_dict = dict(c.strip(' ,').split('=', 1) for c in cookies.split(';'))
+        did_cookie = cookies_dict['_did']        
+
+        self._FAKE_HEADERS.update({
+            'Cookie': '_did=%s' % did_cookie
+        })        
+
+        # Login to get userId and acfun.api.visitor_st
+        login_data = compat_urllib_parse_urlencode({'sid': 'acfun.api.visitor'}).encode('ascii')
+        login_json = self._download_json(
+            'https://id.app.acfun.cn/rest/app/visitor/login', 
+            live_id, 
+            data=login_data, 
+            headers=self._FAKE_HEADERS)
+
+        streams_url = "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s" % (
+            login_json['userId'], 
+            did_cookie, login_json['acfun.api.visitor_st'])
+
+        # Fetch stream lists
+        fetch_streams_data = compat_urllib_parse_urlencode({
+            'authorId':  int_or_none(live_id),
+            'pullStreamType': 'FLV'    
+        }).encode('ascii')
+
+        streams_json = self._download_json(
+            streams_url, 
+            live_id,  
+            data=fetch_streams_data, 
+            headers=self._FAKE_HEADERS)
+
+        # print(streams_json)
+        title = streams_json['data']['caption']
+        streams_info = json.loads(streams_json['data']['videoPlayRes']) # streams info
+        representation = streams_info['liveAdaptiveManifest'][0]['adaptationSet']['representation']
+        
+        formats = []
+        for stream in representation:
+            quality = stream["qualityType"]
+            formats += [{
+                'url': stream["url"],
+                'ext': 'mp4',
+                'tbr': stream.get('bitrate'),
+            }]  
+        self._sort_formats(formats)
+        return {
+            'id': live_id,
+            'title': self._live_title(title),
+            'formats': formats,
+            'is_live': True
+        }
\ No newline at end of file
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index ae7079a6a..7023ca3d9 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -18,6 +18,11 @@ from .acast import (
     ACastIE,
     ACastChannelIE,
 )
+from .acfun import (
+    AcfunIE,
+    AcfunBangumiIE,
+    AcfunLiveIE,
+)
 from .adn import ADNIE
 from .adobeconnect import AdobeConnectIE
 from .adobetv import (

From 6bbb9d0ae886a36739e0cf146e5bb1734e832a12 Mon Sep 17 00:00:00 2001
From: johnsmith2077 <johnsmith2077@foxmail.com>
Date: Mon, 5 Oct 2020 18:14:42 +0800
Subject: [PATCH 2/7] [acfun] Re-format code to pass flake8

---
 youtube_dl/extractor/acfun.py | 269 ++++++++++++++++++----------------
 1 file changed, 144 insertions(+), 125 deletions(-)

diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py
index cae8bbe8b..bf4945ecc 100644
--- a/youtube_dl/extractor/acfun.py
+++ b/youtube_dl/extractor/acfun.py
@@ -5,7 +5,6 @@ import json
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_cookiejar,
     compat_urllib_parse_urlencode,
     compat_urllib_request,
 )
@@ -18,203 +17,223 @@ from ..utils import (
     ExtractorError,
 )
 
+
 class BasicAcfunInfoExtractor(InfoExtractor):
     _FAKE_HEADERS = {
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',  # noqa
-        'Accept-Charset': 'UTF-8,*;q=0.5',
-        'Accept-Encoding': 'gzip,deflate,sdch',
-        'Accept-Language': 'en-US,en;q=0.8',
-        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',  # noqa
-    }    
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",  # noqa
+        "Accept-Charset": "UTF-8,*;q=0.5",
+        "Accept-Encoding": "gzip,deflate,sdch",
+        "Accept-Language": "en-US,en;q=0.8",
+        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0",  # noqa
+    }
+
     def _extract_formats(self, currentVideoInfo):
-        durationMillis = currentVideoInfo.get('durationMillis')
-        if 'ksPlayJson' in currentVideoInfo:
-            ksPlayJson = ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
-            representation = ksPlayJson.get('adaptationSet')[0].get('representation')
+        durationMillis = currentVideoInfo.get("durationMillis")
+        if "ksPlayJson" in currentVideoInfo:
+            ksPlayJson = ksPlayJson = json.loads(currentVideoInfo["ksPlayJson"])
+            representation = ksPlayJson.get("adaptationSet")[0].get("representation")
 
         formats = []
         for stream in representation:
             size = float_or_none(durationMillis) * stream["avgBitrate"] / 8
-            stream_id = stream["qualityLabel"]
-            quality = stream["qualityType"]
-            formats += [{
-                'url': stream["url"],
-                'ext': 'mp4',
-                'width': stream.get('width'),
-                'height': stream.get('height'),
-                'filesize': size,
-            }]
-        formats = formats[::-1]  
+            formats += [
+                {
+                    "url": stream["url"],
+                    "ext": "mp4",
+                    "width": stream.get("width"),
+                    "height": stream.get("height"),
+                    "filesize": size,
+                }
+            ]
+        formats = formats[::-1]
         self._sort_formats(formats)
-        return formats        
+        return formats
+
 
 class AcfunIE(BasicAcfunInfoExtractor):
-    _VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'
+    _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)"
     _TESTS = [
         {
-            'url': 'https://www.acfun.cn/v/ac18184362',
-            'info_dict': {
-                'id': '18184362',
-                'ext': 'mp4',
-                'duration': 192.042,
-                'title': '【AC娘】魔性新单《极乐857》上线！来和AC娘一起云蹦迪吧！',
-                'uploader': 'AC娘本体',
-                'uploader_id': 23682490
-            }
+            "url": "https://www.acfun.cn/v/ac18184362",
+            "info_dict": {
+                "id": "18184362",
+                "ext": "mp4",
+                "duration": 192.042,
+                "title": "【AC娘】魔性新单《极乐857》上线！来和AC娘一起云蹦迪吧！",
+                "uploader": "AC娘本体",
+                "uploader_id": 23682490,
+            },
         },
         {
-            'url': 'https://www.acfun.cn/v/ac17532274_3',
-            'info_dict': {
-                'id': '17532274_3',
-                'ext': 'mp4',
-                'duration': 233.770,
-                'title': '【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！ - TRAP 阿婵',
-                'uploader': 'AC娘本体',
-                'uploader_id': 23682490
-            }
-        }
-    ]  
+            "url": "https://www.acfun.cn/v/ac17532274_3",
+            "info_dict": {
+                "id": "17532274_3",
+                "ext": "mp4",
+                "duration": 233.770,
+                "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！ - TRAP 阿婵",
+                "uploader": "AC娘本体",
+                "uploader_id": 23682490,
+            },
+        },
+    ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
 
-        json_text = self._html_search_regex(r'(?s)videoInfo\s*=\s*(\{.*?\});', webpage, 'json_text')
+        json_text = self._html_search_regex(
+            r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text"
+        )
         json_data = json.loads(json_text)
 
-        title = json_data['title']
-        p_title = self._html_search_regex(r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>", webpage, 'p_title', default=None)
+        title = json_data["title"]
+        p_title = self._html_search_regex(
+            r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>",
+            webpage,
+            "p_title",
+            default=None,
+        )
         if p_title:
-            title = '%s - %s' % (title, p_title)                  
+            title = "%s - %s" % (title, p_title)
 
-        uploader = json_data.get('user').get('name')
-        uploader_id = json_data.get('user').get('id')
+        uploader = json_data.get("user").get("name")
+        uploader_id = json_data.get("user").get("id")
+
+        currentVideoInfo = json_data.get("currentVideoInfo")
+        durationMillis = currentVideoInfo.get("durationMillis")
+        duration = durationMillis / 1000
 
-        currentVideoInfo = json_data.get('currentVideoInfo')
-        durationMillis = currentVideoInfo.get('durationMillis')
-        duration = durationMillis / 1000    
-    
         formats = self._extract_formats(currentVideoInfo)
         return {
-            'id': video_id,
-            'uploader_id': str_to_int(uploader_id),
-            'title': title,
-            'uploader': str_or_none(uploader),
-            'duration': float_or_none(duration),
-            'formats': formats
+            "id": video_id,
+            "uploader_id": str_to_int(uploader_id),
+            "title": title,
+            "uploader": str_or_none(uploader),
+            "duration": float_or_none(duration),
+            "formats": formats,
         }
 
 
 class AcfunBangumiIE(BasicAcfunInfoExtractor):
-    _VALID_URL = r'https?://www\.acfun\.cn/bangumi/aa(?P<id>[_\d]+)'
+    _VALID_URL = r"https?://www\.acfun\.cn/bangumi/aa(?P<id>[_\d]+)"
     _TEST = {
-        'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1748679',
-        'info_dict': {
-            'id': '6002917_36188_1748679',
-            'ext': 'mp4',
-            'duration': 1437.076,
-            'title': '租借女友 第12话 告白和女友',
-        }
+        "url": "https://www.acfun.cn/bangumi/aa6002917_36188_1748679",
+        "info_dict": {
+            "id": "6002917_36188_1748679",
+            "ext": "mp4",
+            "duration": 1437.076,
+            "title": "租借女友 第12话 告白和女友",
+        },
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
 
-        json_text = self._html_search_regex(r'(?s)bangumiData\s*=\s*(\{.*?\});', webpage, 'json_text')
+        json_text = self._html_search_regex(
+            r"(?s)bangumiData\s*=\s*(\{.*?\});", webpage, "json_text"
+        )
         json_data = json.loads(json_text)
 
-        title = json_data.get('showTitle') or json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
+        title = (
+            json_data.get("showTitle")
+            or json_data["bangumiTitle"]
+            + " "
+            + json_data["episodeName"]
+            + " "
+            + json_data["title"]
+        )
 
-        currentVideoInfo = json_data.get('currentVideoInfo')
-        durationMillis = currentVideoInfo.get('durationMillis')
-        duration = durationMillis / 1000    
+        currentVideoInfo = json_data.get("currentVideoInfo")
+        durationMillis = currentVideoInfo.get("durationMillis")
+        duration = durationMillis / 1000
 
         formats = self._extract_formats(currentVideoInfo)
         return {
-            'id': video_id,
-            'title': title,
-            'duration': float_or_none(duration),
-            'formats': formats
+            "id": video_id,
+            "title": title,
+            "duration": float_or_none(duration),
+            "formats": formats,
         }
 
+
 class AcfunLiveIE(BasicAcfunInfoExtractor):
-    _VALID_URL = r'https?://live\.acfun\.cn/live/(?P<id>\d+)'
+    _VALID_URL = r"https?://live\.acfun\.cn/live/(?P<id>\d+)"
     _TEST = {
-        'url': 'https://live.acfun.cn/live/36782183',
-        'only_matching': True,
-        'info_dict': {
-            'id': '36782183',
-            'ext': 'mp4',
+        "url": "https://live.acfun.cn/live/36782183",
+        "only_matching": True,
+        "info_dict": {
+            "id": "36782183",
+            "ext": "mp4",
             # 'title': '看见兔兔就烦！',
-            'is_live': True,
-        } 
+            "is_live": True,
+        },
     }
 
     def _real_extract(self, url):
         live_id = self._match_id(url)
-        self._FAKE_HEADERS.update({
-            'Referer': url
-        })
+        self._FAKE_HEADERS.update({"Referer": url})
 
         # Firstly get _did cookie
         fisrt_req = sanitized_Request(url, headers=self._FAKE_HEADERS)
         first_res = compat_urllib_request.urlopen(fisrt_req)
 
         for header_name, header_value in first_res.info().items():
-            if header_name.lower() == 'set-cookie':
+            if header_name.lower() == "set-cookie":
                 cookies = header_value
         if not cookies:
-            raise ExtractorError('Fail to fetch cookies')
+            raise ExtractorError("Fail to fetch cookies")
 
-        cookies_dict = dict(c.strip(' ,').split('=', 1) for c in cookies.split(';'))
-        did_cookie = cookies_dict['_did']        
+        cookies_dict = dict(c.strip(" ,").split("=", 1) for c in cookies.split(";"))
+        did_cookie = cookies_dict["_did"]
 
-        self._FAKE_HEADERS.update({
-            'Cookie': '_did=%s' % did_cookie
-        })        
+        self._FAKE_HEADERS.update({"Cookie": "_did=%s" % did_cookie})
 
         # Login to get userId and acfun.api.visitor_st
-        login_data = compat_urllib_parse_urlencode({'sid': 'acfun.api.visitor'}).encode('ascii')
+        login_data = compat_urllib_parse_urlencode({"sid": "acfun.api.visitor"}).encode(
+            "ascii"
+        )
         login_json = self._download_json(
-            'https://id.app.acfun.cn/rest/app/visitor/login', 
-            live_id, 
-            data=login_data, 
-            headers=self._FAKE_HEADERS)
+            "https://id.app.acfun.cn/rest/app/visitor/login",
+            live_id,
+            data=login_data,
+            headers=self._FAKE_HEADERS,
+        )
 
-        streams_url = "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s" % (
-            login_json['userId'], 
-            did_cookie, login_json['acfun.api.visitor_st'])
+        streams_url = (
+            "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s"
+            % (login_json["userId"], did_cookie, login_json["acfun.api.visitor_st"])
+        )
 
         # Fetch stream lists
-        fetch_streams_data = compat_urllib_parse_urlencode({
-            'authorId':  int_or_none(live_id),
-            'pullStreamType': 'FLV'    
-        }).encode('ascii')
+        fetch_streams_data = compat_urllib_parse_urlencode(
+            {"authorId": int_or_none(live_id), "pullStreamType": "FLV"}
+        ).encode("ascii")
 
         streams_json = self._download_json(
-            streams_url, 
-            live_id,  
-            data=fetch_streams_data, 
-            headers=self._FAKE_HEADERS)
+            streams_url, live_id, data=fetch_streams_data, headers=self._FAKE_HEADERS
+        )
 
         # print(streams_json)
-        title = streams_json['data']['caption']
-        streams_info = json.loads(streams_json['data']['videoPlayRes']) # streams info
-        representation = streams_info['liveAdaptiveManifest'][0]['adaptationSet']['representation']
-        
+        title = streams_json["data"]["caption"]
+        streams_info = json.loads(streams_json["data"]["videoPlayRes"])  # streams info
+        representation = streams_info["liveAdaptiveManifest"][0]["adaptationSet"][
+            "representation"
+        ]
+
         formats = []
         for stream in representation:
-            quality = stream["qualityType"]
-            formats += [{
-                'url': stream["url"],
-                'ext': 'mp4',
-                'tbr': stream.get('bitrate'),
-            }]  
+            formats += [
+                {
+                    "url": stream["url"],
+                    "ext": "mp4",
+                    "tbr": stream.get("bitrate"),
+                }
+            ]
         self._sort_formats(formats)
         return {
-            'id': live_id,
-            'title': self._live_title(title),
-            'formats': formats,
-            'is_live': True
-        }
\ No newline at end of file
+            "id": live_id,
+            "title": self._live_title(title),
+            "formats": formats,
+            "is_live": True,
+        }

From 1758030b08c6f671c26a79778022cda511b144dd Mon Sep 17 00:00:00 2001
From: johnsmith2077 <johnsmith2077@foxmail.com>
Date: Mon, 5 Oct 2020 18:42:49 +0800
Subject: [PATCH 3/7] [acfun] Raise error for closed live rooms; use float duration

---
 youtube_dl/extractor/acfun.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py
index bf4945ecc..fab83a4e3 100644
--- a/youtube_dl/extractor/acfun.py
+++ b/youtube_dl/extractor/acfun.py
@@ -101,7 +101,7 @@ class AcfunIE(BasicAcfunInfoExtractor):
 
         currentVideoInfo = json_data.get("currentVideoInfo")
         durationMillis = currentVideoInfo.get("durationMillis")
-        duration = durationMillis / 1000
+        duration = float_or_none(durationMillis) / 1000.0
 
         formats = self._extract_formats(currentVideoInfo)
         return {
@@ -146,7 +146,7 @@ class AcfunBangumiIE(BasicAcfunInfoExtractor):
 
         currentVideoInfo = json_data.get("currentVideoInfo")
         durationMillis = currentVideoInfo.get("durationMillis")
-        duration = durationMillis / 1000
+        duration = float_or_none(durationMillis) / 1000.0
 
         formats = self._extract_formats(currentVideoInfo)
         return {
@@ -214,7 +214,11 @@ class AcfunLiveIE(BasicAcfunInfoExtractor):
             streams_url, live_id, data=fetch_streams_data, headers=self._FAKE_HEADERS
         )
 
-        # print(streams_json)
+        try:
+            assert "data" in streams_json
+        except AssertionError:
+            raise ExtractorError("This live room is currently closed")
+
         title = streams_json["data"]["caption"]
         streams_info = json.loads(streams_json["data"]["videoPlayRes"])  # streams info
         representation = streams_info["liveAdaptiveManifest"][0]["adaptationSet"][

From 345bd3b0264ad82f5a91c3d027bb49b6c1d6f7ee Mon Sep 17 00:00:00 2001
From: johnsmith2077 <johnsmith2077@foxmail.com>
Date: Wed, 7 Oct 2020 02:28:01 +0800
Subject: [PATCH 4/7] [acfun] Add playlist support for normal video

---
 youtube_dl/extractor/acfun.py | 65 ++++++++++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py
index fab83a4e3..004898990 100644
--- a/youtube_dl/extractor/acfun.py
+++ b/youtube_dl/extractor/acfun.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import json
+import re
 
 from .common import InfoExtractor
 from ..compat import (
@@ -51,9 +52,10 @@ class BasicAcfunInfoExtractor(InfoExtractor):
 
 
 class AcfunIE(BasicAcfunInfoExtractor):
-    _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)"
+    _VALID_URL = r"https?://www\.acfun\.cn/v/ac(?P<id>\d+)(?P<page_id>[_\d]+)?"
     _TESTS = [
         {
+            "note": "single video without playlist",
             "url": "https://www.acfun.cn/v/ac18184362",
             "info_dict": {
                 "id": "18184362",
@@ -65,9 +67,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
             },
         },
         {
+            "note": "single video in playlist",
             "url": "https://www.acfun.cn/v/ac17532274_3",
             "info_dict": {
-                "id": "17532274_3",
+                "id": "17532274",
                 "ext": "mp4",
                 "duration": 233.770,
                 "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！ - TRAP 阿婵",
@@ -75,30 +78,68 @@ class AcfunIE(BasicAcfunInfoExtractor):
                 "uploader_id": 23682490,
             },
         },
+        {
+            "note": "multiple video with playlist",
+            "url": "https://www.acfun.cn/v/ac17532274",
+            "info_dict": {
+                "id": "17532274",
+                "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！",
+                "uploader": "AC娘本体",
+                "uploader_id": 23682490,
+            },
+            "playlist_count": 5
+        }
     ]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        video_id, page_id = re.match(self._VALID_URL, url).groups()
+
         webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
 
         json_text = self._html_search_regex(
             r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text"
         )
-        json_data = json.loads(json_text)
+        json_data = json.loads(json_text)   
 
         title = json_data["title"]
+
+        uploader = str_or_none(json_data.get("user").get("name"))
+        uploader_id = str_to_int(json_data.get("user").get("id"))       
+
+        videoList = json_data.get('videoList')
+        if videoList:
+            video_num = len(videoList)
+        
+        if not page_id and video_num and video_num > 1:
+            if not self._downloader.params.get('noplaylist'):
+                self.to_screen('Downloading all pages %s - add --no-playlist to just download video' % video_id)
+                entries = [self.url_result( 
+                    '%s_%d' % (url, pid), 
+                    self.IE_NAME, 
+                    video_id='%s_%d' % (video_id, pid)) 
+                    for pid in range(1, video_num+1)]
+                playlist = self.playlist_result(entries, video_id, title)
+                playlist.update({
+                    'uploader': uploader,
+                    'uploader_id': uploader_id,
+                })
+                return playlist
+                          
+            self.to_screen('Downloading just video %s because of --no-playlist' % video_id) 
+
         p_title = self._html_search_regex(
             r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>",
             webpage,
             "p_title",
             default=None,
-        )
+        )        
+
         if p_title:
-            title = "%s - %s" % (title, p_title)
-
-        uploader = json_data.get("user").get("name")
-        uploader_id = json_data.get("user").get("id")
+            title = "%s-%s" % (title, p_title)     
 
+        if page_id:
+            video_id += page_id             
+            
         currentVideoInfo = json_data.get("currentVideoInfo")
         durationMillis = currentVideoInfo.get("durationMillis")
         duration = float_or_none(durationMillis) / 1000.0
@@ -106,10 +147,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
         formats = self._extract_formats(currentVideoInfo)
         return {
             "id": video_id,
-            "uploader_id": str_to_int(uploader_id),
+            "uploader_id": uploader_id,
             "title": title,
-            "uploader": str_or_none(uploader),
-            "duration": float_or_none(duration),
+            "uploader": uploader,
+            "duration": duration,
             "formats": formats,
         }
 

From d96564c350e56eecc35feefa371ba781dd0793db Mon Sep 17 00:00:00 2001
From: johnsmith2077 <johnsmith2077@foxmail.com>
Date: Wed, 7 Oct 2020 02:39:21 +0800
Subject: [PATCH 5/7] [acfun] Fix test cases

---
 youtube_dl/extractor/acfun.py | 60 ++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py
index 004898990..5e97f7603 100644
--- a/youtube_dl/extractor/acfun.py
+++ b/youtube_dl/extractor/acfun.py
@@ -70,10 +70,10 @@ class AcfunIE(BasicAcfunInfoExtractor):
             "note": "single video in playlist",
             "url": "https://www.acfun.cn/v/ac17532274_3",
             "info_dict": {
-                "id": "17532274",
+                "id": "17532274_3",
                 "ext": "mp4",
                 "duration": 233.770,
-                "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！ - TRAP 阿婵",
+                "title": "【AC娘x竾颜音】【周六狂欢24小时】TRAP：七夕恋歌！落入本娘爱的陷阱！-TRAP 阿婵",
                 "uploader": "AC娘本体",
                 "uploader_id": 23682490,
             },
@@ -87,8 +87,8 @@ class AcfunIE(BasicAcfunInfoExtractor):
                 "uploader": "AC娘本体",
                 "uploader_id": 23682490,
             },
-            "playlist_count": 5
-        }
+            "playlist_count": 5,
+        },
     ]
 
     def _real_extract(self, url):
@@ -99,47 +99,57 @@ class AcfunIE(BasicAcfunInfoExtractor):
         json_text = self._html_search_regex(
             r"(?s)videoInfo\s*=\s*(\{.*?\});", webpage, "json_text"
         )
-        json_data = json.loads(json_text)   
+        json_data = json.loads(json_text)
 
         title = json_data["title"]
 
         uploader = str_or_none(json_data.get("user").get("name"))
-        uploader_id = str_to_int(json_data.get("user").get("id"))       
+        uploader_id = str_to_int(json_data.get("user").get("id"))
 
-        videoList = json_data.get('videoList')
+        videoList = json_data.get("videoList")
         if videoList:
             video_num = len(videoList)
-        
+
         if not page_id and video_num and video_num > 1:
-            if not self._downloader.params.get('noplaylist'):
-                self.to_screen('Downloading all pages %s - add --no-playlist to just download video' % video_id)
-                entries = [self.url_result( 
-                    '%s_%d' % (url, pid), 
-                    self.IE_NAME, 
-                    video_id='%s_%d' % (video_id, pid)) 
-                    for pid in range(1, video_num+1)]
+            if not self._downloader.params.get("noplaylist"):
+                self.to_screen(
+                    "Downloading all pages %s - add --no-playlist to just download video"
+                    % video_id
+                )
+                entries = [
+                    self.url_result(
+                        "%s_%d" % (url, pid),
+                        self.IE_NAME,
+                        video_id="%s_%d" % (video_id, pid),
+                    )
+                    for pid in range(1, video_num + 1)
+                ]
                 playlist = self.playlist_result(entries, video_id, title)
-                playlist.update({
-                    'uploader': uploader,
-                    'uploader_id': uploader_id,
-                })
+                playlist.update(
+                    {
+                        "uploader": uploader,
+                        "uploader_id": uploader_id,
+                    }
+                )
                 return playlist
-                          
-            self.to_screen('Downloading just video %s because of --no-playlist' % video_id) 
+
+            self.to_screen(
+                "Downloading just video %s because of --no-playlist" % video_id
+            )
 
         p_title = self._html_search_regex(
             r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>",
             webpage,
             "p_title",
             default=None,
-        )        
+        )
 
         if p_title:
-            title = "%s-%s" % (title, p_title)     
+            title = "%s-%s" % (title, p_title)
 
         if page_id:
-            video_id += page_id             
-            
+            video_id += page_id
+
         currentVideoInfo = json_data.get("currentVideoInfo")
         durationMillis = currentVideoInfo.get("durationMillis")
         duration = float_or_none(durationMillis) / 1000.0

From 165ff0aab2a511fdfad8ed2102e124bb3b0b2049 Mon Sep 17 00:00:00 2001
From: johnsmith2077 <johnsmith2077@foxmail.com>
Date: Wed, 7 Oct 2020 14:38:35 +0800
Subject: [PATCH 6/7] [acfun] Add playlist support for bangumi

---
 youtube_dl/extractor/acfun.py | 155 +++++++++++++++++++++++-----------
 1 file changed, 105 insertions(+), 50 deletions(-)

diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py
index 5e97f7603..7be5db967 100644
--- a/youtube_dl/extractor/acfun.py
+++ b/youtube_dl/extractor/acfun.py
@@ -1,20 +1,18 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import time
 import json
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse_urlencode,
-    compat_urllib_request,
-)
 from ..utils import (
     int_or_none,
     float_or_none,
     str_or_none,
     str_to_int,
     sanitized_Request,
+    urlencode_postdata,
     ExtractorError,
 )
 
@@ -36,7 +34,8 @@ class BasicAcfunInfoExtractor(InfoExtractor):
 
         formats = []
         for stream in representation:
-            size = float_or_none(durationMillis) * stream["avgBitrate"] / 8
+            avgByterate = float_or_none(stream.get("avgBitrate"), 8)
+            size = float_or_none(durationMillis, invscale=avgByterate)
             formats += [
                 {
                     "url": stream["url"],
@@ -79,7 +78,7 @@ class AcfunIE(BasicAcfunInfoExtractor):
             },
         },
         {
-            "note": "multiple video with playlist",
+            "note": "multiple video within playlist",
             "url": "https://www.acfun.cn/v/ac17532274",
             "info_dict": {
                 "id": "17532274",
@@ -113,8 +112,8 @@ class AcfunIE(BasicAcfunInfoExtractor):
         if not page_id and video_num and video_num > 1:
             if not self._downloader.params.get("noplaylist"):
                 self.to_screen(
-                    "Downloading all pages %s - add --no-playlist to just download video"
-                    % video_id
+                    "Downloading all pages of %s(ac%s) - add --no-playlist to just download video"
+                    % (title, video_id)
                 )
                 entries = [
                     self.url_result(
@@ -134,7 +133,8 @@ class AcfunIE(BasicAcfunInfoExtractor):
                 return playlist
 
             self.to_screen(
-                "Downloading just video %s because of --no-playlist" % video_id
+                "Downloading just video %s(ac%s) because of --no-playlist"
+                % (title, video_id)
             )
 
         p_title = self._html_search_regex(
@@ -152,7 +152,7 @@ class AcfunIE(BasicAcfunInfoExtractor):
 
         currentVideoInfo = json_data.get("currentVideoInfo")
         durationMillis = currentVideoInfo.get("durationMillis")
-        duration = float_or_none(durationMillis) / 1000.0
+        duration = float_or_none(durationMillis, 1000)
 
         formats = self._extract_formats(currentVideoInfo)
         return {
@@ -166,44 +166,93 @@ class AcfunIE(BasicAcfunInfoExtractor):
 
 
 class AcfunBangumiIE(BasicAcfunInfoExtractor):
-    _VALID_URL = r"https?://www\.acfun\.cn/bangumi/aa(?P<id>[_\d]+)"
-    _TEST = {
-        "url": "https://www.acfun.cn/bangumi/aa6002917_36188_1748679",
-        "info_dict": {
-            "id": "6002917_36188_1748679",
-            "ext": "mp4",
-            "duration": 1437.076,
-            "title": "租借女友 第12话 告白和女友",
+    _VALID_URL = r"https?://www\.acfun\.cn/bangumi/aa(?P<id>\d+)(?P<episode_id>[_\d]+)?"
+    _TESTS = [
+        {
+            "note": "single episode",
+            "url": "https://www.acfun.cn/bangumi/aa6002917_36188_1748679",
+            "info_dict": {
+                "id": "6002917_36188_1748679",
+                "ext": "mp4",
+                "duration": 1437.076,
+                "title": "租借女友 第12话 告白和女友",
+            },
         },
-    }
+        {
+            "note": "all episodes of bangumi",
+            "url": "https://www.acfun.cn/bangumi/aa6002917",
+            "info_dict": {
+                "id": "6002917",
+                "title": "租借女友",
+            },
+            "playlist_count": 12,
+        },
+    ]
+
+    _TEMPLATE_URL = "https://www.acfun.cn/bangumi/aa%s%s"
+    _FETCH_EPISODES_URL = "https://www.acfun.cn/bangumi/aa%s?pagelets=pagelet_partlist&reqID=0&ajaxpipe=1&t=%d"
+
+    def _all_episodes(self, bangumi_id):
+        timestamp = int_or_none(float_or_none(time.time(), invscale=1000))
+        print("Timestamp: ", timestamp)
+        webpage = self._download_webpage(
+            self._FETCH_EPISODES_URL % (bangumi_id, timestamp),
+            bangumi_id,
+            headers=self._FAKE_HEADERS,
+        )
+        entries = [
+            self.url_result(self._TEMPLATE_URL % (bangumi_id, eid), self.IE_NAME, eid)
+            for eid in re.findall(
+                r"data-href=./bangumi/aa%s([_\d]+)." % bangumi_id, webpage
+            )
+        ]
+        return entries
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
+        bangumi_id, episode_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, bangumi_id, headers=self._FAKE_HEADERS)
 
         json_text = self._html_search_regex(
             r"(?s)bangumiData\s*=\s*(\{.*?\});", webpage, "json_text"
         )
         json_data = json.loads(json_text)
 
-        title = (
-            json_data.get("showTitle")
-            or json_data["bangumiTitle"]
-            + " "
-            + json_data["episodeName"]
-            + " "
-            + json_data["title"]
+        bangumiTitle = json_data["bangumiTitle"]
+
+        if not episode_id:
+            if not self._downloader.params.get("noplaylist"):
+                self.to_screen(
+                    "Downloading all episodes of %s(aa%s) - add --no-playlist to just download first episode"
+                    % (bangumiTitle, bangumi_id)
+                )
+                playlist = self.playlist_result(
+                    self._all_episodes(bangumi_id), bangumi_id, bangumiTitle
+                )
+                return playlist
+
+            self.to_screen(
+                "Downloading just first episode %s(aa%s) because of --no-playlist"
+                % (bangumiTitle, bangumi_id)
+            )
+
+        title = json_data.get("showTitle") or "%s %s %s" % (
+            json_data["bangumiTitle"],
+            json_data["episodeName"],
+            json_data["title"],
         )
 
         currentVideoInfo = json_data.get("currentVideoInfo")
         durationMillis = currentVideoInfo.get("durationMillis")
-        duration = float_or_none(durationMillis) / 1000.0
+        duration = float_or_none(durationMillis, 1000)
+
+        if episode_id:
+            bangumi_id += episode_id
 
         formats = self._extract_formats(currentVideoInfo)
         return {
-            "id": video_id,
+            "id": bangumi_id,
             "title": title,
-            "duration": float_or_none(duration),
+            "duration": duration,
             "formats": formats,
         }
 
@@ -211,23 +260,31 @@ class AcfunBangumiIE(BasicAcfunInfoExtractor):
 class AcfunLiveIE(BasicAcfunInfoExtractor):
     _VALID_URL = r"https?://live\.acfun\.cn/live/(?P<id>\d+)"
     _TEST = {
-        "url": "https://live.acfun.cn/live/36782183",
-        "only_matching": True,
+        "url": "https://live.acfun.cn/live/34195163",
         "info_dict": {
-            "id": "36782183",
+            "id": "34195163",
             "ext": "mp4",
-            # 'title': '看见兔兔就烦！',
+            "title": r"re:^晴心Haruko \d{4}-\d{2}-\d{2} \d{2}:\d{2}$",
             "is_live": True,
         },
+        "only_matching": True,
     }
 
+    _LOGIN_URL = "https://id.app.acfun.cn/rest/app/visitor/login"
+    _STREAMS_URL = "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s"
+
     def _real_extract(self, url):
         live_id = self._match_id(url)
         self._FAKE_HEADERS.update({"Referer": url})
 
-        # Firstly get _did cookie
-        fisrt_req = sanitized_Request(url, headers=self._FAKE_HEADERS)
-        first_res = compat_urllib_request.urlopen(fisrt_req)
+        # First fetch the _did cookie and the streamer name (used for the title)
+        first_req = sanitized_Request(url, headers=self._FAKE_HEADERS)
+        webpage, first_res = self._download_webpage_handle(first_req, live_id)
+        live_up_name = self._html_search_regex(
+            r"<a [^>]*?class[^>]*?up-name[^>]*?>([^<]*?)</a>",
+            webpage,
+            "live_up_name",
+        )
 
         for header_name, header_value in first_res.info().items():
             if header_name.lower() == "set-cookie":
@@ -241,25 +298,24 @@ class AcfunLiveIE(BasicAcfunInfoExtractor):
         self._FAKE_HEADERS.update({"Cookie": "_did=%s" % did_cookie})
 
         # Login to get userId and acfun.api.visitor_st
-        login_data = compat_urllib_parse_urlencode({"sid": "acfun.api.visitor"}).encode(
-            "ascii"
-        )
+        login_data = urlencode_postdata({"sid": "acfun.api.visitor"})
         login_json = self._download_json(
-            "https://id.app.acfun.cn/rest/app/visitor/login",
+            self._LOGIN_URL,
             live_id,
             data=login_data,
             headers=self._FAKE_HEADERS,
         )
 
-        streams_url = (
-            "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s"
-            % (login_json["userId"], did_cookie, login_json["acfun.api.visitor_st"])
+        streams_url = self._STREAMS_URL % (
+            login_json["userId"],
+            did_cookie,
+            login_json["acfun.api.visitor_st"],
         )
 
         # Fetch stream lists
-        fetch_streams_data = compat_urllib_parse_urlencode(
+        fetch_streams_data = urlencode_postdata(
             {"authorId": int_or_none(live_id), "pullStreamType": "FLV"}
-        ).encode("ascii")
+        )
 
         streams_json = self._download_json(
             streams_url, live_id, data=fetch_streams_data, headers=self._FAKE_HEADERS
@@ -270,8 +326,7 @@ class AcfunLiveIE(BasicAcfunInfoExtractor):
         except AssertionError:
             raise ExtractorError("This live room is currently closed")
 
-        title = streams_json["data"]["caption"]
-        streams_info = json.loads(streams_json["data"]["videoPlayRes"])  # streams info
+        streams_info = json.loads(streams_json["data"]["videoPlayRes"])
         representation = streams_info["liveAdaptiveManifest"][0]["adaptationSet"][
             "representation"
         ]
@@ -288,7 +343,7 @@ class AcfunLiveIE(BasicAcfunInfoExtractor):
         self._sort_formats(formats)
         return {
             "id": live_id,
-            "title": self._live_title(title),
+            "title": self._live_title(live_up_name),
             "formats": formats,
             "is_live": True,
         }

From c21015f533b5282ac6d1cc9f521b7854792216bf Mon Sep 17 00:00:00 2001
From: johnsmith2077 <johnsmith2077@foxmail.com>
Date: Wed, 7 Oct 2020 18:06:38 +0800
Subject: [PATCH 7/7] [acfun] use hls instead of flv for live stream to avoid
 broken video

---
 youtube_dl/extractor/acfun.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/acfun.py b/youtube_dl/extractor/acfun.py
index 7be5db967..76c31ede0 100644
--- a/youtube_dl/extractor/acfun.py
+++ b/youtube_dl/extractor/acfun.py
@@ -194,7 +194,6 @@ class AcfunBangumiIE(BasicAcfunInfoExtractor):
 
     def _all_episodes(self, bangumi_id):
         timestamp = int_or_none(float_or_none(time.time(), invscale=1000))
-        print("Timestamp: ", timestamp)
         webpage = self._download_webpage(
             self._FETCH_EPISODES_URL % (bangumi_id, timestamp),
             bangumi_id,
@@ -333,9 +332,15 @@ class AcfunLiveIE(BasicAcfunInfoExtractor):
 
         formats = []
         for stream in representation:
+            # Use HLS instead of FLV to avoid a broken video file when stopped
+            i = stream["url"].index("flv?")
+            u3m8_url = (
+                stream["url"][0:i].replace("pull.etoote.com", "hlspull.etoote.com")
+                + "m3u8"
+            )
             formats += [
                 {
-                    "url": stream["url"],
+                    "url": u3m8_url,
                     "ext": "mp4",
                     "tbr": stream.get("bitrate"),
                 }