From ea0a8811577cbbe8807ad0560795332b11381839 Mon Sep 17 00:00:00 2001
From: Yarn <Yarn@users.noreply.github.com>
Date: Tue, 21 Apr 2020 15:21:17 -0700
Subject: [PATCH 1/4] Update twitcasting extractor and add twitcasting history
 extractor

---
 youtube_dl/extractor/extractors.py  |   2 +-
 youtube_dl/extractor/twitcasting.py | 101 +++++++++++++++++++++++++---
 2 files changed, 92 insertions(+), 11 deletions(-)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index ef803b8a7..8f2fac787 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1214,7 +1214,7 @@ from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
 from .twentythreevideo import TwentyThreeVideoIE
-from .twitcasting import TwitCastingIE
+from .twitcasting import TwitCastingIE, TwitCastingHistoryIE
 from .twitch import (
     TwitchVideoIE,
     TwitchChapterIE,
diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py
index 2dbe89f5b..d0bed5b0a 100644
--- a/youtube_dl/extractor/twitcasting.py
+++ b/youtube_dl/extractor/twitcasting.py
@@ -1,11 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+import itertools
+
 from .common import InfoExtractor
 from ..utils import urlencode_postdata
 
-import re
-
 
 class TwitCastingIE(InfoExtractor):
     _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
@@ -56,15 +57,25 @@ class TwitCastingIE(InfoExtractor):
             r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
             webpage, 'title', default=None) or self._html_search_meta(
             'twitter:title', webpage, fatal=True)
+        # title is split across lines with lots of whitespace
+        title = title.replace('\n', ' ')
+        while '  ' in title:
+            title = title.replace('  ', ' ')
 
-        m3u8_url = self._search_regex(
-            (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-             r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
-            webpage, 'm3u8 url', group='url')
-
-        formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
-            m3u8_id='hls')
+        # m3u8_url = self._search_regex(
+        #     (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+        #      r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
+        #     webpage, 'm3u8 url', group='url')
+        # m3u8_url = m3u8_url.replace('\\/', '/')
+        # formats = self._extract_m3u8_formats(
+        #     m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+        #     m3u8_id='hls')
+        formats = [
+            {
+                'url': "http://dl01.twitcasting.tv/{uploader_id}/download/{video_id}?dl=1".format(uploader_id=uploader_id, video_id=video_id),
+                'ext': 'mp4',
+            }
+        ]
 
         thumbnail = self._og_search_thumbnail(webpage)
         description = self._og_search_description(
@@ -79,3 +90,73 @@ class TwitCastingIE(InfoExtractor):
             'uploader_id': uploader_id,
             'formats': formats,
         }
+
+
+class TwitCastingHistoryIE(InfoExtractor):
+    """Playlist extractor for a TwitCasting user's broadcast history (/show)."""
+
+    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/show'
+    _TESTS = [{
+        'url': 'https://twitcasting.tv/mttbernardini/show/',
+        'info_dict': {
+            'title': 'Matteo Bernardini',
+            'id': 'mttbernardini',
+        },
+        'playlist_count': 1,
+    }]
+
+    def _get_meta_and_entries(self, url):
+        for page_num in itertools.count(0):
+            page_url = "{}/{}".format(url.rstrip('/'), page_num)
+            pagenum = None
+            list_id = None
+            webpage = self._download_webpage(
+                page_url, list_id,
+                'Downloading page %s' % pagenum)
+
+            if page_num == 0:
+                # title = re.search(r'<span class="tw-user-nav-name">(.*)</span>', webpage)
+                title = re.search(r'(?s)<[^>]+class=["\']tw-user-nav-name[^>]+>(.+?)</', webpage)
+                title = title.group(1).strip()
+                user_id = re.search(r'data-user-id="(.*)"', webpage)
+                user_id = user_id.group(1).strip()
+                yield (title, user_id)
+
+            first_page_selected = webpage.find('class="selected">1</a>') != -1
+            if page_num != 0 and first_page_selected:
+                break
+
+            # Each thumbnail anchor: group 1 is the href, group 2 the inner HTML.
+            # [^"]+ stops the href capture at its own closing quote.
+            matches = re.finditer(r'''<a[^>]+class=["']tw-movie-thumbnail["'][^>]+href="([^"]+)"[^>]+>((?:\n|.)*?)</a>''', webpage)
+            for match in matches:
+                href = match.group(1)
+                inner = match.group(2)
+                # without "REC" in the markup it is either a live broadcast or an image
+                # e.g. https://twitcasting.tv/marrynontan/movie/506296434
+                if 'REC' not in inner:
+                    continue
+
+                # skip videos that require a password
+                # e.g. https://twitcasting.tv/mttbernardini/movie/3689740
+                locked = re.search(r'''src="/img/locked\.png"''', inner)
+                if locked is not None:
+                    continue
+
+                title = re.search(r'''<[^>]+class=["']tw-movie-thumbnail-title[^>]+>[ \n]*?(.+?) *?</''', inner)
+                if title is not None:
+                    title = title.group(1).strip()
+
+                video_url = 'https://twitcasting.tv{}'.format(href)
+                video_id = href.split('/')[-1]
+                result = self.url_result(video_url, ie=TwitCastingIE.ie_key(), video_id=video_id, video_title=title)
+                yield result
+
+    def _real_extract(self, url):
+        """Build the playlist from the paginated /show history pages."""
+        # The generator's first item is (title, user_id); the rest are entries.
+        entries = self._get_meta_and_entries(url)
+        title, user_id = next(entries)
+
+        return self.playlist_result(
+            entries, playlist_title=title, playlist_id=user_id)

From 18a84b26e03b8985c39c5257def6c0b7b8990e28 Mon Sep 17 00:00:00 2001
From: Yarn <yarnnd@gmail.com>
Date: Fri, 18 Sep 2020 02:12:20 -0700
Subject: [PATCH 2/4] Restore HLS (m3u8) format extraction in twitcasting extractor

---
 youtube_dl/extractor/twitcasting.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py
index d0bed5b0a..fc1dd0826 100644
--- a/youtube_dl/extractor/twitcasting.py
+++ b/youtube_dl/extractor/twitcasting.py
@@ -62,20 +62,14 @@ class TwitCastingIE(InfoExtractor):
         while '  ' in title:
             title = title.replace('  ', ' ')
 
-        # m3u8_url = self._search_regex(
-        #     (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-        #      r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
-        #     webpage, 'm3u8 url', group='url')
-        # m3u8_url = m3u8_url.replace('\\/', '/')
-        # formats = self._extract_m3u8_formats(
-        #     m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
-        #     m3u8_id='hls')
-        formats = [
-            {
-                'url': "http://dl01.twitcasting.tv/{uploader_id}/download/{video_id}?dl=1".format(uploader_id=uploader_id, video_id=video_id),
-                'ext': 'mp4',
-            }
-        ]
+        m3u8_url = self._search_regex(
+            (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+             r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
+            webpage, 'm3u8 url', group='url')
+        m3u8_url = m3u8_url.replace('\\/', '/')
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
 
         thumbnail = self._og_search_thumbnail(webpage)
         description = self._og_search_description(

From 754d90c9a2c000e2f1bc6a89d78319fd77257f1f Mon Sep 17 00:00:00 2001
From: Yarn <yarnnd@gmail.com>
Date: Sat, 19 Sep 2020 03:24:10 -0700
Subject: [PATCH 3/4] Use _search_regex and %-formatting in twitcasting extractors

---
 youtube_dl/extractor/twitcasting.py | 37 +++++++++++++++++++----------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py
index fc1dd0826..23b8542b7 100644
--- a/youtube_dl/extractor/twitcasting.py
+++ b/youtube_dl/extractor/twitcasting.py
@@ -53,10 +53,10 @@ class TwitCastingIE(InfoExtractor):
             })
         webpage = self._download_webpage(url, video_id, data=request_data)
 
-        title = self._html_search_regex(
+        title = (self._html_search_regex(
             r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
-            webpage, 'title', default=None) or self._html_search_meta(
-            'twitter:title', webpage, fatal=True)
+            webpage, 'title', default=None)
+            or self._html_search_meta('twitter:title', webpage, fatal=True))
         # title is split across lines with lots of whitespace
         title = title.replace('\n', ' ')
         while '  ' in title:
@@ -101,7 +101,7 @@ class TwitCastingHistoryIE(InfoExtractor):
 
     def _get_meta_and_entries(self, url):
         for page_num in itertools.count(0):
-            page_url = "{}/{}".format(url.rstrip('/'), page_num)
+            page_url = "%s/%s" % (url.rstrip('/'), page_num)
             pagenum = None
             list_id = None
             webpage = self._download_webpage(
@@ -109,11 +109,19 @@ class TwitCastingHistoryIE(InfoExtractor):
                 'Downloading page %s' % pagenum)
 
             if page_num == 0:
-                # title = re.search(r'<span class="tw-user-nav-name">(.*)</span>', webpage)
-                title = re.search(r'(?s)<[^>]+class=["\']tw-user-nav-name[^>]+>(.+?)</', webpage)
-                title = title.group(1).strip()
-                user_id = re.search(r'data-user-id="(.*)"', webpage)
-                user_id = user_id.group(1).strip()
+                title = self._search_regex(
+                    r'(?s)<[^>]+class=["\']tw-user-nav-name[^>]+>(.+?)</',
+                    webpage, 'playlist_title', fatal=False)
+
+                if title is not None:
+                    title = title.strip()
+
+                user_id = self._search_regex(
+                    r'data-user-id=["\'](.+?)["\']',
+                    webpage, 'user_id', fatal=False)
+                if user_id is not None:
+                    user_id = user_id.strip()
+
                 yield (title, user_id)
 
             first_page_selected = webpage.find('class="selected">1</a>') != -1
@@ -137,13 +145,16 @@ class TwitCastingHistoryIE(InfoExtractor):
                 if locked is not None:
                     continue
 
-                title = re.search(r'''<[^>]+class=["']tw-movie-thumbnail-title[^>]+>[ \n]*?(.+?) *?</''', inner)
+                title = self._search_regex(
+                    r'''<[^>]+class=["']tw-movie-thumbnail-title[^>]+>[ \n]*?(.+?) *?</''',
+                    inner, 'title', fatal=False)
                 if title is not None:
-                    title = title.group(1).strip()
+                    title = title.strip()
 
-                video_url = 'https://twitcasting.tv{}'.format(href)
+                video_url = 'https://twitcasting.tv%s' % href
                 video_id = href.split('/')[-1]
-                result = self.url_result(video_url, ie=TwitCastingIE.ie_key(), video_id=video_id, video_title=title)
+                result = self.url_result(video_url,
+                    ie=TwitCastingIE.ie_key(), video_id=video_id, video_title=title)
                 yield result
 
     def _real_extract(self, url):

From 9652269ff09b4ac65ae29c2e7eb48766cd4a3b71 Mon Sep 17 00:00:00 2001
From: Yarn <yarnnd@gmail.com>
Date: Sat, 19 Sep 2020 04:29:06 -0700
Subject: [PATCH 4/4] Fix continuation-line indentation to satisfy flake8

---
 youtube_dl/extractor/twitcasting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py
index 23b8542b7..6431e58fb 100644
--- a/youtube_dl/extractor/twitcasting.py
+++ b/youtube_dl/extractor/twitcasting.py
@@ -154,7 +154,7 @@ class TwitCastingHistoryIE(InfoExtractor):
                 video_url = 'https://twitcasting.tv%s' % href
                 video_id = href.split('/')[-1]
                 result = self.url_result(video_url,
-                    ie=TwitCastingIE.ie_key(), video_id=video_id, video_title=title)
+                                         ie=TwitCastingIE.ie_key(), video_id=video_id, video_title=title)
                 yield result
 
     def _real_extract(self, url):