From 8361e7f93489f226542517216b2127ff170ca996 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 12 Jun 2019 21:41:46 +0100 Subject: [PATCH 01/28] [toutv] update client key(closes #21370) --- youtube_dl/extractor/toutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 00f58a087..44b022fca 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -38,7 +38,7 @@ class TouTvIE(RadioCanadaIE): 'url': 'https://ici.tou.tv/l-age-adulte/S01C501', 'only_matching': True, }] - _CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36' + _CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4' def _real_initialize(self): email, password = self._get_login_info() From 28cc2241e44ff0c0704cfffaca6d47d377041aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jun 2019 01:56:17 +0700 Subject: [PATCH 02/28] [utils] Restrict parse_codecs and add theora as known vcodec (#21381) --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 11 +++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 71980b3fc..659c6ece5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -822,6 +822,15 @@ class TestUtil(unittest.TestCase): 'vcodec': 'av01.0.05M.08', 'acodec': 'none', }) + self.assertEqual(parse_codecs('theora, vorbis'), { + 'vcodec': 'theora', + 'acodec': 'vorbis', + }) + self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { + 'vcodec': 'unknownvcodec', + 'acodec': 'unknownacodec', + }) + self.assertEqual(parse_codecs('unknown'), {}) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ead9bd862..a1f586b80 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2529,7 +2529,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in splited_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): @@ -2540,13 +2540,8 @@ def parse_codecs(codecs_str): if not vcodec and not acodec: if len(splited_codecs) == 2: return { - 'vcodec': vcodec, - 'acodec': acodec, - } - elif len(splited_codecs) == 1: - return { - 'vcodec': 'none', - 'acodec': vcodec, + 'vcodec': splited_codecs[0], + 'acodec': splited_codecs[1], } else: return { From b85eae0f057a0afdf1da9d6034c19327c8de33cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jun 2019 01:59:05 +0700 Subject: [PATCH 03/28] [youtube] Hardcode codec metadata for av01 video only formats (closes #21381) --- youtube_dl/extractor/youtube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 963c73a2d..7b630b191 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -500,6 +500,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, + + # av01 video only formats sometimes served with "unknown" codecs + '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, } _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') From 7c24a58bdb60af80137beac85c8804c70194a455 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 16 Jun 2019 06:32:17 +0100 Subject: [PATCH 04/28] [sixplay] add support for rtlmost.hu(#21405) --- youtube_dl/extractor/sixplay.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index 2a72af11b..7ec66ecf3 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -19,7 +19,7 @@ from ..utils import ( class SixPlayIE(InfoExtractor): IE_NAME = '6play' - _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P[0-9]+)' + _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P[0-9]+)' _TESTS = [{ 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051', 'md5': '31fcd112637baa0c2ab92c4fcd8baf27', @@ -35,6 +35,9 @@ class SixPlayIE(InfoExtractor): }, { 'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989', 'only_matching': True, + }, { + 'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787', + 'only_matching': True, }] def _real_extract(self, url): @@ -43,6 +46,7 @@ class SixPlayIE(InfoExtractor): '6play.fr': ('6play', 'm6web'), 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'), 'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'), + 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'), }.get(domain, ('6play', 'm6web')) data = self._download_json( From c40714cdee0ce3de1a5f6e17a61d3ee4c610ae63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 20 Jun 2019 00:57:58 +0700 Subject: [PATCH 05/28] [youtube] Make --write-annotations non fatal (closes #21452) --- youtube_dl/extractor/youtube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7b630b191..1010c8616 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1581,8 +1581,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return video_id def _extract_annotations(self, video_id): - url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id - return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') + return self._download_webpage( + 'https://www.youtube.com/annotations_invideo', video_id, + note='Downloading annotations', + errnote='Unable to download video annotations', fatal=False, + query={ + 'features': 1, + 'legacy': 1, + 'video_id': video_id, + }) @staticmethod def _extract_chapters(description, duration): From abefc03f517e9208b9d0c35e7e683941a40bb152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 22:58:42 +0700 Subject: [PATCH 06/28] [youtube] Update signature function patterns (closes #21469, closes #21476) --- youtube_dl/extractor/youtube.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1010c8616..83b6ac134 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1312,11 +1312,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - (r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', + (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + # Obsolete patterns + r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) From bc6438c092be6ca63843a349eee1db2b5d398d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 23:01:09 +0700 Subject: [PATCH 07/28] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index c4d485ff1..10394a3b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core +* [utils] Restrict parse_codecs and add theora as known vcodec (#21381) + +Extractors +* [youtube] Update signature function patterns (#21469, #21476) +* [youtube] Make --write-annotations non fatal (#21452) ++ [sixplay] Add support for rtlmost.hu (#21405) +* [youtube] Hardcode codec metadata for av01 video only formats (#21381) +* [toutv] Update client key (#21370) ++ [biqle] Add support for new embed domain +* [cbs] Improve DRM protected videos detection (#21339) + + version 2019.06.08 Core From 9842d29d660b1ffe7873823542085879ba9d86a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 23:04:09 +0700 Subject: [PATCH 08/28] release 2019.06.21 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 104ad598c..7a2b16827 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.08 + [debug] youtube-dl version 2019.06.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index f711af040..d6180e672 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index ae865a8b0..7cb981abf 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8246b570e..802fa2313 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.08 + [debug] youtube-dl version 2019.06.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 292c2e697..5153864a1 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 10394a3b6..2d9988da3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.06.21 Core * [utils] Restrict parse_codecs and add theora as known vcodec (#21381) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6aa666bc9..33474a452 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.08' +__version__ = '2019.06.21' From 4681441d2faf54615962029c7240601e339281bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 00:07:26 +0700 Subject: [PATCH 09/28] [crunchyroll:playlist] Fix and relax title extraction (closes #21291, closes #21443) --- youtube_dl/extractor/crunchyroll.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 588c3c71b..75b56ee42 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -661,9 +661,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): webpage = self._download_webpage( self._add_skip_wall(url), show_id, headers=self.geo_verification_headers()) - title = self._html_search_regex( - r'(?s)]*>\s*(.*?)', - webpage, 'title') + title = self._html_search_meta('name', webpage, default=None) + episode_paths = re.findall( r'(?s)
  • ]+>.*? Date: Sat, 22 Jun 2019 00:15:52 +0700 Subject: [PATCH 10/28] [crunchyroll] Move Accept-Language workaround to video extractor since it causes playlists not to list any videos --- youtube_dl/extractor/crunchyroll.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 75b56ee42..85a9a577f 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -103,19 +103,6 @@ class CrunchyrollBaseIE(InfoExtractor): def _real_initialize(self): self._login() - def _download_webpage(self, url_or_request, *args, **kwargs): - request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) - else sanitized_Request(url_or_request)) - # Accept-Language must be set explicitly to accept any language to avoid issues - # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. - # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction - # should be imposed or not (from what I can see it just takes the first language - # ignoring the priority and requires it to correspond the IP). By the way this causes - # Crunchyroll to not work in georestriction cases in some browsers that don't place - # the locale lang first in header. However allowing any language seems to workaround the issue. - request.add_header('Accept-Language', '*') - return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) - @staticmethod def _add_skip_wall(url): parsed_url = compat_urlparse.urlparse(url) @@ -269,6 +256,19 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): '1080': ('80', '108'), } + def _download_webpage(self, url_or_request, *args, **kwargs): + request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) + else sanitized_Request(url_or_request)) + # Accept-Language must be set explicitly to accept any language to avoid issues + # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. + # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction + # should be imposed or not (from what I can see it just takes the first language + # ignoring the priority and requires it to correspond the IP). By the way this causes + # Crunchyroll to not work in georestriction cases in some browsers that don't place + # the locale lang first in header. However allowing any language seems to workaround the issue. + request.add_header('Accept-Language', '*') + return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) + def _decrypt_subtitles(self, data, iv, id): data = bytes_to_intlist(compat_b64decode(data)) iv = bytes_to_intlist(compat_b64decode(iv)) From 9c2aaac2685b34143ed770d5e0c7f3906ab1107d Mon Sep 17 00:00:00 2001 From: Emmanuel Froissart Date: Wed, 12 Jun 2019 13:55:07 +0200 Subject: [PATCH 11/28] [tf1] Fix wat id extraction (closes #21365) --- youtube_dl/extractor/tf1.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 903f47380..091350848 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import js_to_json + class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" @@ -43,12 +45,40 @@ class TF1IE(InfoExtractor): }, { 'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html', 'only_matching': True, + }, { + 'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html', + 'info_dict': { + 'id': '13641379', + 'ext': 'mp4', + 'title': 'md5:f392bc52245dc5ad43771650c96fb620', + 'description': 'md5:44bc54f0a21322f5b91d68e76a544eae', + 'upload_date': '20190611', + }, + 'params': { + # Sometimes wat serves the whole file with the --test option + 'skip_download': True, + }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', - webpage, 'wat id', group='id') + vids_data_string = self._html_search_regex( + r'', + webpage, 'videos data string', group='vids_data_string', default=None) + wat_id = None + if vids_data_string is not None: + vids_data = self._parse_json( + vids_data_string, video_id, + transform_source=js_to_json) + video_data = [v for v in vids_data.values() + if 'slug' in v and v['slug'] == video_id] + if len(video_data) > 0 and 'streamId' in video_data[0]: + wat_id = video_data[0]['streamId'] + if wat_id is None: + wat_id = self._html_search_regex( + [r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', + r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2' + ], + webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From 1c11204056566c2983f0a837897d882581880f41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 00:37:49 +0700 Subject: [PATCH 12/28] [tf1] Improve extraction and fix issues (closes #21372) --- youtube_dl/extractor/tf1.py | 42 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 091350848..55e2a0721 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -2,8 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor - -from ..utils import js_to_json +from ..compat import compat_str class TF1IE(InfoExtractor): @@ -62,23 +61,32 @@ class TF1IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - vids_data_string = self._html_search_regex( - r'', - webpage, 'videos data string', group='vids_data_string', default=None) + wat_id = None - if vids_data_string is not None: - vids_data = self._parse_json( - vids_data_string, video_id, - transform_source=js_to_json) - video_data = [v for v in vids_data.values() - if 'slug' in v and v['slug'] == video_id] - if len(video_data) > 0 and 'streamId' in video_data[0]: - wat_id = video_data[0]['streamId'] - if wat_id is None: + + data = self._parse_json( + self._search_regex( + r'__APOLLO_STATE__\s*=\s*({.+?})\s*(?:;|)', webpage, + 'data', default='{}'), video_id, fatal=False) + + if data: + try: + wat_id = next( + video.get('streamId') + for key, video in data.items() + if isinstance(video, dict) + and video.get('slug') == video_id) + if not isinstance(wat_id, compat_str) or not wat_id.isdigit(): + wat_id = None + except StopIteration: + pass + + if not wat_id: wat_id = self._html_search_regex( - [r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', - r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2' - ], + (r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', + r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2'), webpage, 'wat id', group='id') + return self.url_result('wat:%s' % wat_id, 'Wat') From 31ce6e996666e7512990da01ef58785933dcb2be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 02:22:07 +0700 Subject: [PATCH 13/28] [youtube] Add another signature function pattern --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 83b6ac134..b570d5bae 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1314,6 +1314,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._search_regex( (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # Obsolete patterns r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', From 21b08463a777a79876721e49d3d07a19bc3fe05e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 05:34:46 +0700 Subject: [PATCH 14/28] [pornhub] Rework extractors (closes #11922, closes #16078, closes #17454, closes #17936) --- youtube_dl/extractor/pornhub.py | 157 +++++++++++++++++++++++++++----- 1 file changed, 132 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index cb59d526f..72c351d56 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -387,17 +387,81 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): }] -class PornHubUserVideosIE(PornHubPlaylistBaseIE): +class PornHubUserIE(PornHubPlaylistBaseIE): + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))' + _TESTS = [{ + 'url': 'https://www.pornhub.com/model/zoe_ph', + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious', + 'info_dict': { + 'id': 'liz-vicious', + }, + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/users/russianveet69', + 'playlist_mincount': 0, + }, { + 'url': 'https://www.pornhub.com/channels/povd', + 'playlist_mincount': 0, + }] + + @classmethod + def suitable(cls, url): + return (False + if PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubUserIE, cls).suitable(url)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user_id = mobj.group('id') + return self.url_result( + '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(), + video_id=user_id) + + +class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + user_id = mobj.group('id') + + page_url = self._make_page_url(url) + + entries = [] + for page_num in itertools.count(1): + try: + webpage = self._download_webpage( + page_url, user_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + break + raise + page_entries = self._extract_entries(webpage, host) + if not page_entries: + break + entries.extend(page_entries) + if not self._has_more(webpage): + break + + return self.playlist_result(orderedSet(entries), user_id) + + +class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' _TESTS = [{ - 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', - 'info_dict': { - 'id': 'zoe_ph', - }, - 'playlist_mincount': 171, + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 149, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', @@ -426,26 +490,69 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'only_matching': True, + }, { + # Most Viewed Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', + 'only_matching': True, + }, { + # Top Rated Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', + 'only_matching': True, + }, { + # Longest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', + 'only_matching': True, + }, { + # Newest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', + 'only_matching': True, }] - def _real_extract(self, url): + @classmethod + def suitable(cls, url): + return (False + if PornHubUserVideosUploadIE.suitable(url) + else super(PornHubUserVideosIE, cls).suitable(url)) + + def _make_page_url(self, url): + return url + + @staticmethod + def _has_more(webpage): + return re.search( + r'''(?x) + ]+\bclass=["\']page_next| + ]+\brel=["\']next| + ]+\bid=["\']moreDataBtn + ''', webpage) is not None + + +class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos/upload)' + _TESTS = [{ + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 129, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, + }] + + def _make_page_url(self, url): mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - user_id = mobj.group('id') + return '%s/ajax' % mobj.group('url') - entries = [] - for page_num in itertools.count(1): - try: - webpage = self._download_webpage( - url, user_id, 'Downloading page %d' % page_num, - query={'page': page_num}) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - break - raise - page_entries = self._extract_entries(webpage, host) - if not page_entries: - break - entries.extend(page_entries) - - return self.playlist_result(entries, user_id) + @staticmethod + def _has_more(webpage): + return True From 1f7a563ab0efd0745ea66c354255844a9bd36c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 06:01:43 +0700 Subject: [PATCH 15/28] [pornhub] Add support for downloading single pages and search pages (closes #15570) --- youtube_dl/extractor/pornhub.py | 39 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 72c351d56..7de585604 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -409,14 +409,14 @@ class PornHubUserIE(PornHubPlaylistBaseIE): @classmethod def suitable(cls, url): return (False - if PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + if PornHubPagedVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) else super(PornHubUserIE, cls).suitable(url)) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('id') return self.url_result( - '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(), + '%s/videos' % mobj.group('url'), ie=PornHubPagedVideosIE.ie_key(), video_id=user_id) @@ -426,10 +426,13 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): host = mobj.group('host') user_id = mobj.group('id') + page = int_or_none(self._search_regex( + r'\bpage=(\d+)', url, 'page', default=None)) + page_url = self._make_page_url(url) entries = [] - for page_num in itertools.count(1): + for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( page_url, user_id, 'Downloading page %d' % page_num, @@ -448,10 +451,17 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): return self.playlist_result(orderedSet(entries), user_id) -class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' +class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/ + (?: + (?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos| + video/search + ) + ''' _TESTS = [{ - 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', @@ -462,6 +472,12 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): 'id': 'jenny-blighe', }, 'playlist_mincount': 149, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 40, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', @@ -484,12 +500,6 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/model/jayndrea/videos/upload', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', - 'only_matching': True, }, { # Most Viewed Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', @@ -506,9 +516,6 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): # Newest Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload', - 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', 'only_matching': True, @@ -521,7 +528,7 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): def suitable(cls, url): return (False if PornHubUserVideosUploadIE.suitable(url) - else super(PornHubUserVideosIE, cls).suitable(url)) + else super(PornHubPagedVideosIE, cls).suitable(url)) def _make_page_url(self, url): return url From 9634de178d35c5cd767b183c2be82b14bef84209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 08:37:07 +0700 Subject: [PATCH 16/28] [pornhub] Add support for more paged video sources --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/pornhub.py | 101 +++++++++++++++++------------ 2 files changed, 62 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1ed8a4b2..9cd7d3ac4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -892,8 +892,9 @@ from .porncom import PornComIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, - PornHubPlaylistIE, - PornHubUserVideosIE, + PornHubUserIE, + PornHubPagedVideoListIE, + PornHubUserVideosUploadIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7de585604..11b8cfcf7 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -372,23 +372,8 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): entries, playlist_id, title, playlist.get('description')) -class PornHubPlaylistIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/playlist/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.pornhub.com/playlist/4667351', - 'info_dict': { - 'id': '4667351', - 'title': 'Nataly Hot', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://de.pornhub.com/playlist/4667351', - 'only_matching': True, - }] - - class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, @@ -400,23 +385,20 @@ class PornHubUserIE(PornHubPlaylistBaseIE): 'playlist_mincount': 118, }, { 'url': 'https://www.pornhub.com/users/russianveet69', - 'playlist_mincount': 0, + 'only_matching': True, }, { 'url': 'https://www.pornhub.com/channels/povd', - 'playlist_mincount': 0, + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', + 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return (False - if PornHubPagedVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) - else super(PornHubUserIE, cls).suitable(url)) - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('id') return self.url_result( - '%s/videos' % mobj.group('url'), ie=PornHubPagedVideosIE.ie_key(), + '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id) @@ -424,7 +406,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') - user_id = mobj.group('id') + item_id = mobj.group('id') page = int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) @@ -435,7 +417,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( - page_url, user_id, 'Downloading page %d' % page_num, + page_url, item_id, 'Downloading page %d' % page_num, query={'page': page_num}) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: @@ -448,18 +430,11 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): if not self._has_more(webpage): break - return self.playlist_result(orderedSet(entries), user_id) + return self.playlist_result(orderedSet(entries), item_id) -class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/ - (?: - (?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos| - video/search - ) - ''' +class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?P(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, @@ -469,20 +444,20 @@ class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', 'info_dict': { - 'id': 'jenny-blighe', + 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 149, }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', 'info_dict': { - 'id': 'jenny-blighe', + 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 40, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', 'info_dict': { - 'id': 'povd', + 'id': 'channels/povd/videos', }, 'playlist_mincount': 293, }, { @@ -522,13 +497,55 @@ class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/search?search=123', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video?page=2', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/playlist/44121572', + 'info_dict': { + 'id': 'playlist/44121572', + }, + 'playlist_mincount': 132, + }, { + 'url': 'https://www.pornhub.com/playlist/4667351', + 'only_matching': True, + }, { + 'url': 'https://de.pornhub.com/playlist/4667351', + 'only_matching': True, }] @classmethod def suitable(cls, url): return (False - if PornHubUserVideosUploadIE.suitable(url) - else super(PornHubPagedVideosIE, cls).suitable(url)) + if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubPagedVideoListIE, cls).suitable(url)) def _make_page_url(self, url): return url From 091c9b43164f6f3b31f5f911c88a4aeaa0358429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jun 2019 02:13:46 +0700 Subject: [PATCH 17/28] [vimeo:likes] Implement extrator in terms of channel extractor This allows to obtain videos' ids before extraction (#21493) --- youtube_dl/extractor/vimeo.py | 50 +++++------------------------------ 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a41178bab..aeee7df8f 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,7 +16,6 @@ from ..utils import ( determine_ext, ExtractorError, js_to_json, - InAdvancePagedList, int_or_none, merge_dicts, NO_DEFAULT, @@ -1065,7 +1064,7 @@ class VimeoWatchLaterIE(VimeoChannelIE): return self._extract_videos('watchlater', 'https://vimeo.com/watchlater') -class VimeoLikesIE(InfoExtractor): +class VimeoLikesIE(VimeoChannelIE): _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P[^/]+)/likes/?(?:$|[?#]|sort:)' IE_NAME = 'vimeo:likes' IE_DESC = 'Vimeo user likes' @@ -1073,55 +1072,20 @@ class VimeoLikesIE(InfoExtractor): 'url': 'https://vimeo.com/user755559/likes/', 'playlist_mincount': 293, 'info_dict': { - 'id': 'user755559_likes', - 'description': 'See all the videos urza likes', - 'title': 'Videos urza likes', + 'id': 'user755559', + 'title': 'urza’s Likes', }, }, { 'url': 'https://vimeo.com/stormlapse/likes', 'only_matching': True, }] + def _page_url(self, base_url, pagenum): + return '%s/page:%d/' % (base_url, pagenum) + def _real_extract(self, url): user_id = self._match_id(url) - webpage = self._download_webpage(url, user_id) - page_count = self._int( - self._search_regex( - r'''(?x)
  • - .*?
  • \s* - ''', webpage, 'page count', default=1), - 'page count', fatal=True) - PAGE_SIZE = 12 - title = self._html_search_regex( - r'(?s)

    (.+?)

    ', webpage, 'title', fatal=False) - description = self._html_search_meta('description', webpage) - - def _get_page(idx): - page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % ( - user_id, idx + 1) - webpage = self._download_webpage( - page_url, user_id, - note='Downloading page %d/%d' % (idx + 1, page_count)) - video_list = self._search_regex( - r'(?s)
      ]*>(.*?)
    ', - webpage, 'video content') - paths = re.findall( - r']*>\s* Date: Sun, 23 Jun 2019 02:16:09 +0700 Subject: [PATCH 18/28] [vimeo:channel,group] Make title extraction no fatal --- youtube_dl/extractor/vimeo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index aeee7df8f..b5b44a79a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -813,7 +813,8 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return '%s/videos/page:%d/' % (base_url, pagenum) def _extract_list_title(self, webpage): - return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title') + return self._TITLE or self._html_search_regex( + self._TITLE_RE, webpage, 'list title', fatal=False) def _login_list_password(self, page_url, list_id, webpage): login_form = self._search_regex( @@ -954,7 +955,7 @@ class VimeoGroupsIE(VimeoAlbumIE): }] def _extract_list_title(self, webpage): - return self._og_search_title(webpage) + return self._og_search_title(webpage, fatal=False) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 695720ebe81166b3ee249eb3916e3c7819ef57a8 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Sat, 22 Jun 2019 22:31:43 +0100 Subject: [PATCH 19/28] [openload] Add support for oload.life (#21495) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 32d546e4e..b2918dc85 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -244,7 +244,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -363,6 +363,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.website/embed/drTBl1aOTvk/', 'only_matching': True, + }, { + 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From 3031b7c4ed3a446dc83123ce34780f4db56ad4ef Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Sun, 23 Jun 2019 19:04:05 +0900 Subject: [PATCH 20/28] [brightcove:new] Add support for playlists (#21331) --- youtube_dl/extractor/brightcove.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c0345e2c3..58ec5c979 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -483,7 +483,7 @@ class BrightcoveLegacyIE(InfoExtractor): class BrightcoveNewIE(AdobePassIE): IE_NAME = 'brightcove:new' - _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/(?P[^/]+)_(?P[^/]+)/index\.html\?.*videoId=(?P\d+|ref:[^&]+)' + _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/(?P[^/]+)_(?P[^/]+)/index\.html\?.*(?Pvideo|playlist)Id=(?P\d+|ref:[^&]+)' _TESTS = [{ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', 'md5': 'c8100925723840d4b0d243f7025703be', @@ -516,6 +516,21 @@ class BrightcoveNewIE(AdobePassIE): # m3u8 download 'skip_download': True, } + }, { + # playlist stream + 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', + 'info_dict': { + 'id': '5718313430001', + 'title': 'No Audio Playlist', + }, + 'playlist_count': 7, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', + 'only_matching': True, }, { # ref: prefixed video id 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', @@ -715,7 +730,7 @@ class BrightcoveNewIE(AdobePassIE): 'ip_blocks': smuggled_data.get('geo_ip_blocks'), }) - account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() + account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage( 'http://players.brightcove.net/%s/%s_%s/index.min.js' @@ -736,7 +751,7 @@ class BrightcoveNewIE(AdobePassIE): r'policyKey\s*:\s*(["\'])(?P.+?)\1', webpage, 'policy key', group='pk') - api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) + api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) headers = { 'Accept': 'application/json;pk=%s' % policy_key, } @@ -771,5 +786,12 @@ class BrightcoveNewIE(AdobePassIE): 'tveToken': tve_token, }) + if content_type == 'playlist': + return self.playlist_result( + [self._parse_brightcove_metadata(vid, vid.get('id'), headers) + for vid in json_data.get('videos', []) if vid.get('id')], + json_data.get('id'), json_data.get('name'), + json_data.get('description')) + return self._parse_brightcove_metadata( json_data, video_id, headers=headers) From 27cef8885de4ffaa33f96973df3c50b62504bd49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Jun 2019 23:01:43 +0700 Subject: [PATCH 21/28] [beeg] Add support for api/v6 v2 URLs (closes #21511) --- youtube_dl/extractor/beeg.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 192f11ea6..c15a0ac8f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( int_or_none, unified_timestamp, @@ -11,6 +14,7 @@ from ..utils import ( class BeegIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P\d+)' _TESTS = [{ + # api/v6 v1 'url': 'http://beeg.com/5416503', 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820', 'info_dict': { @@ -24,6 +28,10 @@ class BeegIE(InfoExtractor): 'tags': list, 'age_limit': 18, } + }, { + # api/v6 v2 + 'url': 'https://beeg.com/1941093077?t=911-1391', + 'only_matching': True, }, { 'url': 'https://beeg.porn/video/5416503', 'only_matching': True, @@ -41,11 +49,22 @@ class BeegIE(InfoExtractor): r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', default='1546225636701') + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query = { + 'v': 2, + 's': t[0], + 'e': t[1], + } + else: + query = {'v': 1} + for api_path in ('', 'api.'): video = self._download_json( 'https://%sbeeg.com/api/v6/%s/video/%s' % (api_path, beeg_version, video_id), video_id, - fatal=api_path == 'api.') + fatal=api_path == 'api.', query=query) if video: break From 1d83e9bd4b2dbc854f6f8b7f4baa14602a288c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Jun 2019 00:12:31 +0700 Subject: [PATCH 22/28] [nfb] Remove extractor (closes #21518) Covered by generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/nfb.py | 112 ----------------------------- 2 files changed, 113 deletions(-) delete mode 100644 youtube_dl/extractor/nfb.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9cd7d3ac4..530474f3f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -745,7 +745,6 @@ from .nexx import ( NexxIE, NexxEmbedIE, ) -from .nfb import NFBIE from .nfl import NFLIE from .nhk import NhkVodIE from .nhl import NHLIE diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py deleted file mode 100644 index adcc636bc..000000000 --- a/youtube_dl/extractor/nfb.py +++ /dev/null @@ -1,112 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - clean_html, - determine_ext, - int_or_none, - qualities, - urlencode_postdata, - xpath_text, -) - - -class NFBIE(InfoExtractor): - IE_NAME = 'nfb' - IE_DESC = 'National Film Board of Canada' - _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P[\da-z_-]+)' - - _TEST = { - 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', - 'info_dict': { - 'id': 'qallunaat_why_white_people_are_funny', - 'ext': 'flv', - 'title': 'Qallunaat! Why White People Are Funny ', - 'description': 'md5:6b8e32dde3abf91e58857b174916620c', - 'duration': 3128, - 'creator': 'Mark Sandiford', - 'uploader': 'Mark Sandiford', - }, - 'params': { - # rtmp download - 'skip_download': True, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - config = self._download_xml( - 'https://www.nfb.ca/film/%s/player_config' % video_id, - video_id, 'Downloading player config XML', - data=urlencode_postdata({'getConfig': 'true'}), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf' - }) - - title, description, thumbnail, duration, uploader, author = [None] * 6 - thumbnails, formats = [[]] * 2 - subtitles = {} - - for media in config.findall('./player/stream/media'): - if media.get('type') == 'posterImage': - quality_key = qualities(('low', 'high')) - thumbnails = [] - for asset in media.findall('assets/asset'): - asset_url = xpath_text(asset, 'default/url', default=None) - if not asset_url: - continue - quality = asset.get('quality') - thumbnails.append({ - 'url': asset_url, - 'id': quality, - 'preference': quality_key(quality), - }) - elif media.get('type') == 'video': - title = xpath_text(media, 'title', fatal=True) - for asset in media.findall('assets/asset'): - quality = asset.get('quality') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]$', quality or '', 'height', default=None)) - for node in asset: - streamer = xpath_text(node, 'streamerURI', default=None) - if not streamer: - continue - play_path = xpath_text(node, 'url', default=None) - if not play_path: - continue - formats.append({ - 'url': streamer, - 'app': streamer.split('/', 3)[3], - 'play_path': play_path, - 'rtmp_live': False, - 'ext': 'flv', - 'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag, - 'height': height, - }) - self._sort_formats(formats) - description = clean_html(xpath_text(media, 'description')) - uploader = xpath_text(media, 'author') - duration = int_or_none(media.get('duration')) - for subtitle in media.findall('./subtitles/subtitle'): - subtitle_url = xpath_text(subtitle, 'url', default=None) - if not subtitle_url: - continue - lang = xpath_text(subtitle, 'lang', default='en') - subtitles.setdefault(lang, []).append({ - 'url': subtitle_url, - 'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'duration': duration, - 'creator': uploader, - 'uploader': uploader, - 'formats': formats, - 'subtitles': subtitles, - } From 509bcec37ba26a8c7bc263cf8067495ec7cf120a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 27 Jun 2019 12:06:09 +0100 Subject: [PATCH 23/28] [fusion] fix extraction(closes #17775)(closes #21269) --- youtube_dl/extractor/fusion.py | 69 +++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py index 25e284d46..a3f44b812 100644 --- a/youtube_dl/extractor/fusion.py +++ b/youtube_dl/extractor/fusion.py @@ -1,35 +1,84 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .ooyala import OoyalaIE +from ..utils import ( + determine_ext, + int_or_none, + mimetype2ext, + parse_iso8601, +) class FusionIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P\d+)' _TESTS = [{ 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', 'info_dict': { - 'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', + 'id': '3145868', 'ext': 'mp4', 'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs', 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', 'duration': 140.0, + 'timestamp': 1442589635, + 'uploader': 'UNIVISON', + 'upload_date': '20150918', }, 'params': { 'skip_download': True, }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Anvato'], }, { 'url': 'http://fusion.tv/video/201781', 'only_matching': True, + }, { + 'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video_id = self._match_id(url) + video = self._download_json( + 'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id) - ooyala_code = self._search_regex( - r'data-ooyala-id=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala code', group='code') + info = { + 'id': video_id, + 'title': video['title'], + 'description': video.get('excerpt'), + 'timestamp': parse_iso8601(video.get('published')), + 'series': video.get('show'), + } - return OoyalaIE._build_url_result(ooyala_code) + formats = [] + src = video.get('src') or {} + for f_id, f in src.items(): + for q_id, q in f.items(): + q_url = q.get('url') + if not q_url: + continue + ext = determine_ext(q_url, mimetype2ext(q.get('type'))) + if ext == 'smil': + formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False)) + elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'): + formats.extend(self._extract_m3u8_formats( + q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': '-'.join([f_id, q_id]), + 'url': q_url, + 'width': int_or_none(q.get('width')), + 'height': int_or_none(q.get('height')), + 'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')), + 'ext': 'mp4' if ext == 'm3u8' else ext, + 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', + }) + if formats: + self._sort_formats(formats) + info['formats'] = formats + else: + info.update({ + '_type': 'url', + 'url': 'anvato:uni:' + video['video_ids']['anvato'], + 'ie_key': 'Anvato', + }) + + return info From f5629946608861097b6ce5095efb9a9e8ac7f056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 22:18:10 +0700 Subject: [PATCH 24/28] [drtv] Relax _VALID_URL --- youtube_dl/extractor/drtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 0c7e350f0..218f10209 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -24,7 +24,7 @@ from ..utils import ( class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' _GEO_BYPASS = False _GEO_COUNTRIES = ['DK'] IE_NAME = 'drtv' @@ -80,6 +80,9 @@ class DRTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', + 'only_matching': True, }] def _real_extract(self, url): From 6625bf200d08baf64764e99caa48b4fb3a48ff8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 27 Jun 2019 17:24:46 +0200 Subject: [PATCH 25/28] [mixer:vod] Relax _VALID_URL (closes #21531) (#21536) --- youtube_dl/extractor/beampro.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index 2eaec1ab4..e264a145f 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -99,8 +99,8 @@ class BeamProLiveIE(BeamProBaseIE): class BeamProVodIE(BeamProBaseIE): IE_NAME = 'Mixer:vod' - _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\d+)' - _TEST = { + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\w+)' + _TESTS = [{ 'url': 'https://mixer.com/willow8714?vod=2259830', 'md5': 'b2431e6e8347dc92ebafb565d368b76b', 'info_dict': { @@ -119,7 +119,10 @@ class BeamProVodIE(BeamProBaseIE): 'params': { 'skip_download': True, }, - } + }, { + 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', + 'only_matching': True, + }] @staticmethod def _extract_format(vod, vod_type): From 4f71473ef186c0797596e96755e86df80f357a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 22:59:30 +0700 Subject: [PATCH 26/28] [go] Add support for disneynow.com (closes #21528) --- youtube_dl/extractor/go.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 206d89e82..5916f9a8f 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -36,7 +36,7 @@ class GoIE(AdobePassIE): 'resource_id': 'DisneyXD', } } - _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ + _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|disneynow)\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', @@ -71,6 +71,9 @@ class GoIE(AdobePassIE): # brand 008 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', 'only_matching': True, + }, { + 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', + 'only_matching': True, }] def _extract_videos(self, brand, video_id='-1', show_id='-1'): @@ -89,7 +92,7 @@ class GoIE(AdobePassIE): # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', - default=None) + default=video_id) if not site_info: brand = self._search_regex( (r'data-brand=\s*["\']\s*(\d+)', From 232331c0d2f446af760403ed5a0439cdc3deb112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 23:55:15 +0700 Subject: [PATCH 27/28] [ChangeLog] Actualize [ci skip] --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2d9988da3..985d14a28 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version + +Extractors ++ [go] Add support for disneynow.com (#21528) +* [mixer:vod] Relax URL regular expression (#21531, #21536) +* [drtv] Relax URL regular expression +* [fusion] Fix extraction (#17775, #21269) +- [nfb] Remove extractor (#21518) ++ [beeg] Add support for api/v6 v2 URLs (#21511) ++ [brightcove:new] Add support for playlists (#21331) ++ [openload] Add support for oload.life (#21495) +* [vimeo:channel,group] Make title extraction non fatal +* [vimeo:likes] Implement extrator in terms of channel extractor (#21493) ++ [pornhub] Add support for more paged video sources ++ [pornhub] Add support for downloading single pages and search pages (#15570) +* [pornhub] Rework extractors (#11922, #16078, #17454, #17936) ++ [youtube] Add another signature function pattern +* [tf1] Fix extraction (#21365, #21372) +* [crunchyroll] Move Accept-Language workaround to video extractor since + it causes playlists not to list any videos +* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443) + + version 2019.06.21 Core From 8c8cae91ece9841567aa48095245f92ae8f4b295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 23:57:33 +0700 Subject: [PATCH 28/28] release 2019.06.27 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 6 +++--- youtube_dl/version.py | 2 +- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 7a2b16827..d7c15e85a 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.21 + [debug] youtube-dl version 2019.06.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index d6180e672..741862590 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 7cb981abf..4fb035ea4 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 802fa2313..73ed62012 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.21 + [debug] youtube-dl version 2019.06.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 5153864a1..a9d3653e2 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 985d14a28..4ae3d6c7c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.06.27 Extractors + [go] Add support for disneynow.com (#21528) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bfd15b4dc..55ae43144 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -581,7 +581,6 @@ - **NextTV**: 壹電視 - **Nexx** - **NexxEmbed** - - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** - **nhl.com** @@ -692,8 +691,9 @@ - **PornerBros** - **PornHd** - **PornHub**: PornHub and Thumbzilla - - **PornHubPlaylist** - - **PornHubUserVideos** + - **PornHubPagedVideoList** + - **PornHubUser** + - **PornHubUserVideosUpload** - **Pornotube** - **PornoVoisines** - **PornoXO** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 33474a452..01896873d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.21' +__version__ = '2019.06.27'