From 46cc54ca8f13c7b823c1a12446cdd76d060c74b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Mar 2020 06:23:39 +0700 Subject: [PATCH 01/47] [pornhub] Improve title extraction (closes #24184) --- youtube_dl/extractor/pornhub.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index b3251ccd9..b8f65af7c 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -189,10 +189,10 @@ class PornHubIE(PornHubBaseIE): # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # on that anymore. title = self._html_search_meta( - 'twitter:title', webpage, default=None) or self._search_regex( - (r']+class=["\']title["\'][^>]*>(?P[^<]+)', - r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', - r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'), + 'twitter:title', webpage, default=None) or self._html_search_regex( + (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>', + r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1', + r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), webpage, 'title', group='title') video_urls = [] From 12ee431676bb655f04c7dd416a73c1f142ed368d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 3 Mar 2020 12:33:38 +0100 Subject: [PATCH 02/47] [vimeo] fix showcase password protected video extraction(closes #24224) --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 8cd611e1e..cea686afc 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -585,7 +585,7 @@ class VimeoIE(VimeoBaseInfoExtractor): url = 'https://vimeo.com/' + video_id elif is_player: url = 'https://player.vimeo.com/video/' + video_id - elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): + elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf', '/album/', '/showcase/')): url = 'https://vimeo.com/' + video_id try: From dc879c5a37dae588a5bb35d416635678356ad1b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 5 Mar 2020 23:48:25 +0700 Subject: [PATCH 03/47] [youtube] Fix age-gated videos support without login (closes #24248) --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e06290427..91b9d59c6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1256,7 +1256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_signature_function(self, video_id, player_url, example_sig): id_m = re.match( - r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$', + r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$', player_url) if not id_m: raise ExtractorError('Cannot identify player %r' % player_url) @@ -2035,7 +2035,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: player_version = self._search_regex( [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', - r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'], + r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'], player_url, 'html5 player', fatal=False) player_desc = 'html5 player %s' % player_version From 5429d6a9cb2db07472735b4555eeec89ea4f3c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Mar 2020 00:05:50 +0700 Subject: [PATCH 04/47] [youtube] Fix tests --- youtube_dl/extractor/youtube.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 91b9d59c6..d3e18a6ad 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -570,7 +570,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20120506', 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', 'alt_title': 'I Love It (feat. Charli XCX)', - 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8', + 'description': 'md5:19a2f98d9032b9311e686ed039564f63', 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', 'iconic ep', 'iconic', 'love', 'it'], @@ -685,12 +685,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'nfWlot6h_JM', 'ext': 'm4a', 'title': 'Taylor Swift - Shake It Off', - 'description': 'md5:bec2185232c05479482cb5a9b82719bf', + 'description': 'md5:307195cd21ff7fa352270fe884570ef0', 'duration': 242, 'uploader': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO', 'upload_date': '20140818', - 'creator': 'Taylor Swift', }, 'params': { 'youtube_include_dash_manifest': True, @@ -755,11 +754,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20100430', 'uploader_id': 'deadmau5', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', - 'creator': 'deadmau5', + 'creator': 'Dada Life, deadmau5', 'description': 'md5:12c56784b8032162bb936a5f76d55360', 'uploader': 'deadmau5', 'title': 'Deadmau5 - Some Chords (HD)', - 'alt_title': 'Some Chords', + 'alt_title': 'This Machine Kills Some Chords', }, 'expected_warnings': [ 'DASH manifest missing', @@ -1135,6 +1134,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, 'youtube_include_dash_manifest': False, }, + 'skip': 'not actual anymore', }, { # Youtube Music Auto-generated description @@ -1145,8 +1145,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Voyeur Girl', 'description': 'md5:7ae382a65843d6df2685993e90a8628f', 'upload_date': '20190312', - 'uploader': 'Various Artists - Topic', - 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw', + 'uploader': 'Stephen - Topic', + 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA', 'artist': 'Stephen', 'track': 'Voyeur Girl', 'album': 'it\'s too much love to know my dear', @@ -1210,7 +1210,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': '-hcAI0g-f5M', 'ext': 'mp4', 'title': 'Put It On Me', - 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e', + 'description': 'md5:f6422397c07c4c907c6638e1fee380a5', 'upload_date': '20180426', 'uploader': 'Matt Maeson - Topic', 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ', From 2db9ac228d0f7ccc1d8078d5e29030665f0e3239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Mar 2020 00:23:14 +0700 Subject: [PATCH 05/47] [ChangeLog] Actualize [ci skip] --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index 1a676f4f2..00b97f965 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +version <unreleased> + +Extractors +* [youtube] Fix age-gated videos support without login (#24248) +* [vimeo] Fix showcase password protected video extraction (#24224) +* [pornhub] Improve title extraction (#24184) +* [peertube] Improve extraction (#23657) ++ [servus] Add support for new URL schema (#23475, #23583, #24142) +* [vimeo] Fix subtitles URLs (#24209) + + version 2020.03.01 Core From 34525a3885946bbbdb2bfda85f3dcc67c66018f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Mar 2020 00:25:43 +0700 Subject: [PATCH 06/47] release 2020.03.06 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 0721d49c3..444a86ee3 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.03.01** +- [ ] I've verified that I'm running youtube-dl version **2020.03.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.03.01 + [debug] youtube-dl version 2020.03.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 1e67f724d..a1c69a45b 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.03.01** +- [ ] I've verified that I'm running youtube-dl version **2020.03.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 1290b55c4..d391b6d6b 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.03.01** +- [ ] I've verified that I'm running youtube-dl version **2020.03.06** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 3f006bef8..7422446b0 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.03.01** +- [ ] I've verified that I'm running youtube-dl version **2020.03.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.03.01 + [debug] youtube-dl version 2020.03.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 202bb9b2f..247d3594d 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.03.01** +- [ ] I've verified that I'm running youtube-dl version **2020.03.06** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 00b97f965..0efae7d9e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2020.03.06 Extractors * [youtube] Fix age-gated videos support without login (#24248) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fabc1e543..56330ea2e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.03.01' +__version__ = '2020.03.06' From 0ec9d4e565c1471c1234634bb3be0c7c7662d864 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 6 Mar 2020 20:12:35 +0100 Subject: [PATCH 07/47] [nhk] update API version(closes #24270) --- youtube_dl/extractor/nhk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 6a2c6cb7b..d2cbc9f54 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -31,7 +31,7 @@ class NhkVodIE(InfoExtractor): 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/', 'only_matching': True, }] - _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json' + _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json' def _real_extract(self, url): lang, m_type, episode_id = re.match(self._VALID_URL, url).groups() From f93abcf1da5aa1ca122896254bdec3a3c831ac24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Mar 2020 05:09:02 +0700 Subject: [PATCH 08/47] [youtube] Improve extraction in 429 error conditions (closes #24283) --- youtube_dl/extractor/youtube.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d3e18a6ad..9cbdc7ac5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1790,11 +1790,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): query['el'] = el if sts: query['sts'] = sts - video_info_webpage = self._download_webpage( - '%s://www.youtube.com/get_video_info' % proto, - video_id, note=False, - errnote='unable to download video info webpage', - fatal=False, query=query) + try: + video_info_webpage = self._download_webpage( + '%s://www.youtube.com/get_video_info' % proto, + video_id, note=False, + errnote='unable to download video info webpage', + query=query) + except ExtractorError as e: + # Skip further retries if we get 429 since solving + # captcha only unblocks access to website but + # not get_video_info end point + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 429: + break + continue if not video_info_webpage: continue get_video_info = compat_parse_qs(video_info_webpage) @@ -1833,13 +1841,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if messages: return '\n'.join(messages) - if not video_info: + if not video_info and not player_response: unavailable_message = extract_unavailable_message() if not unavailable_message: unavailable_message = 'Unable to extract video data' raise ExtractorError( 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id) + if not isinstance(video_info, dict): + video_info = {} + video_details = try_get( player_response, lambda x: x['videoDetails'], dict) or {} From d332ec725db5b1102473a75b9fc3212913bda618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Mar 2020 05:37:47 +0700 Subject: [PATCH 09/47] [youtube] Improve age-gated videos extraction in 429 error conditions (refs #24283) --- youtube_dl/extractor/youtube.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9cbdc7ac5..906988e6f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1726,6 +1726,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): embed_webpage = None if re.search(r'player-age-gate-content">', video_webpage) is not None: age_gate = True + video_info = None # We simulate the access to the video from www.youtube.com/v/{video_id} # this can be viewed without login into Youtube url = proto + '://www.youtube.com/embed/%s' % video_id @@ -1737,15 +1738,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''), }) video_info_url = proto + '://www.youtube.com/get_video_info?' + data - video_info_webpage = self._download_webpage( - video_info_url, video_id, - note='Refetching age-gated info webpage', - errnote='unable to download video info webpage') - video_info = compat_parse_qs(video_info_webpage) - pl_response = video_info.get('player_response', [None])[0] - player_response = extract_player_response(pl_response, video_id) - add_dash_mpd(video_info) - view_count = extract_view_count(video_info) + try: + video_info_webpage = self._download_webpage( + video_info_url, video_id, + note='Refetching age-gated info webpage', + errnote='unable to download video info webpage') + except ExtractorError: + video_info_webpage = None + if video_info_webpage: + video_info = compat_parse_qs(video_info_webpage) + pl_response = video_info.get('player_response', [None])[0] + player_response = extract_player_response(pl_response, video_id) + add_dash_mpd(video_info) + view_count = extract_view_count(video_info) else: age_gate = False video_info = None From 43ebf77df3bbd93dbbd0336b0243d8d50895ab72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Mar 2020 08:34:17 +0700 Subject: [PATCH 10/47] [youtube] Remove outdated code Additional get_video_info requests don't seem to provide any extra itags any longer --- youtube_dl/extractor/youtube.py | 108 ++++++-------------------------- 1 file changed, 18 insertions(+), 90 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 906988e6f..908defecd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -29,7 +29,6 @@ from ..compat import ( from ..utils import ( bool_or_none, clean_html, - dict_get, error_to_compat_str, extract_attributes, ExtractorError, @@ -1708,9 +1707,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_view_count(v_info): return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) - def extract_token(v_info): - return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token')) - def extract_player_response(player_response, video_id): pl_response = str_or_none(player_response) if not pl_response: @@ -1723,10 +1719,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_response = {} # Get video info + video_info = {} embed_webpage = None if re.search(r'player-age-gate-content">', video_webpage) is not None: age_gate = True - video_info = None # We simulate the access to the video from www.youtube.com/v/{video_id} # this can be viewed without login into Youtube url = proto + '://www.youtube.com/embed/%s' % video_id @@ -1753,8 +1749,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): view_count = extract_view_count(video_info) else: age_gate = False - video_info = None - sts = None # Try looking directly into the video webpage ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) if ytplayer_config: @@ -1771,69 +1765,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) if args.get('livestream') == '1' or args.get('live_playback') == 1: is_live = True - sts = ytplayer_config.get('sts') if not player_response: player_response = extract_player_response(args.get('player_response'), video_id) if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): add_dash_mpd_pr(player_response) - # We also try looking in get_video_info since it may contain different dashmpd - # URL that points to a DASH manifest with possibly different itag set (some itags - # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH - # manifest pointed by get_video_info's dashmpd). - # The general idea is to take a union of itags of both DASH manifests (for example - # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093) - self.report_video_info_webpage_download(video_id) - for el in ('embedded', 'detailpage', 'vevo', ''): - query = { - 'video_id': video_id, - 'ps': 'default', - 'eurl': '', - 'gl': 'US', - 'hl': 'en', - } - if el: - query['el'] = el - if sts: - query['sts'] = sts - try: - video_info_webpage = self._download_webpage( - '%s://www.youtube.com/get_video_info' % proto, - video_id, note=False, - errnote='unable to download video info webpage', - query=query) - except ExtractorError as e: - # Skip further retries if we get 429 since solving - # captcha only unblocks access to website but - # not get_video_info end point - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 429: - break - continue - if not video_info_webpage: - continue - get_video_info = compat_parse_qs(video_info_webpage) - if not player_response: - pl_response = get_video_info.get('player_response', [None])[0] - player_response = extract_player_response(pl_response, video_id) - add_dash_mpd(get_video_info) - if view_count is None: - view_count = extract_view_count(get_video_info) - if not video_info: - video_info = get_video_info - get_token = extract_token(get_video_info) - if get_token: - # Different get_video_info requests may report different results, e.g. - # some may report video unavailability, but some may serve it without - # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362, - # the original webpage as well as el=info and el=embedded get_video_info - # requests report video unavailability due to geo restriction while - # el=detailpage succeeds and returns valid data). This is probably - # due to YouTube measures against IP ranges of hosting providers. - # Working around by preferring the first succeeded video_info containing - # the token if no such video_info yet was found. - token = extract_token(video_info) - if not token: - video_info = get_video_info - break def extract_unavailable_message(): messages = [] @@ -2408,30 +2343,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f['stretched_ratio'] = ratio if not formats: - token = extract_token(video_info) - if not token: - if 'reason' in video_info: - if 'The uploader has not made this video available in your country.' in video_info['reason']: - regions_allowed = self._html_search_meta( - 'regionsAllowed', video_webpage, default=None) - countries = regions_allowed.split(',') if regions_allowed else None - self.raise_geo_restricted( - msg=video_info['reason'][0], countries=countries) - reason = video_info['reason'][0] - if 'Invalid parameters' in reason: - unavailable_message = extract_unavailable_message() - if unavailable_message: - reason = unavailable_message - raise ExtractorError( - 'YouTube said: %s' % reason, - expected=True, video_id=video_id) - else: - raise ExtractorError( - '"token" parameter not in video info for unknown reason', - video_id=video_id) - - if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])): - raise ExtractorError('This video is DRM protected.', expected=True) + if 'reason' in video_info: + if 'The uploader has not made this video available in your country.' in video_info['reason']: + regions_allowed = self._html_search_meta( + 'regionsAllowed', video_webpage, default=None) + countries = regions_allowed.split(',') if regions_allowed else None + self.raise_geo_restricted( + msg=video_info['reason'][0], countries=countries) + reason = video_info['reason'][0] + if 'Invalid parameters' in reason: + unavailable_message = extract_unavailable_message() + if unavailable_message: + reason = unavailable_message + raise ExtractorError( + 'YouTube said: %s' % reason, + expected=True, video_id=video_id) + if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']): + raise ExtractorError('This video is DRM protected.', expected=True) self._sort_formats(formats) From ea782aca520ff17fbf32771bfcfd9cbd36123900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Mar 2020 09:17:17 +0700 Subject: [PATCH 11/47] [README.md] Clarify 429 error --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 01f975958..4f54a5240 100644 --- a/README.md +++ b/README.md @@ -835,7 +835,9 @@ In February 2015, the new YouTube player contained a character sequence in a str ### HTTP Error 429: Too Many Requests or 402: Payment Required -These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address. +These two error codes indicate that the service is blocking your IP address because of overuse. Usually this is a soft block meaning that you can gain access again after solving CAPTCHA. Just open a browser and solve a CAPTCHA the service suggests you and after that [pass cookies](#how-do-i-pass-cookies-to-youtube-dl) to youtube-dl. Note that if your machine has multiple external IPs then you should also pass exactly the same IP you've used for solving CAPTCHA with [`--source-address`](#network-options). Also you may need to pass a `User-Agent` HTTP header of your browser with [`--user-agent`](#workarounds). + +If this is not the case (no CAPTCHA suggested to solve by the service) then you can contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address. ### SyntaxError: Non-ASCII character From fa9b8c662808a50605bb05f90af101e13b30fce6 Mon Sep 17 00:00:00 2001 From: Tristan Waddington <tristan.waddington@gmail.com> Date: Sun, 8 Mar 2020 04:00:25 -0700 Subject: [PATCH 12/47] [pornhub] Add support for pornhubpremium.com (#24288) --- youtube_dl/extractor/pornhub.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index b8f65af7c..3567a3283 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -52,7 +52,7 @@ class PornHubIE(PornHubBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| + (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P<id>[\da-z]+) @@ -149,6 +149,9 @@ class PornHubIE(PornHubBaseIE): }, { 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', 'only_matching': True, + }, { + 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82', + 'only_matching': True, }] @staticmethod @@ -166,6 +169,13 @@ class PornHubIE(PornHubBaseIE): host = mobj.group('host') or 'pornhub.com' video_id = mobj.group('id') + if 'premium' in host: + if not self._downloader.params.get('cookiefile'): + raise ExtractorError( + 'PornHub Premium requires authentication.' + ' You may want to use --cookies.', + expected=True) + self._set_cookie(host, 'age_verified', '1') def dl_webpage(platform): @@ -405,7 +415,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, @@ -473,7 +483,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, @@ -588,7 +598,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' _TESTS = [{ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'info_dict': { From cff99c91d150df2a4e21962a3ca8d4ae94533b8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Mar 2020 17:52:19 +0700 Subject: [PATCH 13/47] [utils] Add support for cookies with spaces used instead of tabs --- test/test_YoutubeDLCookieJar.py | 14 +++++++++----- test/testdata/cookies/cookie_file_with_spaces.txt | 5 +++++ youtube_dl/utils.py | 5 +++++ 3 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 test/testdata/cookies/cookie_file_with_spaces.txt diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index f959798de..f833efac5 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -14,6 +14,9 @@ from youtube_dl.utils import YoutubeDLCookieJar class TestYoutubeDLCookieJar(unittest.TestCase): + def __assert_cookie_has_value(self, cookiejar, key): + self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') + def test_keep_session_cookies(self): cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') cookiejar.load(ignore_discard=True, ignore_expires=True) @@ -32,12 +35,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase): def test_strip_httponly_prefix(self): cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') cookiejar.load(ignore_discard=True, ignore_expires=True) + self.__assert_cookie_has_value(cookiejar, 'HTTPONLY_COOKIE') + self.__assert_cookie_has_value(cookiejar, 'JS_ACCESSIBLE_COOKIE') - def assert_cookie_has_value(key): - self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') - - assert_cookie_has_value('HTTPONLY_COOKIE') - assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') + def test_convert_spaces_to_tabs(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/cookie_file_with_spaces.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + self.__assert_cookie_has_value(cookiejar, 'COOKIE') if __name__ == '__main__': diff --git a/test/testdata/cookies/cookie_file_with_spaces.txt b/test/testdata/cookies/cookie_file_with_spaces.txt new file mode 100644 index 000000000..6fda35fa0 --- /dev/null +++ b/test/testdata/cookies/cookie_file_with_spaces.txt @@ -0,0 +1,5 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +www.foobar.foobar FALSE / TRUE 2147483647 COOKIE COOKIE_VALUE diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8ccf25489..93d1dec05 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2752,6 +2752,11 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): for line in f: if line.startswith(self._HTTPONLY_PREFIX): line = line[len(self._HTTPONLY_PREFIX):] + # Cookie file may contain spaces instead of tabs. + # Replace all spaces with tabs to make such cookie files work + # with MozillaCookieJar. + if not line.startswith('#'): + line = re.sub(r' +', r'\t', line) cf.write(compat_str(line)) cf.seek(0) self._really_load(cf, filename, ignore_discard, ignore_expires) From 434f57304683552b5d3c1a76130319ebd8139340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Mar 2020 18:16:17 +0700 Subject: [PATCH 14/47] [ChangeLog] Actualize [ci skip] --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index 0efae7d9e..12815f3e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version <unreleased> + +Core ++ [utils] Add support for cookie files with spaces + +Extractors ++ [pornhub] Add support for pornhubpremium.com (#24288) +- [youtube] Remove outdated code and unnecessary requests +* [youtube] Improve extraction in 429 HTTP error conditions (#24283) +* [nhk] Update API version (#24270) + + version 2020.03.06 Extractors From 68fa15155f40f7d646079e6649df03696a5e7d4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Mar 2020 18:27:20 +0700 Subject: [PATCH 15/47] release 2020.03.08 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 444a86ee3..d82ff9111 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.03.06** +- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.03.06 + [debug] youtube-dl version 2020.03.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index a1c69a45b..04b350f76 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.03.06** +- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index d391b6d6b..6f17ad7bc 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.03.06** +- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 7422446b0..efb179ea5 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.03.06** +- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.03.06 + [debug] youtube-dl version 2020.03.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 247d3594d..cf4874bcc 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.03.06** +- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 12815f3e1..84b43c642 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2020.03.08 Core + [utils] Add support for cookie files with spaces diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 56330ea2e..0f768f7c1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.03.06' +__version__ = '2020.03.08' From 042b66493398dd8c3bb31216e3f828b98716810d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 10 Mar 2020 04:51:20 +0700 Subject: [PATCH 16/47] Revert "[utils] Add support for cookies with spaces used instead of tabs" According to [1] TABs must be used as separators between fields. Files produces by some tools with spaces as separators are considered malformed. 1. https://curl.haxx.se/docs/http-cookies.html This reverts commit cff99c91d150df2a4e21962a3ca8d4ae94533b8c. --- test/test_YoutubeDLCookieJar.py | 14 +++++--------- test/testdata/cookies/cookie_file_with_spaces.txt | 5 ----- youtube_dl/utils.py | 5 ----- 3 files changed, 5 insertions(+), 19 deletions(-) delete mode 100644 test/testdata/cookies/cookie_file_with_spaces.txt diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index f833efac5..f959798de 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -14,9 +14,6 @@ from youtube_dl.utils import YoutubeDLCookieJar class TestYoutubeDLCookieJar(unittest.TestCase): - def __assert_cookie_has_value(self, cookiejar, key): - self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') - def test_keep_session_cookies(self): cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') cookiejar.load(ignore_discard=True, ignore_expires=True) @@ -35,13 +32,12 @@ class TestYoutubeDLCookieJar(unittest.TestCase): def test_strip_httponly_prefix(self): cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') cookiejar.load(ignore_discard=True, ignore_expires=True) - self.__assert_cookie_has_value(cookiejar, 'HTTPONLY_COOKIE') - self.__assert_cookie_has_value(cookiejar, 'JS_ACCESSIBLE_COOKIE') - def test_convert_spaces_to_tabs(self): - cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/cookie_file_with_spaces.txt') - cookiejar.load(ignore_discard=True, ignore_expires=True) - self.__assert_cookie_has_value(cookiejar, 'COOKIE') + def assert_cookie_has_value(key): + self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') + + assert_cookie_has_value('HTTPONLY_COOKIE') + assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') if __name__ == '__main__': diff --git a/test/testdata/cookies/cookie_file_with_spaces.txt b/test/testdata/cookies/cookie_file_with_spaces.txt deleted file mode 100644 index 6fda35fa0..000000000 --- a/test/testdata/cookies/cookie_file_with_spaces.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Netscape HTTP Cookie File -# http://curl.haxx.se/rfc/cookie_spec.html -# This is a generated file! Do not edit. - -www.foobar.foobar FALSE / TRUE 2147483647 COOKIE COOKIE_VALUE diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 93d1dec05..8ccf25489 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2752,11 +2752,6 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): for line in f: if line.startswith(self._HTTPONLY_PREFIX): line = line[len(self._HTTPONLY_PREFIX):] - # Cookie file may contain spaces instead of tabs. - # Replace all spaces with tabs to make such cookie files work - # with MozillaCookieJar. - if not line.startswith('#'): - line = re.sub(r' +', r'\t', line) cf.write(compat_str(line)) cf.seek(0) self._really_load(cf, filename, ignore_discard, ignore_expires) From f1a8511f7b5bbbd9e64f597c87791ad3b4310efc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 10 Mar 2020 04:59:02 +0700 Subject: [PATCH 17/47] [utils] Add reference to cookie file format --- youtube_dl/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8ccf25489..38262bee4 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2729,6 +2729,11 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): + """ + See [1] for cookie file format. + + 1. https://curl.haxx.se/docs/http-cookies.html + """ _HTTPONLY_PREFIX = '#HttpOnly_' def save(self, filename=None, ignore_discard=False, ignore_expires=False): From 40b6495d403a4636ea8be1dd9a2dad33c136a1e3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 13 Mar 2020 08:59:10 +0100 Subject: [PATCH 18/47] Revert "[vimeo] fix showcase password protected video extraction(closes #24224)" This reverts commit 12ee431676bb655f04c7dd416a73c1f142ed368d. --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index cea686afc..8cd611e1e 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -585,7 +585,7 @@ class VimeoIE(VimeoBaseInfoExtractor): url = 'https://vimeo.com/' + video_id elif is_player: url = 'https://player.vimeo.com/video/' + video_id - elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf', '/album/', '/showcase/')): + elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id try: From fcaf4d7a067e9ac6b36a5f7b1100be391f492a79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Mar 2020 04:39:21 +0700 Subject: [PATCH 19/47] [nhk] Relax _VALID_URL (#24329) --- youtube_dl/extractor/nhk.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index d2cbc9f54..45bc4d85e 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class NhkVodIE(InfoExtractor): - _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[a-z]+-\d{8}-\d+)' + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[^/]+?-\d{8}-\d+)' # Content available only for a limited period of time. Visit # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. _TESTS = [{ @@ -30,6 +30,9 @@ class NhkVodIE(InfoExtractor): }, { 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/', 'only_matching': True, + }, { + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/', + 'only_matching': True, }] _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json' From 9bfe08859491882e40603ed6c4eb59760ed35ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Mar 2020 04:40:11 +0700 Subject: [PATCH 20/47] [nhk] Remove obsolete rtmp formats (closes #24329) --- youtube_dl/extractor/nhk.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 45bc4d85e..ce13f6bb9 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -87,13 +87,6 @@ class NhkVodIE(InfoExtractor): info['formats'] = self._extract_m3u8_formats( 'https://nhks-vh.akamaihd.net/i%s/master.m3u8' % audio_path, episode_id, 'm4a', m3u8_id='hls', fatal=False) - for proto in ('rtmpt', 'rtmp'): - info['formats'].append({ - 'ext': 'flv', - 'format_id': proto, - 'url': '%s://flv.nhk.or.jp/ondemand/mp4:flv%s' % (proto, audio_path), - 'vcodec': 'none', - }) for f in info['formats']: f['language'] = lang return info From 541fe3eaff579f72ccc14f97009a8904f739368f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Mar 2020 04:42:40 +0700 Subject: [PATCH 21/47] [nhk] Update m3u8 URL and use native hls (#24329) --- youtube_dl/extractor/nhk.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index ce13f6bb9..de6a707c4 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -85,8 +85,9 @@ class NhkVodIE(InfoExtractor): audio = episode['audio'] audio_path = audio['audio'] info['formats'] = self._extract_m3u8_formats( - 'https://nhks-vh.akamaihd.net/i%s/master.m3u8' % audio_path, - episode_id, 'm4a', m3u8_id='hls', fatal=False) + 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path, + episode_id, 'm4a', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) for f in info['formats']: f['language'] = lang return info From 4cbce88f8b44ab17d55fe1b7615e37a2c1f142d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Mar 2020 04:58:24 +0700 Subject: [PATCH 22/47] [ndr] Fix extraction (closes #24326) --- youtube_dl/extractor/ndr.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 9c8bf05af..2447c812e 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( determine_ext, int_or_none, + merge_dicts, parse_iso8601, qualities, try_get, @@ -87,21 +88,25 @@ class NDRIE(NDRBaseIE): def _extract_embed(self, webpage, display_id): embed_url = self._html_search_meta( - 'embedURL', webpage, 'embed URL', fatal=True) + 'embedURL', webpage, 'embed URL', + default=None) or self._search_regex( + r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'embed URL', group='url') description = self._search_regex( r'<p[^>]+itemprop="description">([^<]+)</p>', webpage, 'description', default=None) or self._og_search_description(webpage) timestamp = parse_iso8601( self._search_regex( r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"', - webpage, 'upload date', fatal=False)) - return { + webpage, 'upload date', default=None)) + info = self._search_json_ld(webpage, display_id, default={}) + return merge_dicts({ '_type': 'url_transparent', 'url': embed_url, 'display_id': display_id, 'description': description, 'timestamp': timestamp, - } + }, info) class NJoyIE(NDRBaseIE): From 4568a11802b47c22ac43d1187435ab2e6d7a3c6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Mar 2020 22:57:10 +0700 Subject: [PATCH 23/47] [xtube] Fix formats extraction (closes #24348) --- youtube_dl/extractor/xtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 47caec1de..79dd647b3 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -98,7 +98,7 @@ class XTubeIE(InfoExtractor): title = config.get('title') thumbnail = config.get('poster') duration = int_or_none(config.get('duration')) - sources = config.get('sources') + sources = config.get('sources') or config.get('format') if isinstance(sources, dict): sources = self._parse_json(self._search_regex( From 158bc5ac03a175d95337979afa02f7423c2bf445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Mar 2020 22:58:10 +0700 Subject: [PATCH 24/47] [xtube] Fix typo --- youtube_dl/extractor/xtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 79dd647b3..01b253dcb 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -100,7 +100,7 @@ class XTubeIE(InfoExtractor): duration = int_or_none(config.get('duration')) sources = config.get('sources') or config.get('format') - if isinstance(sources, dict): + if not isinstance(sources, dict): sources = self._parse_json(self._search_regex( r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', webpage, 'sources', group='sources'), video_id, From 73453430c11002a7193eaa9fb8cf5349fa326c93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 21 Mar 2020 00:59:48 +0700 Subject: [PATCH 25/47] [hellporno] Fix extraction (closes #24399) --- youtube_dl/extractor/hellporno.py | 73 ++++++++++++++++--------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py index 0ee8ea712..fae425103 100644 --- a/youtube_dl/extractor/hellporno.py +++ b/youtube_dl/extractor/hellporno.py @@ -1,12 +1,11 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( - js_to_json, + int_or_none, + merge_dicts, remove_end, - determine_ext, + unified_timestamp, ) @@ -14,15 +13,21 @@ class HellPornoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/', - 'md5': '1fee339c610d2049699ef2aa699439f1', + 'md5': 'f0a46ebc0bed0c72ae8fe4629f7de5f3', 'info_dict': { 'id': '149116', 'display_id': 'dixie-is-posing-with-naked-ass-very-erotic', 'ext': 'mp4', 'title': 'Dixie is posing with naked ass very erotic', + 'description': 'md5:9a72922749354edb1c4b6e540ad3d215', + 'categories': list, 'thumbnail': r're:https?://.*\.jpg$', + 'duration': 240, + 'timestamp': 1398762720, + 'upload_date': '20140429', + 'view_count': int, 'age_limit': 18, - } + }, }, { 'url': 'http://hellporno.net/v/186271/', 'only_matching': True, @@ -36,40 +41,36 @@ class HellPornoIE(InfoExtractor): title = remove_end(self._html_search_regex( r'<title>([^<]+)', webpage, 'title'), ' - Hell Porno') - flashvars = self._parse_json(self._search_regex( - r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'), - display_id, transform_source=js_to_json) + info = self._parse_html5_media_entries(url, webpage, display_id)[0] + self._sort_formats(info['formats']) - video_id = flashvars.get('video_id') - thumbnail = flashvars.get('preview_url') - ext = determine_ext(flashvars.get('postfix'), 'mp4') + video_id = self._search_regex( + (r'chs_object\s*=\s*["\'](\d+)', + r'params\[["\']video_id["\']\]\s*=\s*(\d+)'), webpage, 'video id', + default=display_id) + description = self._search_regex( + r'class=["\']desc_video_view_v2[^>]+>([^<]+)', webpage, + 'description', fatal=False) + categories = [ + c.strip() + for c in self._html_search_meta( + 'keywords', webpage, 'categories', default='').split(',') + if c.strip()] + duration = int_or_none(self._og_search_property( + 'video:duration', webpage, fatal=False)) + timestamp = unified_timestamp(self._og_search_property( + 'video:release_date', webpage, fatal=False)) + view_count = int_or_none(self._search_regex( + r'>Views\s+(\d+)', webpage, 'view count', fatal=False)) - formats = [] - for video_url_key in ['video_url', 'video_alt_url']: - video_url = flashvars.get(video_url_key) - if not video_url: - continue - video_text = flashvars.get('%s_text' % video_url_key) - fmt = { - 'url': video_url, - 'ext': ext, - 'format_id': video_text, - } - m = re.search(r'^(?P\d+)[pP]', video_text) - if m: - fmt['height'] = int(m.group('height')) - formats.append(fmt) - self._sort_formats(formats) - - categories = self._html_search_meta( - 'keywords', webpage, 'categories', default='').split(',') - - return { + return merge_dicts(info, { 'id': video_id, 'display_id': display_id, 'title': title, - 'thumbnail': thumbnail, + 'description': description, 'categories': categories, + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, 'age_limit': 18, - 'formats': formats, - } + }) From 787c3604671283bd4945eefb87866d01fb973097 Mon Sep 17 00:00:00 2001 From: Devon Meunier Date: Sun, 19 May 2019 07:32:46 -0400 Subject: [PATCH 26/47] [cbc:watch] Add support for authentication --- youtube_dl/extractor/cbc.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 751a3a8f2..b02cddbfd 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import json import re +from xml.sax.saxutils import escape from .common import InfoExtractor from ..compat import ( @@ -216,6 +217,29 @@ class CBCWatchBaseIE(InfoExtractor): 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', } _GEO_COUNTRIES = ['CA'] + _LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login' + _TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token' + _API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37' + _NETRC_MACHINE = 'cbcwatch' + + def _signature(self, email, password): + data = json.dumps({ + 'email': email, + 'password': password, + }).encode() + headers = {'content-type': 'application/json'} + query = {'apikey': self._API_KEY} + resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query) + access_token = resp['access_token'] + + # token + query = { + 'access_token': access_token, + 'apikey': self._API_KEY, + 'jwtapp': 'jwt', + } + resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query) + return resp['signature'] def _call_api(self, path, video_id): url = path if path.startswith('http') else self._API_BASE_URL + path @@ -249,13 +273,21 @@ class CBCWatchBaseIE(InfoExtractor): return self._device_id and self._device_token def _register_device(self): - self._device_id = self._device_token = None result = self._download_xml( self._API_BASE_URL + 'device/register', None, 'Acquiring device token', data=b'web') self._device_id = xpath_text(result, 'deviceId', fatal=True) - self._device_token = xpath_text(result, 'deviceToken', fatal=True) + anon_device_token = xpath_text(result, 'deviceToken', fatal=True) + email, password = self._get_login_info() + if email and password: + signature = self._signature(email, password) + data = '{0}{1}web'.format(escape(signature), escape(self._device_id)).encode() + url = self._API_BASE_URL + 'device/login' + result = self._download_xml(url, None, data=data, headers={'content-type': 'application/xml'}) + self._device_token = xpath_text(result, 'token', fatal=True) + else: + self._device_token = anon_device_token self._downloader.cache.store( 'cbcwatch', 'device', { 'id': self._device_id, From c76cdf2382c91af13de0c7580b1b5e1b24484664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Mar 2020 01:41:54 +0700 Subject: [PATCH 27/47] [cbc:watch] Fix authenticated device token caching (closes #19160) --- youtube_dl/extractor/cbc.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index b02cddbfd..fd5ec6033 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import hashlib import json import re from xml.sax.saxutils import escape @@ -263,7 +264,8 @@ class CBCWatchBaseIE(InfoExtractor): def _real_initialize(self): if self._valid_device_token(): return - device = self._downloader.cache.load('cbcwatch', 'device') or {} + device = self._downloader.cache.load( + 'cbcwatch', self._cache_device_key()) or {} self._device_id, self._device_token = device.get('id'), device.get('token') if self._valid_device_token(): return @@ -272,24 +274,30 @@ class CBCWatchBaseIE(InfoExtractor): def _valid_device_token(self): return self._device_id and self._device_token + def _cache_device_key(self): + email, _ = self._get_login_info() + return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device' + def _register_device(self): result = self._download_xml( self._API_BASE_URL + 'device/register', None, 'Acquiring device token', data=b'web') self._device_id = xpath_text(result, 'deviceId', fatal=True) - anon_device_token = xpath_text(result, 'deviceToken', fatal=True) email, password = self._get_login_info() if email and password: signature = self._signature(email, password) - data = '{0}{1}web'.format(escape(signature), escape(self._device_id)).encode() + data = '{0}{1}web'.format( + escape(signature), escape(self._device_id)).encode() url = self._API_BASE_URL + 'device/login' - result = self._download_xml(url, None, data=data, headers={'content-type': 'application/xml'}) + result = self._download_xml( + url, None, data=data, + headers={'content-type': 'application/xml'}) self._device_token = xpath_text(result, 'token', fatal=True) else: - self._device_token = anon_device_token + self._device_token = xpath_text(result, 'deviceToken', fatal=True) self._downloader.cache.store( - 'cbcwatch', 'device', { + 'cbcwatch', self._cache_device_key(), { 'id': self._device_id, 'token': self._device_token, }) From a6c5859d6b106733905c3a95fc52b53a784c94da Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 22 Mar 2020 09:24:07 +0100 Subject: [PATCH 28/47] [soundcloud] fix download url extraction(closes #24394) --- youtube_dl/extractor/soundcloud.py | 91 ++++++++---------------------- 1 file changed, 24 insertions(+), 67 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index a1372d389..ff6be0b54 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -27,6 +27,7 @@ from ..utils import ( unified_timestamp, update_url_query, url_or_none, + urlhandle_detect_ext, ) @@ -96,7 +97,7 @@ class SoundcloudIE(InfoExtractor): 'repost_count': int, } }, - # not streamable song, preview + # geo-restricted { 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', 'info_dict': { @@ -108,17 +109,13 @@ class SoundcloudIE(InfoExtractor): 'uploader_id': '9615865', 'timestamp': 1337635207, 'upload_date': '20120521', - 'duration': 30, + 'duration': 227.155, 'license': 'all-rights-reserved', 'view_count': int, 'like_count': int, 'comment_count': int, 'repost_count': int, }, - 'params': { - # rtmp - 'skip_download': True, - }, }, # private link { @@ -229,7 +226,6 @@ class SoundcloudIE(InfoExtractor): 'skip_download': True, }, }, - # not available via api.soundcloud.com/i1/tracks/id/streams { 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer', 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7', @@ -250,11 +246,9 @@ class SoundcloudIE(InfoExtractor): 'comment_count': int, 'repost_count': int, }, - 'expected_warnings': ['Unable to download JSON metadata'], } ] - _API_BASE = 'https://api.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/' _BASE_URL = 'https://soundcloud.com/' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' @@ -316,10 +310,9 @@ class SoundcloudIE(InfoExtractor): def _resolv_url(cls, url): return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url - def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2): + def _extract_info_dict(self, info, full_title=None, secret_token=None): track_id = compat_str(info['id']) title = info['title'] - track_base_url = self._API_BASE + 'tracks/%s' % track_id format_urls = set() formats = [] @@ -328,21 +321,22 @@ class SoundcloudIE(InfoExtractor): query['secret_token'] = secret_token if info.get('downloadable') and info.get('has_downloads_left'): - format_url = update_url_query( - info.get('download_url') or track_base_url + '/download', query) - format_urls.add(format_url) - if version == 2: - v1_info = self._download_json( - track_base_url, track_id, query=query, fatal=False) or {} - else: - v1_info = info - formats.append({ - 'format_id': 'download', - 'ext': v1_info.get('original_format') or 'mp3', - 'filesize': int_or_none(v1_info.get('original_content_size')), - 'url': format_url, - 'preference': 10, - }) + download_url = update_url_query( + self._API_V2_BASE + 'tracks/' + track_id + '/download', query) + redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') + if redirect_url: + urlh = self._request_webpage( + HEADRequest(redirect_url), track_id, fatal=False) + if urlh: + format_url = urlh.geturl() + format_urls.add(format_url) + formats.append({ + 'format_id': 'download', + 'ext': urlhandle_detect_ext(urlh) or 'mp3', + 'filesize': int_or_none(urlh.headers.get('Content-Length')), + 'url': format_url, + 'preference': 10, + }) def invalid_url(url): return not url or url in format_urls @@ -406,42 +400,11 @@ class SoundcloudIE(InfoExtractor): }, 'http' if protocol == 'progressive' else protocol, t.get('snipped') or '/preview/' in format_url) - if not formats: - # Old API, does not work for some tracks (e.g. - # https://soundcloud.com/giovannisarani/mezzo-valzer) - # and might serve preview URLs (e.g. - # http://www.soundcloud.com/snbrn/ele) - format_dict = self._download_json( - track_base_url + '/streams', track_id, - 'Downloading track url', query=query, fatal=False) or {} - - for key, stream_url in format_dict.items(): - if invalid_url(stream_url): - continue - format_urls.add(stream_url) - mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key) - if mobj: - protocol, ext, abr = mobj.groups() - add_format({ - 'abr': abr, - 'ext': ext, - 'url': stream_url, - }, protocol) - - if not formats: - # We fallback to the stream_url in the original info, this - # cannot be always used, sometimes it can give an HTTP 404 error - urlh = self._request_webpage( - HEADRequest(info.get('stream_url') or track_base_url + '/stream'), - track_id, query=query, fatal=False) - if urlh: - stream_url = urlh.geturl() - if not invalid_url(stream_url): - add_format({'url': stream_url}, 'http') - for f in formats: f['vcodec'] = 'none' + if not formats and info.get('policy') == 'BLOCK': + self.raise_geo_restricted() self._sort_formats(formats) user = info.get('user') or {} @@ -511,16 +474,10 @@ class SoundcloudIE(InfoExtractor): resolve_title += '/%s' % token info_json_url = self._resolv_url(self._BASE_URL + resolve_title) - version = 2 info = self._download_json( - info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False) - if not info: - info = self._download_json( - info_json_url.replace(self._API_V2_BASE, self._API_BASE), - full_title, 'Downloading info JSON', query=query) - version = 1 + info_json_url, full_title, 'Downloading info JSON', query=query) - return self._extract_info_dict(info, full_title, token, version) + return self._extract_info_dict(info, full_title, token) class SoundcloudPlaylistBaseIE(SoundcloudIE): From 2e20cb36364b91c1d928ce896064fdc7c49e82f8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 23 Mar 2020 12:57:10 +0100 Subject: [PATCH 29/47] [limelight] remove disabled API requests(closes #24255) --- youtube_dl/extractor/limelight.py | 125 ++++++++++++----------------- youtube_dl/extractor/pokemon.py | 12 +-- youtube_dl/extractor/telequebec.py | 2 - youtube_dl/extractor/tfo.py | 6 +- 4 files changed, 59 insertions(+), 86 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 729d8de50..39f74d282 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -18,7 +18,6 @@ from ..utils import ( class LimelightBaseIE(InfoExtractor): _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' - _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' @classmethod def _extract_urls(cls, webpage, source_url): @@ -70,7 +69,8 @@ class LimelightBaseIE(InfoExtractor): try: return self._download_json( self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), - item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers) + item_id, 'Downloading PlaylistService %s JSON' % method, + fatal=fatal, headers=headers) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission'] @@ -79,22 +79,22 @@ class LimelightBaseIE(InfoExtractor): raise ExtractorError(error, expected=True) raise - def _call_api(self, organization_id, item_id, method): - return self._download_json( - self._API_URL % (organization_id, self._API_PATH, item_id, method), - item_id, 'Downloading API %s JSON' % method) - - def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None): + def _extract(self, item_id, pc_method, mobile_method, referer=None): pc = self._call_playlist_service(item_id, pc_method, referer=referer) - metadata = self._call_api(pc['orgId'], item_id, meta_method) - mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer) - return pc, mobile, metadata + mobile = self._call_playlist_service( + item_id, mobile_method, fatal=False, referer=referer) + return pc, mobile + + def _extract_info(self, pc, mobile, i, referer): + get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {} + pc_item = get_item(pc, 'playlistItems') + mobile_item = get_item(mobile, 'mediaList') + video_id = pc_item.get('mediaId') or mobile_item['mediaId'] + title = pc_item.get('title') or mobile_item['title'] - def _extract_info(self, streams, mobile_urls, properties): - video_id = properties['media_id'] formats = [] urls = [] - for stream in streams: + for stream in pc_item.get('streams', []): stream_url = stream.get('url') if not stream_url or stream.get('drmProtected') or stream_url in urls: continue @@ -155,7 +155,7 @@ class LimelightBaseIE(InfoExtractor): }) formats.append(fmt) - for mobile_url in mobile_urls: + for mobile_url in mobile_item.get('mobileUrls', []): media_url = mobile_url.get('mobileUrl') format_id = mobile_url.get('targetMediaPlatform') if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls: @@ -179,54 +179,34 @@ class LimelightBaseIE(InfoExtractor): self._sort_formats(formats) - title = properties['title'] - description = properties.get('description') - timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date')) - duration = float_or_none(properties.get('duration_in_milliseconds'), 1000) - filesize = int_or_none(properties.get('total_storage_in_bytes')) - categories = [properties.get('category')] - tags = properties.get('tags', []) - thumbnails = [{ - 'url': thumbnail['url'], - 'width': int_or_none(thumbnail.get('width')), - 'height': int_or_none(thumbnail.get('height')), - } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')] - subtitles = {} - for caption in properties.get('captions', []): - lang = caption.get('language_code') - subtitles_url = caption.get('url') - if lang and subtitles_url: - subtitles.setdefault(lang, []).append({ - 'url': subtitles_url, - }) - closed_captions_url = properties.get('closed_captions_url') - if closed_captions_url: - subtitles.setdefault('en', []).append({ - 'url': closed_captions_url, - 'ext': 'ttml', - }) + for flag in mobile_item.get('flags'): + if flag == 'ClosedCaptions': + closed_captions = self._call_playlist_service( + video_id, 'getClosedCaptionsDetailsByMediaId', + False, referer) or [] + for cc in closed_captions: + cc_url = cc.get('webvttFileUrl') + if not cc_url: + continue + lang = cc.get('languageCode') or self._search_regex(r'/[a-z]{2}\.vtt', cc_url, 'lang', default='en') + subtitles.setdefault(lang, []).append({ + 'url': cc_url, + }) + break + + get_meta = lambda x: pc_item.get(x) or mobile_item.get(x) return { 'id': video_id, 'title': title, - 'description': description, + 'description': get_meta('description'), 'formats': formats, - 'timestamp': timestamp, - 'duration': duration, - 'filesize': filesize, - 'categories': categories, - 'tags': tags, - 'thumbnails': thumbnails, + 'duration': float_or_none(get_meta('durationInMilliseconds'), 1000), + 'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'), 'subtitles': subtitles, } - def _extract_info_helper(self, pc, mobile, i, metadata): - return self._extract_info( - try_get(pc, lambda x: x['playlistItems'][i]['streams'], list) or [], - try_get(mobile, lambda x: x['mediaList'][i]['mobileUrls'], list) or [], - metadata) - class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' @@ -251,8 +231,6 @@ class LimelightMediaIE(LimelightBaseIE): 'description': 'md5:8005b944181778e313d95c1237ddb640', 'thumbnail': r're:^https?://.*\.jpeg$', 'duration': 144.23, - 'timestamp': 1244136834, - 'upload_date': '20090604', }, 'params': { # m3u8 download @@ -268,30 +246,29 @@ class LimelightMediaIE(LimelightBaseIE): 'title': '3Play Media Overview Video', 'thumbnail': r're:^https?://.*\.jpeg$', 'duration': 78.101, - 'timestamp': 1338929955, - 'upload_date': '20120605', - 'subtitles': 'mincount:9', + # TODO: extract all languages that were accessible via API + # 'subtitles': 'mincount:9', + 'subtitles': 'mincount:1', }, }, { 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452', 'only_matching': True, }] _PLAYLIST_SERVICE_PATH = 'media' - _API_PATH = 'media' def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) + source_url = smuggled_data.get('source_url') self._initialize_geo_bypass({ 'countries': smuggled_data.get('geo_countries'), }) - pc, mobile, metadata = self._extract( + pc, mobile = self._extract( video_id, 'getPlaylistByMediaId', - 'getMobilePlaylistByMediaId', 'properties', - smuggled_data.get('source_url')) + 'getMobilePlaylistByMediaId', source_url) - return self._extract_info_helper(pc, mobile, 0, metadata) + return self._extract_info(pc, mobile, 0, source_url) class LimelightChannelIE(LimelightBaseIE): @@ -313,6 +290,7 @@ class LimelightChannelIE(LimelightBaseIE): 'info_dict': { 'id': 'ab6a524c379342f9b23642917020c082', 'title': 'Javascript Sample Code', + 'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html', }, 'playlist_mincount': 3, }, { @@ -320,22 +298,23 @@ class LimelightChannelIE(LimelightBaseIE): 'only_matching': True, }] _PLAYLIST_SERVICE_PATH = 'channel' - _API_PATH = 'channels' def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) channel_id = self._match_id(url) + source_url = smuggled_data.get('source_url') - pc, mobile, medias = self._extract( + pc, mobile = self._extract( channel_id, 'getPlaylistByChannelId', 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', - 'media', smuggled_data.get('source_url')) + source_url) entries = [ - self._extract_info_helper(pc, mobile, i, medias['media_list'][i]) - for i in range(len(medias['media_list']))] + self._extract_info(pc, mobile, i, source_url) + for i in range(len(pc['playlistItems']))] - return self.playlist_result(entries, channel_id, pc['title']) + return self.playlist_result( + entries, channel_id, pc.get('title'), mobile.get('description')) class LimelightChannelListIE(LimelightBaseIE): @@ -368,10 +347,12 @@ class LimelightChannelListIE(LimelightBaseIE): def _real_extract(self, url): channel_list_id = self._match_id(url) - channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById') + channel_list = self._call_playlist_service( + channel_list_id, 'getMobileChannelListById') entries = [ self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel') for channel in channel_list['channelList']] - return self.playlist_result(entries, channel_list_id, channel_list['title']) + return self.playlist_result( + entries, channel_list_id, channel_list['title']) diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py index dd5f17f11..80222d428 100644 --- a/youtube_dl/extractor/pokemon.py +++ b/youtube_dl/extractor/pokemon.py @@ -20,20 +20,16 @@ class PokemonIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Ol’ Raise and Switch!', 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', - 'timestamp': 1511824728, - 'upload_date': '20171127', }, 'add_id': ['LimelightMedia'], }, { # no data-video-title - 'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008', + 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008', 'info_dict': { - 'id': '99f3bae270bf4e5097274817239ce9c8', + 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1', 'ext': 'mp4', - 'title': 'Pokémon: The Rise of Darkrai', - 'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d', - 'timestamp': 1417778347, - 'upload_date': '20141205', + 'title': "Pokémon : L'ascension de Darkrai", + 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5', }, 'add_id': ['LimelightMedia'], 'params': { diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index ae9f66787..c82c94b3a 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -38,8 +38,6 @@ class TeleQuebecIE(TeleQuebecBaseIE): 'ext': 'mp4', 'title': 'Un petit choc et puis repart!', 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374', - 'upload_date': '20180222', - 'timestamp': 1519326631, }, 'params': { 'skip_download': True, diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py index 0e2370cd8..0631cb7ab 100644 --- a/youtube_dl/extractor/tfo.py +++ b/youtube_dl/extractor/tfo.py @@ -17,14 +17,12 @@ class TFOIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P\d+)' _TEST = { 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon', - 'md5': '47c987d0515561114cf03d1226a9d4c7', + 'md5': 'cafbe4f47a8dae0ca0159937878100d6', 'info_dict': { - 'id': '100463871', + 'id': '7da3d50e495c406b8fc0b997659cc075', 'ext': 'mp4', 'title': 'Video Game Hackathon', 'description': 'md5:558afeba217c6c8d96c60e5421795c07', - 'upload_date': '20160212', - 'timestamp': 1455310233, } } From b4eb08bb03f69c587f8440912cf56aadc9e52879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 00:11:39 +0700 Subject: [PATCH 30/47] [bilibili] Add support for new URL schema with BV ids (closes #24439, closes #24442) --- youtube_dl/extractor/bilibili.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 80bd696e2..e9d0a8d0c 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -24,7 +24,18 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P\d+)/play#)(?P\d+)' + _VALID_URL = r'''(?x) + https?:// + (?:(?:www|bangumi)\.)? + bilibili\.(?:tv|com)/ + (?: + (?: + video/[aA][vV]| + anime/(?P\d+)/play\# + )(?P\d+)| + video/[bB][vV](?P[^/?#&]+) + ) + ''' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', @@ -92,6 +103,10 @@ class BiliBiliIE(InfoExtractor): 'skip_download': True, # Test metadata only }, }] + }, { + # new BV video id format + 'url': 'https://www.bilibili.com/video/BV1JE411F741', + 'only_matching': True, }] _APP_KEY = 'iVGUTjsxvpLeuDCf' @@ -109,7 +124,7 @@ class BiliBiliIE(InfoExtractor): url, smuggled_data = unsmuggle_url(url, {}) mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = mobj.group('id') or mobj.group('id_bv') anime_id = mobj.group('anime_id') webpage = self._download_webpage(url, video_id) From 63dce3094bf45964b49a2c9f26c94b10cf60c2c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 00:24:39 +0700 Subject: [PATCH 31/47] [bilibili] Add support for player.bilibili.com (closes #24402) --- youtube_dl/extractor/bilibili.py | 14 ++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 15 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index e9d0a8d0c..4dc597e16 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -434,3 +434,17 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): entries, am_id, album_title, album_data.get('intro')) return self.playlist_result(entries, am_id) + + +class BiliBiliPlayerIE(InfoExtractor): + _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P\d+)' + _TEST = { + 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1', + 'only_matching': True, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.url_result( + 'http://www.bilibili.tv/video/av%s/' % video_id, + ie=BiliBiliIE.ie_key(), video_id=video_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 64d1fa251..ef803b8a7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -105,6 +105,7 @@ from .bilibili import ( BiliBiliBangumiIE, BilibiliAudioIE, BilibiliAudioAlbumIE, + BiliBiliPlayerIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( From 4560adc820a5d4bda5babc62f0f7fc306b13ad86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 02:43:24 +0700 Subject: [PATCH 32/47] [teachable] Extract chapter metadata (closes #24421) --- youtube_dl/extractor/teachable.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index 4316a6962..290c65754 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -7,7 +7,9 @@ from .wistia import WistiaIE from ..utils import ( clean_html, ExtractorError, + int_or_none, get_element_by_class, + strip_or_none, urlencode_postdata, urljoin, ) @@ -173,11 +175,34 @@ class TeachableIE(TeachableBaseIE): title = self._og_search_title(webpage, default=None) + chapter = None + chapter_number = None + section_item = self._search_regex( + r'(?s)(?P
  • ]+\bdata-lecture-id=["\']%s[^>]+>.+?
  • )' % video_id, + webpage, 'section item', default=None, group='li') + if section_item: + chapter_number = int_or_none(self._search_regex( + r'data-ss-position=["\'](\d+)', section_item, 'section id', + default=None)) + if chapter_number is not None: + sections = [] + for s in re.findall( + r'(?s)]+\bclass=["\']section-title[^>]+>(.+?)', webpage): + section = strip_or_none(clean_html(s)) + if not section: + sections = [] + break + sections.append(section) + if chapter_number <= len(sections): + chapter = sections[chapter_number - 1] + entries = [{ '_type': 'url_transparent', 'url': wistia_url, 'ie_key': WistiaIE.ie_key(), 'title': title, + 'chapter': chapter, + 'chapter_number': chapter_number, } for wistia_url in wistia_urls] return self.playlist_result(entries, video_id, title) From be7dacf9cfc3603ba6e4f818a8988a527f06d6d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 02:46:37 +0700 Subject: [PATCH 33/47] [generic] Look for teachable embeds before wistia --- youtube_dl/extractor/generic.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d1ec56be9..a495ee15a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2536,6 +2536,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key()) + # Look for Teachable embeds, must be before Wistia + teachable_url = TeachableIE._extract_url(webpage, url) + if teachable_url: + return self.url_result(teachable_url) + # Look for embedded Wistia player wistia_urls = WistiaIE._extract_urls(webpage) if wistia_urls: @@ -3141,10 +3146,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key()) - teachable_url = TeachableIE._extract_url(webpage, url) - if teachable_url: - return self.url_result(teachable_url) - indavideo_urls = IndavideoEmbedIE._extract_urls(webpage) if indavideo_urls: return self.playlist_from_matches( From 08a27407c45745239de819f059a86559e7a75087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 02:46:55 +0700 Subject: [PATCH 34/47] [teachable] Update upskillcourses domain New version does not use teachable platform any longer --- youtube_dl/extractor/teachable.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index 290c65754..4de67b75e 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -21,7 +21,7 @@ class TeachableBaseIE(InfoExtractor): _SITES = { # Only notable ones here - 'upskillcourses.com': 'upskill', + 'v1.upskillcourses.com': 'upskill', 'academy.gns3.com': 'gns3', 'academyhacker.com': 'academyhacker', 'stackskills.com': 'stackskills', @@ -111,7 +111,7 @@ class TeachableIE(TeachableBaseIE): ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE _TESTS = [{ - 'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', + 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', 'info_dict': { 'id': 'uzw6zw58or', 'ext': 'mp4', @@ -125,13 +125,13 @@ class TeachableIE(TeachableBaseIE): 'skip_download': True, }, }, { - 'url': 'http://upskillcourses.com/courses/119763/lectures/1747100', + 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100', 'only_matching': True, }, { 'url': 'https://academy.gns3.com/courses/423415/lectures/6885939', 'only_matching': True, }, { - 'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', + 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', 'only_matching': True, }] @@ -217,17 +217,17 @@ class TeachableCourseIE(TeachableBaseIE): /(?:courses|p)/(?:enrolled/)?(?P[^/?#&]+) ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE _TESTS = [{ - 'url': 'http://upskillcourses.com/courses/essential-web-developer-course/', + 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/', 'info_dict': { 'id': 'essential-web-developer-course', 'title': 'The Essential Web Developer Course (Free)', }, 'playlist_count': 192, }, { - 'url': 'http://upskillcourses.com/courses/119763/', + 'url': 'http://v1.upskillcourses.com/courses/119763/', 'only_matching': True, }, { - 'url': 'http://upskillcourses.com/courses/enrolled/119763', + 'url': 'http://v1.upskillcourses.com/courses/enrolled/119763', 'only_matching': True, }, { 'url': 'https://academy.gns3.com/courses/enrolled/423415', From 38fa761a4549dc2c3b155306a8a9441944bdcf01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 02:55:17 +0700 Subject: [PATCH 35/47] [teachable] Update gns3 domain --- youtube_dl/extractor/teachable.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index 4de67b75e..2d9d354e8 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -22,7 +22,7 @@ class TeachableBaseIE(InfoExtractor): _SITES = { # Only notable ones here 'v1.upskillcourses.com': 'upskill', - 'academy.gns3.com': 'gns3', + 'gns3.teachable.com': 'gns3', 'academyhacker.com': 'academyhacker', 'stackskills.com': 'stackskills', 'market.saleshacker.com': 'saleshacker', @@ -128,7 +128,7 @@ class TeachableIE(TeachableBaseIE): 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100', 'only_matching': True, }, { - 'url': 'https://academy.gns3.com/courses/423415/lectures/6885939', + 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939', 'only_matching': True, }, { 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', @@ -230,7 +230,7 @@ class TeachableCourseIE(TeachableBaseIE): 'url': 'http://v1.upskillcourses.com/courses/enrolled/119763', 'only_matching': True, }, { - 'url': 'https://academy.gns3.com/courses/enrolled/423415', + 'url': 'https://gns3.teachable.com/courses/enrolled/423415', 'only_matching': True, }, { 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini', From 6e47200b6ecaeafc65a8f5a19cd12d6e91ad186e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 02:55:52 +0700 Subject: [PATCH 36/47] [teachable] Update test --- youtube_dl/extractor/teachable.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index 2d9d354e8..a75369dbe 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -111,15 +111,17 @@ class TeachableIE(TeachableBaseIE): ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE _TESTS = [{ - 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', + 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364', 'info_dict': { - 'id': 'uzw6zw58or', - 'ext': 'mp4', - 'title': 'Welcome to the Course!', - 'description': 'md5:65edb0affa582974de4625b9cdea1107', - 'duration': 138.763, - 'timestamp': 1479846621, - 'upload_date': '20161122', + 'id': 'untlgzk1v7', + 'ext': 'bin', + 'title': 'Overview', + 'description': 'md5:071463ff08b86c208811130ea1c2464c', + 'duration': 736.4, + 'timestamp': 1542315762, + 'upload_date': '20181115', + 'chapter': 'Welcome', + 'chapter_number': 1, }, 'params': { 'skip_download': True, From b439634f0e9a1f251d117303dc60f02fd0ab11ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 03:07:34 +0700 Subject: [PATCH 37/47] [ChangeLog] Actualize [ci skip] --- ChangeLog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog b/ChangeLog index 84b43c642..c53cde141 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +version + +Extractors +* [teachable] Update upskillcourses and gns3 domains +* [generic] Look for teachable embeds before wistia ++ [teachable] Extract chapter metadata (#24421) ++ [bilibili] Add support for player.bilibili.com (#24402) ++ [bilibili] Add support for new URL schema with BV ids (#24439, #24442) +* [limelight] Remove disabled API requests (#24255) +* [soundcloud] Fix download URL extraction (#24394) ++ [cbc:watch] Add support for authentication (#19160) +* [hellporno] Fix extraction (#24399) +* [xtube] Fix formats extraction (#24348) +* [ndr] Fix extraction (#24326) +* [nhk] Update m3u8 URL and use native HLS downloader (#24329) +- [nhk] Remove obsolete rtmp formats (#24329) +* [nhk] Relax URL regular expression (#24329) + + version 2020.03.08 Core From 30b5121a1c63c3f84251e9add3c9bf9e3c490228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 03:12:15 +0700 Subject: [PATCH 38/47] [ChangeLog] Actualize [ci skip] --- ChangeLog | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c53cde141..fc1e28020 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ version +Core +- [utils] Revert support for cookie files with spaces used instead of tabs + Extractors * [teachable] Update upskillcourses and gns3 domains * [generic] Look for teachable embeds before wistia @@ -15,12 +18,13 @@ Extractors * [nhk] Update m3u8 URL and use native HLS downloader (#24329) - [nhk] Remove obsolete rtmp formats (#24329) * [nhk] Relax URL regular expression (#24329) +- [vimeo] Revert fix showcase password protected video extraction (#24224) version 2020.03.08 Core -+ [utils] Add support for cookie files with spaces ++ [utils] Add support for cookie files with spaces used instead of tabs Extractors + [pornhub] Add support for pornhubpremium.com (#24288) From 049c0486bbd57a9bb5fb5a6a5eeff82fd4ac03ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Mar 2020 03:14:30 +0700 Subject: [PATCH 39/47] release 2020.03.24 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 8 files changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d82ff9111..40a869113 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.03.08** +- [ ] I've verified that I'm running youtube-dl version **2020.03.24** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.03.08 + [debug] youtube-dl version 2020.03.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 04b350f76..7b10df3d4 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.03.08** +- [ ] I've verified that I'm running youtube-dl version **2020.03.24** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 6f17ad7bc..04bbcfa68 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.03.08** +- [ ] I've verified that I'm running youtube-dl version **2020.03.24** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index efb179ea5..a9e231817 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.03.08** +- [ ] I've verified that I'm running youtube-dl version **2020.03.24** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.03.08 + [debug] youtube-dl version 2020.03.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index cf4874bcc..4a3d32d51 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.03.08** +- [ ] I've verified that I'm running youtube-dl version **2020.03.24** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index fc1e28020..f753972c4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2020.03.24 Core - [utils] Revert support for cookie files with spaces used instead of tabs diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 02bc088ab..174b83bf3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -98,6 +98,7 @@ - **BiliBili** - **BilibiliAudio** - **BilibiliAudioAlbum** + - **BiliBiliPlayer** - **BioBioChileTV** - **BIQLE** - **BitChute** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0f768f7c1..5aedd3268 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.03.08' +__version__ = '2020.03.24' From d44a707fdde6c0138e9e275ed5b4ffb0b8f72966 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Apr 2020 20:34:57 +0700 Subject: [PATCH 40/47] [spankwire] Fix extraction (closes #18924, closes #20648) --- youtube_dl/extractor/spankwire.py | 201 +++++++++++++++++++----------- 1 file changed, 125 insertions(+), 76 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 44d8fa52f..8f67463ed 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -3,34 +3,47 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, -) from ..utils import ( - sanitized_Request, + float_or_none, + int_or_none, + merge_dicts, + str_or_none, str_to_int, - unified_strdate, + url_or_none, ) -from ..aes import aes_decrypt_text class SpankwireIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pspankwire\.com/[^/]*/video(?P[0-9]+)/?)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?spankwire\.com/ + (?: + [^/]+/video| + EmbedPlayer\.aspx/?\?.*?\bArticleId= + ) + (?P\d+) + ''' _TESTS = [{ # download URL pattern: */P_K_.mp4 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', - 'md5': '8bbfde12b101204b39e4b9fe7eb67095', + 'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd', 'info_dict': { 'id': '103545', 'ext': 'mp4', 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', 'description': 'Crazy Bitch X rated music video.', + 'duration': 222, 'uploader': 'oreusz', 'uploader_id': '124697', - 'upload_date': '20070507', + 'timestamp': 1178587885, + 'upload_date': '20070508', + 'average_rating': float, + 'view_count': int, + 'comment_count': int, 'age_limit': 18, - } + 'categories': list, + 'tags': list, + }, }, { # download URL pattern: */mp4__.mp4 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/', @@ -45,83 +58,119 @@ class SpankwireIE(InfoExtractor): 'upload_date': '20150822', 'age_limit': 18, }, + 'params': { + 'proxy': '127.0.0.1:8118' + }, + 'skip': 'removed', + }, { + 'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true', + 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) - req = sanitized_Request('http://www.' + mobj.group('url')) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) + video = self._download_json( + 'https://www.spankwire.com/api/video/%s.json' % video_id, video_id) - title = self._html_search_regex( - r'

    ([^<]+)', webpage, 'title') - description = self._html_search_regex( - r'(?s)(.+?)', - webpage, 'description', fatal=False) - thumbnail = self._html_search_regex( - r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']', - webpage, 'thumbnail', fatal=False) - - uploader = self._html_search_regex( - r'by:\s*]*>(.+?)', - webpage, 'uploader', fatal=False) - uploader_id = self._html_search_regex( - r'by:\s* on (.+?) at \d+:\d+', - webpage, 'upload date', fatal=False)) - - view_count = str_to_int(self._html_search_regex( - r'
    ([\d,\.]+) views
    ', - webpage, 'view count', fatal=False)) - comment_count = str_to_int(self._html_search_regex( - r']*>([\d,\.]+)', - webpage, 'comment count', fatal=False)) - - videos = re.findall( - r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage) - heights = [int(video[0]) for video in videos] - video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos])) - if webpage.find(r'flashvars\.encrypted = "true"') != -1: - password = self._search_regex( - r'flashvars\.video_title = "([^"]+)', - webpage, 'password').replace('+', ' ') - video_urls = list(map( - lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), - video_urls)) + title = video['title'] formats = [] - for height, video_url in zip(heights, video_urls): - path = compat_urllib_parse_urlparse(video_url).path - m = re.search(r'/(?P\d+)[pP]_(?P\d+)[kK]', path) - if m: - tbr = int(m.group('tbr')) - height = int(m.group('height')) - else: - tbr = None - formats.append({ - 'url': video_url, - 'format_id': '%dp' % height, - 'height': height, - 'tbr': tbr, + videos = video.get('videos') + if isinstance(videos, dict): + for format_id, format_url in videos.items(): + video_url = url_or_none(format_url) + if not format_url: + continue + height = int_or_none(self._search_regex( + r'(\d+)[pP]', format_id, 'height', default=None)) + m = re.search( + r'/(?P\d+)[pP]_(?P\d+)[kK]', video_url) + if m: + tbr = int(m.group('tbr')) + height = height or int(m.group('height')) + else: + tbr = None + formats.append({ + 'url': video_url, + 'format_id': '%dp' % height if height else format_id, + 'height': height, + 'tbr': tbr, + }) + m3u8_url = url_or_none(video.get('HLS')) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + self._sort_formats(formats, ('height', 'tbr', 'width', 'format_id')) + + view_count = str_to_int(video.get('viewed')) + + thumbnails = [] + for preference, t in enumerate(('', '2x'), start=0): + thumbnail_url = url_or_none(video.get('poster%s' % t)) + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'preference': preference, }) - self._sort_formats(formats) - age_limit = self._rta_search(webpage) + def extract_names(key): + entries_list = video.get(key) + if not isinstance(entries_list, list): + return + entries = [] + for entry in entries_list: + name = str_or_none(entry.get('name')) + if name: + entries.append(name) + return entries - return { + categories = extract_names('categories') + tags = extract_names('tags') + + uploader = None + info = {} + + webpage = self._download_webpage( + 'https://www.spankwire.com/_/video%s/' % video_id, video_id, + fatal=False) + if webpage: + info = self._search_json_ld(webpage, video_id, default={}) + thumbnail_url = None + if 'thumbnail' in info: + thumbnail_url = url_or_none(info['thumbnail']) + del info['thumbnail'] + if not thumbnail_url: + thumbnail_url = self._og_search_thumbnail(webpage) + if thumbnail_url: + thumbnails.append({ + 'url': thumbnail_url, + 'preference': 10, + }) + uploader = self._html_search_regex( + r'(?s)by\s*]+\bclass=["\']uploaded__by[^>]*>(.+?)
    ', + webpage, 'uploader', fatal=False) + if not view_count: + view_count = str_to_int(self._search_regex( + r'data-views=["\']([\d,.]+)', webpage, 'view count', + fatal=False)) + + return merge_dicts({ 'id': video_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, + 'description': video.get('description'), + 'duration': int_or_none(video.get('duration')), + 'thumbnails': thumbnails, 'uploader': uploader, - 'uploader_id': uploader_id, - 'upload_date': upload_date, + 'uploader_id': str_or_none(video.get('userId')), + 'timestamp': int_or_none(video.get('time_approved_on')), + 'average_rating': float_or_none(video.get('rating')), 'view_count': view_count, - 'comment_count': comment_count, + 'comment_count': int_or_none(video.get('comments')), + 'age_limit': 18, + 'categories': categories, + 'tags': tags, 'formats': formats, - 'age_limit': age_limit, - } + }, info) From 8fae1a04eb20279a76d6b1eccdb8249718ad9942 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Apr 2020 20:42:10 +0700 Subject: [PATCH 41/47] [spankwire] Add support for generic embeds (refs #24633) --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/spankwire.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a495ee15a..63b52306a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -60,6 +60,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE from .drtuber import DrTuberIE from .redtube import RedTubeIE from .tube8 import Tube8IE +from .spankwire import SpankwireIE from .vimeo import VimeoIE from .dailymotion import DailymotionIE from .dailymail import DailyMailIE @@ -2715,6 +2716,11 @@ class GenericIE(InfoExtractor): if tube8_urls: return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key()) + # Look for embedded Spankwire player + spankwire_urls = SpankwireIE._extract_urls(webpage) + if spankwire_urls: + return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key()) + # Look for embedded Tvigle player mobj = re.search( r']+?src=(["\'])(?P(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 8f67463ed..35ab9ec37 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -67,6 +67,12 @@ class SpankwireIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) From 52c4c51556df15f98c9cda911e36995fe0fc0a47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Apr 2020 20:56:14 +0700 Subject: [PATCH 42/47] [youporn] Add support form generic embeds --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/youporn.py | 23 +++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 63b52306a..0ada6354e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -61,6 +61,7 @@ from .drtuber import DrTuberIE from .redtube import RedTubeIE from .tube8 import Tube8IE from .spankwire import SpankwireIE +from .youporn import YouPornIE from .vimeo import VimeoIE from .dailymotion import DailymotionIE from .dailymail import DailyMailIE @@ -2721,6 +2722,11 @@ class GenericIE(InfoExtractor): if spankwire_urls: return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key()) + # Look for embedded YouPorn player + youporn_urls = YouPornIE._extract_urls(webpage) + if youporn_urls: + return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key()) + # Look for embedded Tvigle player mobj = re.search( r']+?src=(["\'])(?P(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index d4eccb4b2..e7fca22de 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -5,7 +5,6 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, - sanitized_Request, str_to_int, unescapeHTML, unified_strdate, @@ -15,7 +14,7 @@ from ..aes import aes_decrypt_text class YouPornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P\d+)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', 'md5': '3744d24c50438cf5b6f6d59feb5055c2', @@ -57,16 +56,28 @@ class YouPornIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/', + 'only_matching': True, + }, { + 'url': 'http://www.youporn.com/watch/505835', + 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)', + webpage) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or video_id - request = sanitized_Request(url) - request.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(request, display_id) + webpage = self._download_webpage( + 'http://www.youporn.com/watch/%s' % video_id, display_id, + headers={'Cookie': 'age_verified=1'}) title = self._html_search_regex( r'(?s)]+class=["\']watchVideoTitle[^>]+>(.+?)', From 4e7b5bba5fb73502476c61e4931284c9c3d3d232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Apr 2020 21:27:36 +0700 Subject: [PATCH 43/47] [mofosex] Add support for generic embeds (closes #24633) --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/mofosex.py | 23 +++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ef803b8a7..e407ab3d9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -636,7 +636,10 @@ from .mixcloud import ( from .mlb import MLBIE from .mnet import MnetIE from .moevideo import MoeVideoIE -from .mofosex import MofosexIE +from .mofosex import ( + MofosexIE, + MofosexEmbedIE, +) from .mojvideo import MojvideoIE from .morningstar import MorningstarIE from .motherless import ( diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0ada6354e..ce8252f6a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -60,6 +60,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE from .drtuber import DrTuberIE from .redtube import RedTubeIE from .tube8 import Tube8IE +from .mofosex import MofosexEmbedIE from .spankwire import SpankwireIE from .youporn import YouPornIE from .vimeo import VimeoIE @@ -2717,6 +2718,11 @@ class GenericIE(InfoExtractor): if tube8_urls: return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key()) + # Look for embedded Mofosex player + mofosex_urls = MofosexEmbedIE._extract_urls(webpage) + if mofosex_urls: + return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key()) + # Look for embedded Spankwire player spankwire_urls = SpankwireIE._extract_urls(webpage) if spankwire_urls: diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index 1c652813a..5234cac02 100644 --- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -1,5 +1,8 @@ from __future__ import unicode_literals +import re + +from .common import InfoExtractor from ..utils import ( int_or_none, str_to_int, @@ -54,3 +57,23 @@ class MofosexIE(KeezMoviesIE): }) return info + + +class MofosexEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P\d+)' + _TESTS = [{ + 'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)', + webpage) + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.url_result( + 'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id), + ie=MofosexIE.ie_key(), video_id=video_id) From 6a6e1a0cd8bacf5a23f731eedaa1783503470227 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Apr 2020 02:05:06 +0700 Subject: [PATCH 44/47] [tele5] Fix extraction (closes #24553) --- youtube_dl/extractor/tele5.py | 61 ++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index 33a72083b..364556a1f 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -1,9 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from .jwplatform import JWPlatformIE from .nexx import NexxIE -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + NO_DEFAULT, + try_get, +) class Tele5IE(InfoExtractor): @@ -44,14 +54,49 @@ class Tele5IE(InfoExtractor): qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0] - if not video_id: + NEXX_ID_RE = r'\d{6,}' + JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}' + + def nexx_result(nexx_id): + return self.url_result( + 'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id, + ie=NexxIE.ie_key(), video_id=nexx_id) + + nexx_id = jwplatform_id = None + + if video_id: + if re.match(NEXX_ID_RE, video_id): + return nexx_result(video_id) + elif re.match(JWPLATFORM_ID_RE, video_id): + jwplatform_id = video_id + + if not nexx_id: display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._html_search_regex( - (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)', - r'\s+id\s*=\s*["\']player_(\d{6,})', - r'\bdata-id\s*=\s*["\'](\d{6,})'), webpage, 'video id') + + def extract_id(pattern, name, default=NO_DEFAULT): + return self._html_search_regex( + (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern, + r'\s+id\s*=\s*["\']player_(%s)' % pattern, + r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name, + default=default) + + nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None) + if nexx_id: + return nexx_result(nexx_id) + + if not jwplatform_id: + jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id') + + media = self._download_json( + 'https://cdn.jwplayer.com/v2/media/' + jwplatform_id, + display_id) + nexx_id = try_get( + media, lambda x: x['playlist'][0]['nexx_id'], compat_str) + + if nexx_id: + return nexx_result(nexx_id) return self.url_result( - 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id, - ie=NexxIE.ie_key(), video_id=video_id) + 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), + video_id=jwplatform_id) From 13b08034b53efdcf7055df92199a0f35cf1e172e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Apr 2020 22:54:34 +0700 Subject: [PATCH 45/47] [extractor/common] Skip malformed ISM manifest XMLs while extracting ISM formats (#24667) --- youtube_dl/extractor/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index eaae5e484..c51a3a07d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2340,6 +2340,8 @@ class InfoExtractor(object): if res is False: return [] ism_doc, urlh = res + if ism_doc is None: + return [] return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id) From 91bd3bd0194119fccc91b7eafb7afdcda646ad57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Apr 2020 22:55:36 +0700 Subject: [PATCH 46/47] [tv4] Fix ISM formats extraction (closes #24667) --- youtube_dl/extractor/tv4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index a819d048c..c498b0191 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -99,7 +99,7 @@ class TV4IE(InfoExtractor): manifest_url.replace('.m3u8', '.f4m'), video_id, f4m_id='hds', fatal=False)) formats.extend(self._extract_ism_formats( - re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url), + re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url), video_id, ism_id='mss', fatal=False)) if not formats and info.get('is_geo_restricted'): From c9595ee78027ecf6bedbdc33c690228fa7d3a5bb Mon Sep 17 00:00:00 2001 From: Felix Stupp Date: Tue, 7 Apr 2020 16:21:25 +0000 Subject: [PATCH 47/47] [twitch:clips] Extend _VALID_URL (closes #24290) (#24642) --- youtube_dl/extractor/twitch.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 0db2dca41..78ee0115c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -643,7 +643,14 @@ class TwitchStreamIE(TwitchBaseIE): class TwitchClipsIE(TwitchBaseIE): IE_NAME = 'twitch:clips' - _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P[^/?#&]+)' + _VALID_URL = r'''(?x) + https?:// + (?: + clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)| + (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/ + ) + (?P[^/?#&]+) + ''' _TESTS = [{ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', @@ -669,6 +676,12 @@ class TwitchClipsIE(TwitchBaseIE): }, { 'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited', 'only_matching': True, + }, { + 'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank', + 'only_matching': True, + }, { + 'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank', + 'only_matching': True, }] def _real_extract(self, url):