From f749ac67abe624afa74fda7fd56129af5b1ecc01 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis Date: Wed, 20 Feb 2019 10:42:59 +0100 Subject: [PATCH 001/123] first implementation of intl.dropout.tv --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/intldropout.py | 101 ++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 youtube_dl/extractor/intldropout.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de38c6641..20b660d5c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -308,6 +308,7 @@ from .discoveryvr import DiscoveryVRIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE +from .intldropout import IntlDropoutIE from .dw import ( DWIE, DWArticleIE, diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py new file mode 100644 index 000000000..21f598116 --- /dev/null +++ b/youtube_dl/extractor/intldropout.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from .vimeo import VHXEmbedIE + +from ..utils import ( + ExtractorError, + sanitized_Request, + urlencode_postdata +) + +import re + +# https://intl.dropout.tv/login +# GET +# authenticity_token + +# https://intl.dropout.tv/login +# POST +# authenticity_token +# email +# password +# utf8 ✓ + + +# https://embed.vhx.tv/videos/414462?api=1&autoplay=1&referrer=https%3A%2F%2Fintl.dropout.tv%2Fbrowse&playsinline=1&title=0&context=https%3A%2F%2Fintl.dropout.tv%2Fbrowse&back=Browse&color=feea3b&sharing=1&auth-user-token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyX2lkIjo1Mzk0NDEwLCJleHAiOjE1NDc0NzA1NDB9._y4H94pKyIOu_GT11qC2SeJnSou6EzN9jI1A-P3tbo8&live=0 +# 
https://vhx-adaptive-hap.akamaized.net/-ctx--user_id,5394410--platform_id,27--video_id,414462--channel_id,55407--plan,standard-/vods3cf/0/amlst:c-55407/v-414462/2220471,2220472,2220473,2220474,2220475,2220476/playlist.m3u8?token=exp=1547481565~acl=/-ctx--user_id,5394410--platform_id,27--video_id,414462--channel_id,55407--plan,standard-/vods3cf/0/amlst:c-55407/v-414462/2220471,2220472,2220473,2220474,2220475,2220476/*~hmac=ceb8508146d2dec2b868db9ca304ec13d54502cca0a7d1cd0def7a85a9ef3962& +# https://api.vhx.tv/videos/414462/files?auth_user_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyX2lkIjo1Mzk0NDEwLCJleHAiOjE1NDc0NzA1NDB9._y4H94pKyIOu_GT11qC2SeJnSou6EzN9jI1A-P3tbo8&_=1547463565300 + + +class IntlDropoutIE(VHXEmbedIE): + _LOGIN_URL = 'https://intl.dropout.tv/login' + _LOGOUT_URL = 'https://intl.dropout.tv/logout' + _VALID_URL = r'https://intl\.dropout\.tv/(?P[^/]+/.+)' + _TEST = { + 'url': 'https://intl.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '42', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_initialize(self): + self._login() + + def _login(self): + email, password = self._get_login_info() + if email is None or password is None: + if self._downloader.params.get('cookiefile') is None: + raise ExtractorError('No login info available, needed for using %s.' 
% self.IE_NAME, expected=True) + return True + + login_page = self._download_webpage( + self._LOGIN_URL, None, + note='Downloading login page', + errnote='unable to fetch login page', fatal=False + ) + + if login_page is False: + return + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'passwordless': 0, + 'email': email, + 'password': password + }) + + request = sanitized_Request( + self._LOGIN_URL, urlencode_postdata(login_form)) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') + try: + self._download_webpage(request, None, 'Logging in') + except Exception: + print('error') + + def _real_extract(self, url): + webpage = self._download_webpage(url, None) + embed = self._html_search_regex(r']+"(?Phttps://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') + + print(embed) + + self._download_webpage( + self._LOGOUT_URL, None, + note='logging out', + errnote='unable to logout', fatal=False + ) + + return self.url_result(embed) + From 89ed0c9eedc3cb6c1316f8e0c697dccfd47df408 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis Date: Wed, 20 Feb 2019 18:58:00 +0100 Subject: [PATCH 002/123] finished dropout extractor --- youtube_dl/extractor/intldropout.py | 37 +++++++++++++---------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 21f598116..5f5696428 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -1,14 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor - from .vimeo import VHXEmbedIE from ..utils import ( ExtractorError, sanitized_Request, - urlencode_postdata + urlencode_postdata, + RegexNotFoundError ) import re @@ -31,9 +30,10 @@ import re class IntlDropoutIE(VHXEmbedIE): + IE_DESC = 'International Dropout.tv' _LOGIN_URL = 'https://intl.dropout.tv/login' _LOGOUT_URL = 'https://intl.dropout.tv/logout' - _VALID_URL = 
r'https://intl\.dropout\.tv/(?P[^/]+/.+)' + _VALID_URL = r'https://intl\.dropout\.tv/(?P.+)' _TEST = { 'url': 'https://intl.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', @@ -59,16 +59,16 @@ class IntlDropoutIE(VHXEmbedIE): if self._downloader.params.get('cookiefile') is None: raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) return True - + login_page = self._download_webpage( self._LOGIN_URL, None, note='Downloading login page', errnote='unable to fetch login page', fatal=False ) - + if login_page is False: return - + login_form = self._hidden_inputs(login_page) login_form.update({ @@ -76,7 +76,7 @@ class IntlDropoutIE(VHXEmbedIE): 'email': email, 'password': password }) - + request = sanitized_Request( self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') @@ -84,18 +84,15 @@ class IntlDropoutIE(VHXEmbedIE): self._download_webpage(request, None, 'Logging in') except Exception: print('error') - + def _real_extract(self, url): webpage = self._download_webpage(url, None) - embed = self._html_search_regex(r']+"(?Phttps://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') - - print(embed) - - self._download_webpage( - self._LOGOUT_URL, None, - note='logging out', - errnote='unable to logout', fatal=False - ) - - return self.url_result(embed) + try: + video = self._html_search_regex(r']+"(?Phttps://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') + except RegexNotFoundError: + items = re.findall(r'.+)', url, 'id') + playlist_title = self._html_search_regex(r'

]*>(?P[^<]+)<', webpage, 'title') + return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title) + return self.url_result(video) From 7781e2710dbb16dfdb1a7cf14981ce07142faa68 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Wed, 20 Feb 2019 18:58:00 +0100 Subject: [PATCH 003/123] finished dropout extractor --- youtube_dl/extractor/intldropout.py | 37 +++++++++++++---------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 21f598116..5f5696428 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -1,14 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor - from .vimeo import VHXEmbedIE from ..utils import ( ExtractorError, sanitized_Request, - urlencode_postdata + urlencode_postdata, + RegexNotFoundError ) import re @@ -31,9 +30,10 @@ import re class IntlDropoutIE(VHXEmbedIE): + IE_DESC = 'International Dropout.tv' _LOGIN_URL = 'https://intl.dropout.tv/login' _LOGOUT_URL = 'https://intl.dropout.tv/logout' - _VALID_URL = r'https://intl\.dropout\.tv/(?P<id>[^/]+/.+)' + _VALID_URL = r'https://intl\.dropout\.tv/(?P<id>.+)' _TEST = { 'url': 'https://intl.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', @@ -59,16 +59,16 @@ class IntlDropoutIE(VHXEmbedIE): if self._downloader.params.get('cookiefile') is None: raise ExtractorError('No login info available, needed for using %s.' 
% self.IE_NAME, expected=True) return True - + login_page = self._download_webpage( self._LOGIN_URL, None, note='Downloading login page', errnote='unable to fetch login page', fatal=False ) - + if login_page is False: return - + login_form = self._hidden_inputs(login_page) login_form.update({ @@ -76,7 +76,7 @@ class IntlDropoutIE(VHXEmbedIE): 'email': email, 'password': password }) - + request = sanitized_Request( self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') @@ -84,18 +84,15 @@ class IntlDropoutIE(VHXEmbedIE): self._download_webpage(request, None, 'Logging in') except Exception: print('error') - + def _real_extract(self, url): webpage = self._download_webpage(url, None) - embed = self._html_search_regex(r'<iframe[^>]+"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') - - print(embed) - - self._download_webpage( - self._LOGOUT_URL, None, - note='logging out', - errnote='unable to logout', fatal=False - ) - - return self.url_result(embed) + try: + video = self._html_search_regex(r'<iframe[^>]+"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') + except RegexNotFoundError: + items = re.findall(r'<a href="(?P<url>https://intl.dropout.tv/videos/[^"]+)"', webpage) + playlist_id = self._search_regex(r'https://intl.dropout.tv/(?P<id>.+)', url, 'id') + playlist_title = self._html_search_regex(r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<', webpage, 'title') + return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title) + return self.url_result(video) From 34e46f3bca07427ef3e1f0e274cd99d871b8e29c Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Thu, 21 Feb 2019 17:19:05 +0100 Subject: [PATCH 004/123] fixed login --- youtube_dl/extractor/intldropout.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/intldropout.py 
b/youtube_dl/extractor/intldropout.py index 5f5696428..eea5f1095 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -31,6 +31,7 @@ import re class IntlDropoutIE(VHXEmbedIE): IE_DESC = 'International Dropout.tv' + _NETRC_MACHINE = 'intl.dropout.tv' _LOGIN_URL = 'https://intl.dropout.tv/login' _LOGOUT_URL = 'https://intl.dropout.tv/logout' _VALID_URL = r'https://intl\.dropout\.tv/(?P<id>.+)' From f278dad081c8c68fc09d39039a943aac5e69ac69 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Thu, 21 Feb 2019 18:22:42 +0100 Subject: [PATCH 005/123] added test data --- youtube_dl/extractor/intldropout.py | 36 ++++++++++++++++++----------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index eea5f1095..1628646a2 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -37,17 +37,15 @@ class IntlDropoutIE(VHXEmbedIE): _VALID_URL = r'https://intl\.dropout\.tv/(?P<id>.+)' _TEST = { 'url': 'https://intl.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'md5': 'e6cbf01c24ad9fb8281c23357416ec97', 'info_dict': { - 'id': '42', + 'id': '397785', 'ext': 'mp4', - 'title': 'Video title goes here', + 'title': "C-3PO's Origins, HP Lovecraft, the Food Album (with Weird Al Yankovic)", 'thumbnail': r're:^https?://.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) + 'description': 'Caldwell Tanner, Siobhan Thompson, and Nate Dern inspect guns and review the Diagon Alley bar scene.', + 'upload_date': '20181206', + 'timestamp': 1544117975, } } @@ -64,7 +62,8 @@ class IntlDropoutIE(VHXEmbedIE): login_page = 
self._download_webpage( self._LOGIN_URL, None, note='Downloading login page', - errnote='unable to fetch login page', fatal=False + errnote='unable to fetch login page', fatal=False, + expected_status=200 ) if login_page is False: @@ -82,18 +81,27 @@ class IntlDropoutIE(VHXEmbedIE): self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') try: - self._download_webpage(request, None, 'Logging in') + self._download_webpage(request, None, 'Logging in', expected_status=302) except Exception: - print('error') + raise ExtractorError( + 'Unable to login', + expected=True) def _real_extract(self, url): - webpage = self._download_webpage(url, None) try: - video = self._html_search_regex(r'<iframe[^>]+"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') + webpage = self._download_webpage(url, None, expected_status=200) + except Exception: + raise ExtractorError( + 'Unable to fetch page', + expected=True) + try: + video = self._html_search_regex(r'<iframe[^>]*"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') except RegexNotFoundError: items = re.findall(r'<a href="(?P<url>https://intl.dropout.tv/videos/[^"]+)"', webpage) playlist_id = self._search_regex(r'https://intl.dropout.tv/(?P<id>.+)', url, 'id') playlist_title = self._html_search_regex(r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<', webpage, 'title') return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title) - return self.url_result(video) + video_id = self._search_regex(r'https://embed.vhx.tv/videos/(?P<id>[0-9]+)', video, 'id') + video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*>(<strong>)?(?P<title>[^<]+)<', webpage, 'title') + return self.url_result(video, video_id=video_id, video_title=video_title) From 845704f1a8b1b4d82bb7f3f9533644be4a31fa0b Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: 
Thu, 21 Feb 2019 18:24:23 +0100 Subject: [PATCH 006/123] fixed IE_NAME --- youtube_dl/extractor/intldropout.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 1628646a2..fd808e8dc 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -30,6 +30,7 @@ import re class IntlDropoutIE(VHXEmbedIE): + IE_NAME = 'intldropout' IE_DESC = 'International Dropout.tv' _NETRC_MACHINE = 'intl.dropout.tv' _LOGIN_URL = 'https://intl.dropout.tv/login' From 0e0e8abf10d89eadfa1b12bee8b59b5f70a94aa3 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Fri, 22 Feb 2019 18:15:02 +0100 Subject: [PATCH 007/123] [intldropout] added tests --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/intldropout.py | 88 +++++++++++++++++++++-------- 2 files changed, 70 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 20b660d5c..e38a33714 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -308,7 +308,10 @@ from .discoveryvr import DiscoveryVRIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE -from .intldropout import IntlDropoutIE +from .intldropout import ( + IntlDropoutIE, + IntlDropoutPlaylistIE, +) from .dw import ( DWIE, DWArticleIE, diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index fd808e8dc..3fedaef7b 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -7,7 +7,6 @@ from ..utils import ( ExtractorError, sanitized_Request, urlencode_postdata, - RegexNotFoundError ) import re @@ -35,20 +34,35 @@ class IntlDropoutIE(VHXEmbedIE): _NETRC_MACHINE = 'intl.dropout.tv' _LOGIN_URL = 'https://intl.dropout.tv/login' _LOGOUT_URL = 'https://intl.dropout.tv/logout' - _VALID_URL = 
r'https://intl\.dropout\.tv/(?P<id>.+)' - _TEST = { - 'url': 'https://intl.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', - 'md5': 'e6cbf01c24ad9fb8281c23357416ec97', - 'info_dict': { - 'id': '397785', - 'ext': 'mp4', - 'title': "C-3PO's Origins, HP Lovecraft, the Food Album (with Weird Al Yankovic)", - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'Caldwell Tanner, Siobhan Thompson, and Nate Dern inspect guns and review the Diagon Alley bar scene.', - 'upload_date': '20181206', - 'timestamp': 1544117975, + _VALID_URL = r'https://intl\.dropout\.tv/([^/]+/season:[^/]+/)?videos/(?P<id>.+)' + _TESTS = [ + { + 'url': 'https://intl.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', + 'md5': '8beaac579b6ba762f63cd452fd28dcce', + 'info_dict': { + 'id': '397785', + 'ext': 'mp4', + 'title': "C-3PO's Origins, HP Lovecraft, the Food Album (with Weird Al Yankovic)", + 'thumbnail': r're:^https://vhx.imgix.net/.*\.jpg$', + 'description': 'Caldwell Tanner, Siobhan Thompson, and Nate Dern inspect guns and review the Diagon Alley bar scene.', + 'upload_date': '20181206', + 'timestamp': 1544117975, + } + }, + { + 'url': 'https://intl.dropout.tv/videos/um-actually-behind-the-scenes', + 'md5': 'b974927cd563423fe50945dbfdbb894c', + 'info_dict': { + 'id': '397943', + 'ext': 'mp4', + 'title': 'Um, Actually: Behind the Scenes', + 'thumbnail': r're:^https://vhx.imgix.net/.*\.jpg$', + 'description': 'What does it take to stump the nerdy? 
Mike Trapp and team pull back the curtain.', + 'upload_date': '20181206', + 'timestamp': 1544118409, + } } - } + ] def _real_initialize(self): self._login() @@ -95,14 +109,44 @@ class IntlDropoutIE(VHXEmbedIE): raise ExtractorError( 'Unable to fetch page', expected=True) - try: - video = self._html_search_regex(r'<iframe[^>]*"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') - except RegexNotFoundError: - items = re.findall(r'<a href="(?P<url>https://intl.dropout.tv/videos/[^"]+)"', webpage) - playlist_id = self._search_regex(r'https://intl.dropout.tv/(?P<id>.+)', url, 'id') - playlist_title = self._html_search_regex(r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<', webpage, 'title') - return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title) - + video = self._html_search_regex(r'<iframe[^>]*"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') video_id = self._search_regex(r'https://embed.vhx.tv/videos/(?P<id>[0-9]+)', video, 'id') video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*>(<strong>)?(?P<title>[^<]+)<', webpage, 'title') return self.url_result(video, video_id=video_id, video_title=video_title) + + +class IntlDropoutPlaylistIE(IntlDropoutIE): + IE_NAME = 'intldropout:playlist' + _VALID_URL = r'^https://intl\.dropout\.tv/(?P<id>[^/]+(/season:[^/]+)?)$' + _TESTS = [ + { + 'url': 'https://intl.dropout.tv/um-actually-the-web-series', + 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', + 'playlist_count': 9, + 'info_dict': { + 'id': 'um-actually-the-web-series', + 'title': 'Um, Actually: The Web Series', + } + }, + { + 'url': 'https://intl.dropout.tv/new-releases', + 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', + 'playlist_count': 21, + 'info_dict': { + 'id': 'new-releases', + 'title': 'New Releases', + } + } + ] + + def _real_extract(self, url): + try: + webpage = self._download_webpage(url, None, expected_status=200) + except 
Exception: + raise ExtractorError( + 'Unable to fetch page', + expected=True) + items = re.findall(r'<a href="(?P<url>https://intl.dropout.tv/[^/]+/[^"]+)"', webpage) + playlist_id = self._search_regex(r'https://intl.dropout.tv/(?P<id>.+)', url, 'id') + playlist_title = self._html_search_regex(r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<', webpage, 'title') + return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title) From 657d46d95c8b40635b57562a6a4cf12e67de4d28 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Sun, 24 Feb 2019 18:12:08 +0100 Subject: [PATCH 008/123] [intldropout] cleaned things up --- youtube_dl/extractor/intldropout.py | 37 ++++------------------------- 1 file changed, 5 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 3fedaef7b..e4e673ee5 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -11,22 +11,6 @@ from ..utils import ( import re -# https://intl.dropout.tv/login -# GET -# authenticity_token - -# https://intl.dropout.tv/login -# POST -# authenticity_token -# email -# password -# utf8 ✓ - - -# https://embed.vhx.tv/videos/414462?api=1&autoplay=1&referrer=https%3A%2F%2Fintl.dropout.tv%2Fbrowse&playsinline=1&title=0&context=https%3A%2F%2Fintl.dropout.tv%2Fbrowse&back=Browse&color=feea3b&sharing=1&auth-user-token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyX2lkIjo1Mzk0NDEwLCJleHAiOjE1NDc0NzA1NDB9._y4H94pKyIOu_GT11qC2SeJnSou6EzN9jI1A-P3tbo8&live=0 -# 
https://vhx-adaptive-hap.akamaized.net/-ctx--user_id,5394410--platform_id,27--video_id,414462--channel_id,55407--plan,standard-/vods3cf/0/amlst:c-55407/v-414462/2220471,2220472,2220473,2220474,2220475,2220476/playlist.m3u8?token=exp=1547481565~acl=/-ctx--user_id,5394410--platform_id,27--video_id,414462--channel_id,55407--plan,standard-/vods3cf/0/amlst:c-55407/v-414462/2220471,2220472,2220473,2220474,2220475,2220476/*~hmac=ceb8508146d2dec2b868db9ca304ec13d54502cca0a7d1cd0def7a85a9ef3962& -# https://api.vhx.tv/videos/414462/files?auth_user_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyX2lkIjo1Mzk0NDEwLCJleHAiOjE1NDc0NzA1NDB9._y4H94pKyIOu_GT11qC2SeJnSou6EzN9jI1A-P3tbo8&_=1547463565300 - class IntlDropoutIE(VHXEmbedIE): IE_NAME = 'intldropout' @@ -77,8 +61,7 @@ class IntlDropoutIE(VHXEmbedIE): login_page = self._download_webpage( self._LOGIN_URL, None, note='Downloading login page', - errnote='unable to fetch login page', fatal=False, - expected_status=200 + errnote='unable to fetch login page', fatal=False ) if login_page is False: @@ -103,12 +86,7 @@ class IntlDropoutIE(VHXEmbedIE): expected=True) def _real_extract(self, url): - try: - webpage = self._download_webpage(url, None, expected_status=200) - except Exception: - raise ExtractorError( - 'Unable to fetch page', - expected=True) + webpage = self._download_webpage(url, None) video = self._html_search_regex(r'<iframe[^>]*"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') video_id = self._search_regex(r'https://embed.vhx.tv/videos/(?P<id>[0-9]+)', video, 'id') video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*>(<strong>)?(?P<title>[^<]+)<', webpage, 'title') @@ -117,7 +95,7 @@ class IntlDropoutIE(VHXEmbedIE): class IntlDropoutPlaylistIE(IntlDropoutIE): IE_NAME = 'intldropout:playlist' - _VALID_URL = r'^https://intl\.dropout\.tv/(?P<id>[^/]+(/season:[^/]+)?)$' + _VALID_URL = r'^https://intl\.dropout\.tv/(?P<id>[^/]+(/season:[^/]+)?)' _TESTS = [ { 'url': 
'https://intl.dropout.tv/um-actually-the-web-series', @@ -140,13 +118,8 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): ] def _real_extract(self, url): - try: - webpage = self._download_webpage(url, None, expected_status=200) - except Exception: - raise ExtractorError( - 'Unable to fetch page', - expected=True) - items = re.findall(r'<a href="(?P<url>https://intl.dropout.tv/[^/]+/[^"]+)"', webpage) playlist_id = self._search_regex(r'https://intl.dropout.tv/(?P<id>.+)', url, 'id') + webpage = self._download_webpage(url, playlist_id) + items = re.findall(r'<a href="(?P<url>https://intl.dropout.tv/[^/]+/[^"]+)"', webpage) playlist_title = self._html_search_regex(r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<', webpage, 'title') return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title) From f6a4af595b016ada86dcf4cb671bfd26eb7f7407 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Sun, 24 Feb 2019 18:38:21 +0100 Subject: [PATCH 009/123] [intldropout] adjusted regexes --- youtube_dl/extractor/intldropout.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index e4e673ee5..1b401b4a0 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -18,7 +18,7 @@ class IntlDropoutIE(VHXEmbedIE): _NETRC_MACHINE = 'intl.dropout.tv' _LOGIN_URL = 'https://intl.dropout.tv/login' _LOGOUT_URL = 'https://intl.dropout.tv/logout' - _VALID_URL = r'https://intl\.dropout\.tv/([^/]+/season:[^/]+/)?videos/(?P<id>.+)' + _VALID_URL = r'https://intl\.dropout\.tv/(?:[^/]+/season:[^/]+/)?videos/(?P<id>.+)' _TESTS = [ { 'url': 'https://intl.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', @@ -89,13 +89,13 @@ class IntlDropoutIE(VHXEmbedIE): webpage = self._download_webpage(url, None) video = 
self._html_search_regex(r'<iframe[^>]*"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') video_id = self._search_regex(r'https://embed.vhx.tv/videos/(?P<id>[0-9]+)', video, 'id') - video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*>(<strong>)?(?P<title>[^<]+)<', webpage, 'title') + video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*><strong>(?P<title>[^<]+)<', webpage, 'title') return self.url_result(video, video_id=video_id, video_title=video_title) class IntlDropoutPlaylistIE(IntlDropoutIE): IE_NAME = 'intldropout:playlist' - _VALID_URL = r'^https://intl\.dropout\.tv/(?P<id>[^/]+(/season:[^/]+)?)' + _VALID_URL = r'^https://intl\.dropout\.tv/(?P<id>[^/]+(/season:[^/]+)?)$' _TESTS = [ { 'url': 'https://intl.dropout.tv/um-actually-the-web-series', From 4419648461f08c684f52f0e8f6bbb8cd1e28b028 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Sun, 24 Feb 2019 18:58:03 +0100 Subject: [PATCH 010/123] [intldropout] fixed Playlist Matching --- youtube_dl/extractor/intldropout.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 1b401b4a0..fa35dcf3d 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -95,7 +95,7 @@ class IntlDropoutIE(VHXEmbedIE): class IntlDropoutPlaylistIE(IntlDropoutIE): IE_NAME = 'intldropout:playlist' - _VALID_URL = r'^https://intl\.dropout\.tv/(?P<id>[^/]+(/season:[^/]+)?)$' + _VALID_URL = r'^https://intl\.dropout\.tv/(?P<id>.+)' _TESTS = [ { 'url': 'https://intl.dropout.tv/um-actually-the-web-series', @@ -114,9 +114,22 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): 'id': 'new-releases', 'title': 'New Releases', } + }, + { + 'url': 'https://intl.dropout.tv/troopers/season:2', + 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', + 'playlist_count': 10, + 'info_dict': { + 'id': 'troopers/season:2', + 
'title': 'Troopers', + } } ] + @classmethod + def suitable(cls, url): + return False if IntlDropoutIE.suitable(url) else super(IntlDropoutPlaylistIE, cls).suitable(url) + def _real_extract(self, url): playlist_id = self._search_regex(r'https://intl.dropout.tv/(?P<id>.+)', url, 'id') webpage = self._download_webpage(url, playlist_id) From 7a99eaf5d68be35eb274c7b91fb703b7762701a6 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Sun, 24 Feb 2019 18:59:28 +0100 Subject: [PATCH 011/123] updated Test --- youtube_dl/extractor/intldropout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index fa35dcf3d..acb20e5c2 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -109,7 +109,7 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): { 'url': 'https://intl.dropout.tv/new-releases', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', - 'playlist_count': 21, + 'playlist_count': 16, 'info_dict': { 'id': 'new-releases', 'title': 'New Releases', From 26115f5e84050488bed017f44014f7c6c799c29e Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Sun, 3 Mar 2019 13:53:11 +0100 Subject: [PATCH 012/123] [intldropout] fixed netrc name --- youtube_dl/extractor/intldropout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index acb20e5c2..4ebdb203c 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -15,7 +15,7 @@ import re class IntlDropoutIE(VHXEmbedIE): IE_NAME = 'intldropout' IE_DESC = 'International Dropout.tv' - _NETRC_MACHINE = 'intl.dropout.tv' + _NETRC_MACHINE = 'intldropouttv' _LOGIN_URL = 'https://intl.dropout.tv/login' _LOGOUT_URL = 'https://intl.dropout.tv/logout' _VALID_URL = r'https://intl\.dropout\.tv/(?:[^/]+/season:[^/]+/)?videos/(?P<id>.+)' From 
1855b9d9651e01bb0b3d847332dc1d62996a17d7 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Tue, 5 Mar 2019 11:16:50 +0100 Subject: [PATCH 013/123] [intldropout] fixed playlist extractor --- youtube_dl/extractor/intldropout.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 4ebdb203c..98bebf042 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -109,7 +109,7 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): { 'url': 'https://intl.dropout.tv/new-releases', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', - 'playlist_count': 16, + 'playlist_count': 22, 'info_dict': { 'id': 'new-releases', 'title': 'New Releases', @@ -133,6 +133,6 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): def _real_extract(self, url): playlist_id = self._search_regex(r'https://intl.dropout.tv/(?P<id>.+)', url, 'id') webpage = self._download_webpage(url, playlist_id) - items = re.findall(r'<a href="(?P<url>https://intl.dropout.tv/[^/]+/[^"]+)"', webpage) + items = re.findall(r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://intl.dropout.tv/[^/]+/[^"]+)"', webpage) playlist_title = self._html_search_regex(r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<', webpage, 'title') return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title) From a787465d8a647be49d21292833c603e2d83f655a Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Mon, 18 Mar 2019 09:49:59 +0100 Subject: [PATCH 014/123] [intldropout] implemented feedback --- youtube_dl/extractor/intldropout.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 98bebf042..d2dbe4b7d 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -5,7 +5,6 @@ from .vimeo import 
VHXEmbedIE from ..utils import ( ExtractorError, - sanitized_Request, urlencode_postdata, ) @@ -75,27 +74,22 @@ class IntlDropoutIE(VHXEmbedIE): 'password': password }) - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - try: - self._download_webpage(request, None, 'Logging in', expected_status=302) - except Exception: - raise ExtractorError( - 'Unable to login', - expected=True) + self._download_webpage(self._LOGIN_URL, None, 'Logging in', 'Login failed', + expected_status=302, + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) def _real_extract(self, url): webpage = self._download_webpage(url, None) video = self._html_search_regex(r'<iframe[^>]*"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') video_id = self._search_regex(r'https://embed.vhx.tv/videos/(?P<id>[0-9]+)', video, 'id') - video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*><strong>(?P<title>[^<]+)<', webpage, 'title') + video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*><strong>(?P<title>[^<]+)<', webpage, 'title', fatal=False) return self.url_result(video, video_id=video_id, video_title=video_title) class IntlDropoutPlaylistIE(IntlDropoutIE): IE_NAME = 'intldropout:playlist' - _VALID_URL = r'^https://intl\.dropout\.tv/(?P<id>.+)' + _VALID_URL = r'https://intl\.dropout\.tv/(?P<id>.+)' _TESTS = [ { 'url': 'https://intl.dropout.tv/um-actually-the-web-series', @@ -131,7 +125,7 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): return False if IntlDropoutIE.suitable(url) else super(IntlDropoutPlaylistIE, cls).suitable(url) def _real_extract(self, url): - playlist_id = self._search_regex(r'https://intl.dropout.tv/(?P<id>.+)', url, 'id') + playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) items = 
re.findall(r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://intl.dropout.tv/[^/]+/[^"]+)"', webpage) playlist_title = self._html_search_regex(r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<', webpage, 'title') From cb588acb5a65f5b71938bfdddd0e1c7518a0e182 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Thu, 25 Apr 2019 10:36:16 +0200 Subject: [PATCH 015/123] [intldropout] prevent double login --- youtube_dl/extractor/intldropout.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index d2dbe4b7d..531906bba 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -66,6 +66,9 @@ class IntlDropoutIE(VHXEmbedIE): if login_page is False: return + if "You are now signed in" in login_page: + return + login_form = self._hidden_inputs(login_page) login_form.update({ From f7f9c27896da2cb6d2fa95ebab7ef4e4265ebb0a Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Thu, 25 Apr 2019 10:48:39 +0200 Subject: [PATCH 016/123] [intldropout] display device limit error message --- youtube_dl/extractor/intldropout.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 531906bba..070998883 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -66,7 +66,7 @@ class IntlDropoutIE(VHXEmbedIE): if login_page is False: return - if "You are now signed in" in login_page: + if "You are now signed in." 
in login_page: return login_form = self._hidden_inputs(login_page) @@ -84,6 +84,8 @@ class IntlDropoutIE(VHXEmbedIE): def _real_extract(self, url): webpage = self._download_webpage(url, None) + if "The device limit for your account has been reached" in webpage: + raise ExtractorError('Device Limit reached', expected=True) video = self._html_search_regex(r'<iframe[^>]*"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') video_id = self._search_regex(r'https://embed.vhx.tv/videos/(?P<id>[0-9]+)', video, 'id') video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*><strong>(?P<title>[^<]+)<', webpage, 'title', fatal=False) From 12a7c5e8c22272ff97c7c40a9f70e1759f0263dc Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Fri, 26 Apr 2019 19:19:22 +0200 Subject: [PATCH 017/123] [intldropout] better login checks --- youtube_dl/extractor/intldropout.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 070998883..493dfe525 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -52,20 +52,16 @@ class IntlDropoutIE(VHXEmbedIE): def _login(self): email, password = self._get_login_info() - if email is None or password is None: - if self._downloader.params.get('cookiefile') is None: + if (email is None or password is None) and self._downloader.params.get('cookiefile') is None: raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) - return True login_page = self._download_webpage( self._LOGIN_URL, None, note='Downloading login page', - errnote='unable to fetch login page', fatal=False + errnote='unable to fetch login page' ) - if login_page is False: - return - + """check if user is already logged in via cookies""" if "You are now signed in." 
in login_page: return @@ -86,6 +82,9 @@ class IntlDropoutIE(VHXEmbedIE): webpage = self._download_webpage(url, None) if "The device limit for your account has been reached" in webpage: raise ExtractorError('Device Limit reached', expected=True) + if "Start your free trial" in webpage or "Start Free Trial" in webpage or "Sign in" in webpage: + raise ExtractorError('You don\'t seem to be logged in', expected=True) + video = self._html_search_regex(r'<iframe[^>]*"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') video_id = self._search_regex(r'https://embed.vhx.tv/videos/(?P<id>[0-9]+)', video, 'id') video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*><strong>(?P<title>[^<]+)<', webpage, 'title', fatal=False) From e53f2ee17fabb90dafef5feb4fd9f4b8579f0d0c Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Wed, 8 May 2019 16:51:08 +0200 Subject: [PATCH 018/123] [intldropout] moved login to VHXEmbedIE --- youtube_dl/extractor/intldropout.py | 29 ++--------------------------- youtube_dl/extractor/vimeo.py | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 493dfe525..1cdc7e125 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -3,10 +3,7 @@ from __future__ import unicode_literals from .vimeo import VHXEmbedIE -from ..utils import ( - ExtractorError, - urlencode_postdata, -) +from ..utils import ExtractorError import re @@ -54,29 +51,7 @@ class IntlDropoutIE(VHXEmbedIE): email, password = self._get_login_info() if (email is None or password is None) and self._downloader.params.get('cookiefile') is None: raise ExtractorError('No login info available, needed for using %s.' 
% self.IE_NAME, expected=True) - - login_page = self._download_webpage( - self._LOGIN_URL, None, - note='Downloading login page', - errnote='unable to fetch login page' - ) - - """check if user is already logged in via cookies""" - if "You are now signed in." in login_page: - return - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - 'passwordless': 0, - 'email': email, - 'password': password - }) - - self._download_webpage(self._LOGIN_URL, None, 'Logging in', 'Login failed', - expected_status=302, - data=urlencode_postdata(login_form), - headers={'Content-Type': 'application/x-www-form-urlencoded'}) + self._vhx_login(email, password, self._LOGIN_URL) def _real_extract(self, url): webpage = self._download_webpage(url, None) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a41178bab..effa879bc 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1128,6 +1128,30 @@ class VHXEmbedIE(InfoExtractor): IE_NAME = 'vhx:embed' _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)' + def _vhx_login(self, email, password, login_url): + login_page = self._download_webpage( + login_url, None, + note='Downloading login page', + errnote='unable to fetch login page' + ) + + """check if user is already logged in via cookies""" + if "You are now signed in." 
in login_page: + return + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'passwordless': 0, + 'email': email, + 'password': password + }) + + self._download_webpage(login_url, None, 'Logging in', 'Login failed', + expected_status=302, + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + def _call_api(self, video_id, access_token, path='', query=None): return self._download_json( 'https://api.vhx.tv/videos/' + video_id + path, video_id, headers={ From 6ef0a6268b451b2acd96a14132d3a3a60ba6fc18 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Wed, 8 May 2019 17:14:35 +0200 Subject: [PATCH 019/123] [intldropout] support multiple pages on playlists --- youtube_dl/extractor/intldropout.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 1cdc7e125..68e724e37 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -71,30 +71,30 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): _VALID_URL = r'https://intl\.dropout\.tv/(?P<id>.+)' _TESTS = [ { - 'url': 'https://intl.dropout.tv/um-actually-the-web-series', + 'url': 'https://intl.dropout.tv/um-actually', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', - 'playlist_count': 9, + 'playlist_count': 30, 'info_dict': { - 'id': 'um-actually-the-web-series', - 'title': 'Um, Actually: The Web Series', + 'id': 'um-actually', + 'title': 'Um, Actually', } }, { 'url': 'https://intl.dropout.tv/new-releases', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', - 'playlist_count': 22, + 'playlist_count': 24, 'info_dict': { 'id': 'new-releases', 'title': 'New Releases', } }, { - 'url': 'https://intl.dropout.tv/troopers/season:2', + 'url': 'https://intl.dropout.tv/troopers-the-web-series/season:2', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', 'playlist_count': 10, 'info_dict': { - 'id': 
'troopers/season:2', - 'title': 'Troopers', + 'id': 'troopers-the-web-series/season:2', + 'title': 'Troopers: The Web Series', } } ] @@ -106,6 +106,14 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - items = re.findall(r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://intl.dropout.tv/[^/]+/[^"]+)"', webpage) - playlist_title = self._html_search_regex(r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<', webpage, 'title') + playlist_title = self._html_search_regex(r'<h1 class="[^"]*collection-title[^"]*"[^>]*>(?P<title>[^<]+)<', webpage, 'title') + + items = [] + while True: + items.extend(re.findall(r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://intl.dropout.tv/[^/]+/[^"]+)"', webpage)) + next_page_url = self._search_regex(r'href="(/[^\?]+\?page=\d+)"', webpage, 'next page url', default=None) + if not next_page_url: + break + webpage = self._download_webpage('https://intl.dropout.tv' + next_page_url, playlist_id) + return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title) From 10bbeb3d24c3bba3d22ec150a234d4d46691f5b5 Mon Sep 17 00:00:00 2001 From: Thomas Tsiakalakis <tt@dplusc.de> Date: Wed, 8 May 2019 17:17:55 +0200 Subject: [PATCH 020/123] [intldropout] fixed test --- youtube_dl/extractor/intldropout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index 68e724e37..e8689bd86 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -82,7 +82,7 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): { 'url': 'https://intl.dropout.tv/new-releases', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', - 'playlist_count': 24, + 'playlist_count': 31, 'info_dict': { 'id': 'new-releases', 'title': 'New Releases', From 7b34e67aa27d575716c2536f02eaeefcda05417c Mon Sep 17 00:00:00 2001 
From: Thomas Tsiakalakis <tt@dplusc.de> Date: Fri, 11 Oct 2019 17:28:32 +0200 Subject: [PATCH 021/123] [intldropout] adjusted regex to recent changes --- youtube_dl/extractor/intldropout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index e8689bd86..f71e3e975 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -62,7 +62,7 @@ class IntlDropoutIE(VHXEmbedIE): video = self._html_search_regex(r'<iframe[^>]*"(?P<embed>https://embed.vhx.tv/videos/[0-9]+[^"]*)"[^>]*>', webpage, 'embed') video_id = self._search_regex(r'https://embed.vhx.tv/videos/(?P<id>[0-9]+)', video, 'id') - video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*><strong>(?P<title>[^<]+)<', webpage, 'title', fatal=False) + video_title = self._html_search_regex(r'<h1 class="[^"]*video-title[^"]*"[^>]*>\s*<strong>(?P<title>[^<]+)<', webpage, 'title', fatal=False) return self.url_result(video, video_id=video_id, video_title=video_title) From 453750d3fbdfad7d546d6096531affbafd7b2583 Mon Sep 17 00:00:00 2001 From: tsia <github@tsia.de> Date: Wed, 29 Jan 2020 13:13:02 +0100 Subject: [PATCH 022/123] quickfix to support www.dropout.tv intl.dropout.tv has been discontinued --- youtube_dl/extractor/intldropout.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/intldropout.py index f71e3e975..a190f0629 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/intldropout.py @@ -12,12 +12,12 @@ class IntlDropoutIE(VHXEmbedIE): IE_NAME = 'intldropout' IE_DESC = 'International Dropout.tv' _NETRC_MACHINE = 'intldropouttv' - _LOGIN_URL = 'https://intl.dropout.tv/login' - _LOGOUT_URL = 'https://intl.dropout.tv/logout' - _VALID_URL = r'https://intl\.dropout\.tv/(?:[^/]+/season:[^/]+/)?videos/(?P<id>.+)' + _LOGIN_URL = 
'https://www.dropout.tv/login' + _LOGOUT_URL = 'https://www.dropout.tv/logout' + _VALID_URL = r'https://www\.dropout\.tv/(?:[^/]+/season:[^/]+/)?videos/(?P<id>.+)' _TESTS = [ { - 'url': 'https://intl.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', + 'url': 'https://www.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', 'md5': '8beaac579b6ba762f63cd452fd28dcce', 'info_dict': { 'id': '397785', @@ -30,7 +30,7 @@ class IntlDropoutIE(VHXEmbedIE): } }, { - 'url': 'https://intl.dropout.tv/videos/um-actually-behind-the-scenes', + 'url': 'https://www.dropout.tv/videos/um-actually-behind-the-scenes', 'md5': 'b974927cd563423fe50945dbfdbb894c', 'info_dict': { 'id': '397943', @@ -68,10 +68,10 @@ class IntlDropoutIE(VHXEmbedIE): class IntlDropoutPlaylistIE(IntlDropoutIE): IE_NAME = 'intldropout:playlist' - _VALID_URL = r'https://intl\.dropout\.tv/(?P<id>.+)' + _VALID_URL = r'https://www\.dropout\.tv/(?P<id>.+)' _TESTS = [ { - 'url': 'https://intl.dropout.tv/um-actually', + 'url': 'https://www.dropout.tv/um-actually', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', 'playlist_count': 30, 'info_dict': { @@ -80,7 +80,7 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): } }, { - 'url': 'https://intl.dropout.tv/new-releases', + 'url': 'https://www.dropout.tv/new-releases', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', 'playlist_count': 31, 'info_dict': { @@ -89,7 +89,7 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): } }, { - 'url': 'https://intl.dropout.tv/troopers-the-web-series/season:2', + 'url': 'https://www.dropout.tv/troopers-the-web-series/season:2', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', 'playlist_count': 10, 'info_dict': { @@ -110,10 +110,10 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): items = [] while True: - items.extend(re.findall(r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://intl.dropout.tv/[^/]+/[^"]+)"', webpage)) + 
items.extend(re.findall(r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://www.dropout.tv/[^/]+/[^"]+)"', webpage)) next_page_url = self._search_regex(r'href="(/[^\?]+\?page=\d+)"', webpage, 'next page url', default=None) if not next_page_url: break - webpage = self._download_webpage('https://intl.dropout.tv' + next_page_url, playlist_id) + webpage = self._download_webpage('https://www.dropout.tv' + next_page_url, playlist_id) return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=playlist_title) From 6b65dae7b111555b4c88a642f1c9b348d667c5b8 Mon Sep 17 00:00:00 2001 From: tsia <mail@tsia.de> Date: Sun, 1 Mar 2020 17:48:42 +0100 Subject: [PATCH 023/123] [intldropout] reworked Extractor for www.dropout.tv --- .../extractor/{intldropout.py => dropout.py} | 46 +++++++++---------- youtube_dl/extractor/extractors.py | 6 +-- 2 files changed, 25 insertions(+), 27 deletions(-) rename youtube_dl/extractor/{intldropout.py => dropout.py} (71%) diff --git a/youtube_dl/extractor/intldropout.py b/youtube_dl/extractor/dropout.py similarity index 71% rename from youtube_dl/extractor/intldropout.py rename to youtube_dl/extractor/dropout.py index a190f0629..3fc33f828 100644 --- a/youtube_dl/extractor/intldropout.py +++ b/youtube_dl/extractor/dropout.py @@ -8,38 +8,36 @@ from ..utils import ExtractorError import re -class IntlDropoutIE(VHXEmbedIE): - IE_NAME = 'intldropout' - IE_DESC = 'International Dropout.tv' - _NETRC_MACHINE = 'intldropouttv' +class DropoutIE(VHXEmbedIE): + IE_NAME = 'dropout' + IE_DESC = 'Dropout.tv' + _NETRC_MACHINE = 'dropouttv' _LOGIN_URL = 'https://www.dropout.tv/login' _LOGOUT_URL = 'https://www.dropout.tv/logout' - _VALID_URL = r'https://www\.dropout\.tv/(?:[^/]+/season:[^/]+/)?videos/(?P<id>.+)' + _VALID_URL = r'https://www\.dropout\.tv/(?:[^/]+/(?:season:[^/]/))?videos/(?P<id>.+)' _TESTS = [ { - 'url': 'https://www.dropout.tv/um-actually/season:1/videos/c-3po-s-origins-hp-lovecraft-the-food-album-with-weird-al-yankovic', - 
'md5': '8beaac579b6ba762f63cd452fd28dcce', + 'url': 'https://www.dropout.tv/dimension-20-tiny-heist/season:1/videos/big-little-crimes', + 'md5': '46edf4c6d632e2771a42a235f920b8f7', 'info_dict': { - 'id': '397785', + 'id': '382486557', 'ext': 'mp4', - 'title': "C-3PO's Origins, HP Lovecraft, the Food Album (with Weird Al Yankovic)", - 'thumbnail': r're:^https://vhx.imgix.net/.*\.jpg$', - 'description': 'Caldwell Tanner, Siobhan Thompson, and Nate Dern inspect guns and review the Diagon Alley bar scene.', - 'upload_date': '20181206', - 'timestamp': 1544117975, + 'uploader': 'OTT Videos', + 'uploader_id': 'user80538407', + 'title': "Untitled", + 'thumbnail': r're:^https://i.vimeocdn.com/.*\.jpg$', } }, { 'url': 'https://www.dropout.tv/videos/um-actually-behind-the-scenes', - 'md5': 'b974927cd563423fe50945dbfdbb894c', + 'md5': '7fd342c652a86b996bae2920695593af', 'info_dict': { - 'id': '397943', + 'id': '265656116', 'ext': 'mp4', + 'uploader': 'OTT Videos', + 'uploader_id': 'user80538407', 'title': 'Um, Actually: Behind the Scenes', - 'thumbnail': r're:^https://vhx.imgix.net/.*\.jpg$', - 'description': 'What does it take to stump the nerdy? 
Mike Trapp and team pull back the curtain.', - 'upload_date': '20181206', - 'timestamp': 1544118409, + 'thumbnail': r're:^https://i.vimeocdn.com/.*\.jpg$', } } ] @@ -66,14 +64,14 @@ class IntlDropoutIE(VHXEmbedIE): return self.url_result(video, video_id=video_id, video_title=video_title) -class IntlDropoutPlaylistIE(IntlDropoutIE): - IE_NAME = 'intldropout:playlist' +class DropoutPlaylistIE(DropoutIE): + IE_NAME = 'dropout:playlist' _VALID_URL = r'https://www\.dropout\.tv/(?P<id>.+)' _TESTS = [ { 'url': 'https://www.dropout.tv/um-actually', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', - 'playlist_count': 30, + 'playlist_count': 33, 'info_dict': { 'id': 'um-actually', 'title': 'Um, Actually', @@ -82,7 +80,7 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): { 'url': 'https://www.dropout.tv/new-releases', 'md5': 'ebcd26ef54f546225e7cb96e79da31cc', - 'playlist_count': 31, + 'playlist_count': 15, 'info_dict': { 'id': 'new-releases', 'title': 'New Releases', @@ -101,7 +99,7 @@ class IntlDropoutPlaylistIE(IntlDropoutIE): @classmethod def suitable(cls, url): - return False if IntlDropoutIE.suitable(url) else super(IntlDropoutPlaylistIE, cls).suitable(url) + return False if DropoutIE.suitable(url) else super(DropoutPlaylistIE, cls).suitable(url) def _real_extract(self, url): playlist_id = self._match_id(url) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 511bcc39d..61c587dbd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -293,9 +293,9 @@ from .discoveryvr import DiscoveryVRIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE -from .intldropout import ( - IntlDropoutIE, - IntlDropoutPlaylistIE, +from .dropout import ( + DropoutIE, + DropoutPlaylistIE, ) from .dw import ( DWIE, From 62c7c8c6d4c30af2867e66abc8b68128c6334a75 Mon Sep 17 00:00:00 2001 From: tsia <mail@tsia.de> Date: Sun, 1 Mar 2020 17:49:09 +0100 Subject: [PATCH 024/123] [dropout] 
fixed next_page_url --- youtube_dl/extractor/dropout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dropout.py b/youtube_dl/extractor/dropout.py index 3fc33f828..b2fcd2ab6 100644 --- a/youtube_dl/extractor/dropout.py +++ b/youtube_dl/extractor/dropout.py @@ -109,7 +109,7 @@ class DropoutPlaylistIE(DropoutIE): items = [] while True: items.extend(re.findall(r'browse-item-title[^>]+>[^<]*<a href="(?P<url>https://www.dropout.tv/[^/]+/[^"]+)"', webpage)) - next_page_url = self._search_regex(r'href="(/[^\?]+\?page=\d+)"', webpage, 'next page url', default=None) + next_page_url = self._search_regex(r'href="([^"]+\?[^"]*(?:&|&)?page=\d+)"', webpage, 'next page url', default=None) if not next_page_url: break webpage = self._download_webpage('https://www.dropout.tv' + next_page_url, playlist_id) From b6384bf9b13d37e6a3ed4cc41397695c2c122ee8 Mon Sep 17 00:00:00 2001 From: tsia <mail@tsia.de> Date: Sun, 1 Mar 2020 18:28:06 +0100 Subject: [PATCH 025/123] [dropout] fixed typo --- youtube_dl/extractor/dropout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dropout.py b/youtube_dl/extractor/dropout.py index b2fcd2ab6..1bc7a4f8d 100644 --- a/youtube_dl/extractor/dropout.py +++ b/youtube_dl/extractor/dropout.py @@ -14,7 +14,7 @@ class DropoutIE(VHXEmbedIE): _NETRC_MACHINE = 'dropouttv' _LOGIN_URL = 'https://www.dropout.tv/login' _LOGOUT_URL = 'https://www.dropout.tv/logout' - _VALID_URL = r'https://www\.dropout\.tv/(?:[^/]+/(?:season:[^/]/))?videos/(?P<id>.+)' + _VALID_URL = r'https://www\.dropout\.tv/(?:[^/]+/(?:season:[^/]/)?)?videos/(?P<id>.+)' _TESTS = [ { 'url': 'https://www.dropout.tv/dimension-20-tiny-heist/season:1/videos/big-little-crimes', From 34c669d7003ccf98f7bad742ff46b5b41c34212b Mon Sep 17 00:00:00 2001 From: willbeaufoy <will@willbeaufoy.net> Date: Thu, 23 Apr 2020 20:31:38 +0100 Subject: [PATCH 026/123] [options] Clarify doc on --exec command (closes #19087) (#24883) --- 
youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 8826b382c..6d5ac62b3 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -853,7 +853,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--exec', metavar='CMD', dest='exec_cmd', - help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'') + help='Execute a command on the file after downloading and post-processing, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'') postproc.add_option( '--convert-subs', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, From 027a1f601ee1ea0efd491a5e818f473078498a5e Mon Sep 17 00:00:00 2001 From: Philipp Stehle <anderschwiedu@googlemail.com> Date: Thu, 23 Apr 2020 21:44:13 +0200 Subject: [PATCH 027/123] [prosiebensat1] Improve extraction and remove 7tv.de support (#24948) --- youtube_dl/extractor/prosiebensat1.py | 29 +++++++++------------------ 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 1bc4f9b6b..74074606e 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -175,7 +175,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?: (?:beta\.)? 
(?: - prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia + prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia )\.(?:de|at|ch)| ran\.de|fem\.com|advopedia\.de|galileo\.tv/video ) @@ -193,7 +193,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'info_dict': { 'id': '2104602', 'ext': 'mp4', - 'title': 'Episode 18 - Staffel 2', + 'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2', 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'upload_date': '20131231', 'duration': 5845.04, @@ -300,7 +300,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'info_dict': { 'id': '2572814', 'ext': 'mp4', - 'title': 'Andreas Kümmert: Rocket Man', + 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man', 'description': 'md5:6ddb02b0781c6adf778afea606652e38', 'upload_date': '20131017', 'duration': 469.88, @@ -310,7 +310,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): }, }, { - 'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', + 'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag', 'info_dict': { 'id': '2156342', 'ext': 'mp4', @@ -332,19 +332,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'playlist_count': 2, 'skip': 'This video is unavailable', }, - { - 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge', - 'info_dict': { - 'id': '4187506', - 'ext': 'mp4', - 'title': 'Best of Circus HalliGalli', - 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9', - 'upload_date': '20151229', - }, - 'params': { - 'skip_download': True, - }, - }, { # title in <h2 class="subtitle"> 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip', @@ -421,7 +408,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>', ] _UPLOAD_DATE_REGEXES = [ - r'<meta property="og:published_time" content="(.+?)">', 
r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"', r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr', r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', @@ -451,8 +437,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): if description is None: description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) - upload_date = unified_strdate(self._html_search_regex( - self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) + upload_date = unified_strdate( + self._html_search_meta('og:published_time', webpage, + 'upload date', default=None) + or self._html_search_regex(self._UPLOAD_DATE_REGEXES, + webpage, 'upload date', default=None)) info.update({ 'id': clip_id, From 56e167e10897a99f99ad3b789ca68a780beec5e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Apr 2020 02:56:10 +0700 Subject: [PATCH 028/123] [prosiebensat1] Extract series metadata --- youtube_dl/extractor/prosiebensat1.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 74074606e..e47088292 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -11,6 +11,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + merge_dicts, unified_strdate, ) @@ -197,6 +198,10 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'upload_date': '20131231', 'duration': 5845.04, + 'series': 'CIRCUS HALLIGALLI', + 'season_number': 2, + 'episode': 'Episode 18 - Staffel 2', + 'episode_number': 18, }, }, { @@ -302,6 +307,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'ext': 'mp4', 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man', 'description': 'md5:6ddb02b0781c6adf778afea606652e38', + 'timestamp': 1382041620, 'upload_date': '20131017', 
'duration': 469.88, }, @@ -443,14 +449,15 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): or self._html_search_regex(self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) - info.update({ + json_ld = self._search_json_ld(webpage, clip_id, default={}) + + return merge_dicts(info, { 'id': clip_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, - }) - return info + }, json_ld) def _extract_playlist(self, url, webpage): playlist_id = self._html_search_regex( From 2c2363aec7ce2ea1962add23bcf4eb028467a964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Apr 2020 12:41:33 +0700 Subject: [PATCH 029/123] [tenplay] Relax _VALID_URL (closes #25001) --- youtube_dl/extractor/tenplay.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py index dff44a4e2..af325fea8 100644 --- a/youtube_dl/extractor/tenplay.py +++ b/youtube_dl/extractor/tenplay.py @@ -10,8 +10,8 @@ from ..utils import ( class TenPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/[^/]+/episodes/[^/]+/[^/]+/(?P<id>tpv\d{6}[a-z]{5})' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})' + _TESTS = [{ 'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga', 'info_dict': { 'id': '6060533435001', @@ -27,7 +27,10 @@ class TenPlayIE(InfoExtractor): 'format': 'bestvideo', 'skip_download': True, } - } + }, { + 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', + 'only_matching': True, + }] BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s' def _real_extract(self, url): From 49e8286d03d9c2a06df94fd8886d5679164709b3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 29 
Apr 2020 13:38:58 +0100 Subject: [PATCH 030/123] [tvplay] fix Viafree extraction(closes #15189)(closes #24473)(closes #24789) --- youtube_dl/extractor/tvplay.py | 131 +++++++++++---------------------- 1 file changed, 43 insertions(+), 88 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index d82d48f94..3c2450dd0 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_str, compat_urlparse, ) from ..utils import ( @@ -15,9 +14,7 @@ from ..utils import ( int_or_none, parse_iso8601, qualities, - smuggle_url, try_get, - unsmuggle_url, update_url_query, url_or_none, ) @@ -235,11 +232,6 @@ class TVPlayIE(InfoExtractor): ] def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - self._initialize_geo_bypass({ - 'countries': smuggled_data.get('geo_countries'), - }) - video_id = self._match_id(url) geo_country = self._search_regex( r'https?://[^/]+\.([a-z]{2})', url, @@ -285,8 +277,6 @@ class TVPlayIE(InfoExtractor): 'ext': ext, } if video_url.startswith('rtmp'): - if smuggled_data.get('skip_rtmp'): - continue m = re.search( r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url) if not m: @@ -347,115 +337,80 @@ class ViafreeIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?:www\.)? - viafree\. 
- (?: - (?:dk|no)/programmer| - se/program - ) - /(?:[^/]+/)+(?P<id>[^/?#&]+) + viafree\.(?P<country>dk|no|se) + /(?P<id>program(?:mer)?/(?:[^/]+/)+[^/?#&]+) ''' _TESTS = [{ - 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2', + 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', 'info_dict': { - 'id': '395375', + 'id': '757786', 'ext': 'mp4', - 'title': 'Husräddarna S02E02', - 'description': 'md5:4db5c933e37db629b5a2f75dfb34829e', - 'series': 'Husräddarna', - 'season': 'Säsong 2', + 'title': 'Det beste vorspielet - Sesong 2 - Episode 1', + 'description': 'md5:b632cb848331404ccacd8cd03e83b4c3', + 'series': 'Det beste vorspielet', 'season_number': 2, - 'duration': 2576, - 'timestamp': 1400596321, - 'upload_date': '20140520', + 'duration': 1116, + 'timestamp': 1471200600, + 'upload_date': '20160814', }, 'params': { 'skip_download': True, }, - 'add_ie': [TVPlayIE.ie_key()], }, { # with relatedClips 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1', - 'info_dict': { - 'id': '758770', - 'ext': 'mp4', - 'title': 'Sommaren med YouTube-stjärnorna S01E01', - 'description': 'md5:2bc69dce2c4bb48391e858539bbb0e3f', - 'series': 'Sommaren med YouTube-stjärnorna', - 'season': 'Säsong 1', - 'season_number': 1, - 'duration': 1326, - 'timestamp': 1470905572, - 'upload_date': '20160811', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [TVPlayIE.ie_key()], + 'only_matching': True, }, { # Different og:image URL schema 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', 'only_matching': True, }, { - 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', + 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2', 'only_matching': True, }, { 'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5', 'only_matching': True, 
}] + _GEO_BYPASS = False @classmethod def suitable(cls, url): return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url) def _real_extract(self, url): - video_id = self._match_id(url) + country, path = re.match(self._VALID_URL, url).groups() + content = self._download_json( + 'https://viafree-content.mtg-api.com/viafree-content/v1/%s/path/%s' % (country, path), path) + program = content['_embedded']['viafreeBlocks'][0]['_embedded']['program'] + guid = program['guid'] + meta = content['meta'] + title = meta['title'] - webpage = self._download_webpage(url, video_id) + try: + stream_href = self._download_json( + program['_links']['streamLink']['href'], guid, + headers=self.geo_verification_headers())['embedded']['prioritizedStreams'][0]['links']['stream']['href'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self.raise_geo_restricted(countries=[country]) + raise - data = self._parse_json( - self._search_regex( - r'(?s)window\.App\s*=\s*({.+?})\s*;\s*</script', - webpage, 'data', default='{}'), - video_id, transform_source=lambda x: re.sub( - r'(?s)function\s+[a-zA-Z_][\da-zA-Z_]*\s*\([^)]*\)\s*{[^}]*}\s*', - 'null', x), fatal=False) + formats = self._extract_m3u8_formats(stream_href, guid, 'mp4') + self._sort_formats(formats) + episode = program.get('episode') or {} - video_id = None - - if data: - video_id = try_get( - data, lambda x: x['context']['dispatcher']['stores'][ - 'ContentPageProgramStore']['currentVideo']['id'], - compat_str) - - # Fallback #1 (extract from og:image URL schema) - if not video_id: - thumbnail = self._og_search_thumbnail(webpage, default=None) - if thumbnail: - video_id = self._search_regex( - # Patterns seen: - # http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/inbox/765166/a2e95e5f1d735bab9f309fa345cc3f25.jpg - # http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/seasons/15204/758770/4a5ba509ca8bc043e1ebd1a76131cdf2.jpg - 
r'https?://[^/]+/imagecache/(?:[^/]+/)+(\d{6,})/', - thumbnail, 'video id', default=None) - - # Fallback #2. Extract from raw JSON string. - # May extract wrong video id if relatedClips is present. - if not video_id: - video_id = self._search_regex( - r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})', - webpage, 'video id') - - return self.url_result( - smuggle_url( - 'mtg:%s' % video_id, - { - 'geo_countries': [ - compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]], - # rtmp host mtgfs.fplive.net for viafree is unresolvable - 'skip_rtmp': True, - }), - ie=TVPlayIE.ie_key(), video_id=video_id) + return { + 'id': guid, + 'title': title, + 'thumbnail': meta.get('image'), + 'description': meta.get('description'), + 'series': episode.get('seriesTitle'), + 'episode_number': int_or_none(episode.get('episodeNumber')), + 'season_number': int_or_none(episode.get('seasonNumber')), + 'duration': int_or_none(try_get(program, lambda x: x['video']['duration']['milliseconds']), 1000), + 'timestamp': parse_iso8601(try_get(program, lambda x: x['availability']['start'])), + 'formats': formats, + } class TVPlayHomeIE(InfoExtractor): From 00efe66681ee988267a0d89b57cd483ef1bd24b5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 29 Apr 2020 14:56:32 +0100 Subject: [PATCH 031/123] [yahoo] fix GYAO Player extraction and relax title URL regex(closes #24178)(closes #24778) --- youtube_dl/extractor/yahoo.py | 40 ++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 238d9cea0..e4615376c 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -12,6 +12,7 @@ from ..compat import ( ) from ..utils import ( clean_html, + ExtractorError, int_or_none, mimetype2ext, parse_iso8601, @@ -368,31 +369,47 @@ class YahooGyaOPlayerIE(InfoExtractor): 'url': 
'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682', 'only_matching': True, }] + _GEO_BYPASS = False def _real_extract(self, url): video_id = self._match_id(url).replace('/', ':') - video = self._download_json( - 'https://gyao.yahoo.co.jp/dam/v1/videos/' + video_id, - video_id, query={ - 'fields': 'longDescription,title,videoId', - }, headers={ - 'X-User-Agent': 'Unknown Pc GYAO!/2.0.0 Web', - }) + headers = self.geo_verification_headers() + headers['Accept'] = 'application/json' + resp = self._download_json( + 'https://gyao.yahoo.co.jp/apis/playback/graphql', video_id, query={ + 'appId': 'dj00aiZpPUNJeDh2cU1RazU3UCZzPWNvbnN1bWVyc2VjcmV0Jng9NTk-', + 'query': '''{ + content(parameter: {contentId: "%s", logicaAgent: PC_WEB}) { + video { + delivery { + id + } + title + } + } +}''' % video_id, + }, headers=headers) + content = resp['data']['content'] + if not content: + msg = resp['errors'][0]['message'] + if msg == 'not in japan': + self.raise_geo_restricted(countries=['JP']) + raise ExtractorError(msg) + video = content['video'] return { '_type': 'url_transparent', 'id': video_id, 'title': video['title'], 'url': smuggle_url( - 'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['videoId'], + 'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['delivery']['id'], {'geo_countries': ['JP']}), - 'description': video.get('longDescription'), 'ie_key': BrightcoveNewIE.ie_key(), } class YahooGyaOIE(InfoExtractor): IE_NAME = 'yahoo:gyao' - _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title/[^/]+)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _VALID_URL = 
r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _TESTS = [{ 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/', 'info_dict': { @@ -405,6 +422,9 @@ class YahooGyaOIE(InfoExtractor): }, { 'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf', 'only_matching': True, + }, { + 'url': 'https://gyao.yahoo.co.jp/title/5b025a49-b2e5-4dc7-945c-09c6634afacf', + 'only_matching': True, }] def _real_extract(self, url): From 6b001ab981ed077b41f941ba6adacb42fc656058 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 May 2020 00:40:38 +0700 Subject: [PATCH 032/123] [youtube] Use redirected video id if any (closes #25063) --- youtube_dl/extractor/youtube.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index afaa12b1b..28886cff2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1227,6 +1227,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q', 'only_matching': True, }, + { + # invalid -> valid video id redirection + 'url': 'DJztXj2GPfl', + 'info_dict': { + 'id': 'DJztXj2GPfk', + 'ext': 'mp4', + 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)', + 'description': 'md5:bf577a41da97918e94fa9798d9228825', + 'upload_date': '20090125', + 'uploader': 'Prochorowka', + 'uploader_id': 'Prochorowka', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka', + 'artist': 'Panjabi MC', + 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix', + 'album': 'Beware of the Boys (Mundian To Bach Ke)', + }, + 'params': { + 'skip_download': True, + }, + } ] def __init__(self, *args, **kwargs): @@ -1678,7 +1698,10 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): # Get video webpage url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id - video_webpage = self._download_webpage(url, video_id) + video_webpage, urlh = self._download_webpage_handle(url, video_id) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query) + video_id = qs.get('v', [None])[0] or video_id # Attempt to extract SWF player URL mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) From 2be06b35d285866bf26c8fdb8cfb02df9d691922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2020 07:18:08 +0700 Subject: [PATCH 033/123] [youtube] Improve player id extraction and add tests --- test/test_youtube_signature.py | 22 +++++++++++++++++++ youtube_dl/extractor/youtube.py | 38 +++++++++++++++------------------ 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index f0c370eee..69df30eda 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -74,6 +74,28 @@ _TESTS = [ ] +class TestPlayerInfo(unittest.TestCase): + def test_youtube_extract_player_info(self): + PLAYER_URLS = ( + ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'), + # obsolete + ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'), + ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'), + ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'), + ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'), + ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'), + ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'), + ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'), + 
('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'), + ('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'), + ) + for player_url, expected_player_id in PLAYER_URLS: + expected_player_type = player_url.split('.')[-1] + player_type, player_id = YoutubeIE._extract_player_info(player_url) + self.assertEqual(player_type, expected_player_type) + self.assertEqual(player_id, expected_player_id) + + class TestSignature(unittest.TestCase): def setUp(self): TEST_DIR = os.path.dirname(os.path.abspath(__file__)) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 28886cff2..5ea66c962 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -426,6 +426,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?(1).+)? # if we found the ID, everything can follow $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' + _PLAYER_INFO_RE = ( + r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$', + r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$', + ) _formats = { '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, @@ -1273,14 +1277,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): """ Return a string representation of a signature """ return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) - def _extract_signature_function(self, video_id, player_url, example_sig): - id_m = re.match( - r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$', - player_url) - if not id_m: + @classmethod + def _extract_player_info(cls, player_url): + for player_re in cls._PLAYER_INFO_RE: + id_m = re.search(player_re, player_url) + if id_m: + break + else: raise ExtractorError('Cannot 
identify player %r' % player_url) - player_type = id_m.group('ext') - player_id = id_m.group('id') + return id_m.group('ext'), id_m.group('id') + + def _extract_signature_function(self, video_id, player_url, example_sig): + player_type, player_id = self._extract_player_info(player_url) # Read from filesystem cache func_id = '%s_%s_%s' % ( @@ -2009,22 +2017,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if self._downloader.params.get('verbose'): if player_url is None: - player_version = 'unknown' player_desc = 'unknown' else: - if player_url.endswith('swf'): - player_version = self._search_regex( - r'-(.+?)(?:/watch_as3)?\.swf$', player_url, - 'flash player', fatal=False) - player_desc = 'flash player %s' % player_version - else: - player_version = self._search_regex( - [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', - r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'], - player_url, - 'html5 player', fatal=False) - player_desc = 'html5 player %s' % player_version - + player_type, player_version = self._extract_player_info(player_url) + player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version) parts_sizes = self._signature_cache_id(encrypted_sig) self.to_screen('{%s} signature length %s, %s' % (format_id, parts_sizes, player_desc)) From a91e9e402737e69630c4f223a7df9f7ae7fc042a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2020 23:40:30 +0700 Subject: [PATCH 034/123] [extractor/common] Extract multiple JSON-LD entries --- youtube_dl/extractor/common.py | 41 ++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c51a3a07d..e9306d806 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1182,16 +1182,33 @@ class InfoExtractor(object): 'twitter card player') def _search_json_ld(self, html, video_id, 
expected_type=None, **kwargs): - json_ld = self._search_regex( - JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs) + json_ld_list = list(re.finditer(JSON_LD_RE, html)) default = kwargs.get('default', NO_DEFAULT) - if not json_ld: - return default if default is not NO_DEFAULT else {} # JSON-LD may be malformed and thus `fatal` should be respected. # At the same time `default` may be passed that assumes `fatal=False` # for _search_regex. Let's simulate the same behavior here as well. fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False - return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type) + json_ld = [] + for mobj in json_ld_list: + json_ld_item = self._parse_json( + mobj.group('json_ld'), video_id, fatal=fatal) + if not json_ld_item: + continue + if isinstance(json_ld_item, dict): + json_ld.append(json_ld_item) + elif isinstance(json_ld_item, (list, tuple)): + json_ld.extend(json_ld_item) + if json_ld: + json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type) + if json_ld: + return json_ld + if default is not NO_DEFAULT: + return default + elif fatal: + raise RegexNotFoundError('Unable to extract JSON-LD') + else: + self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message()) + return {} def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None): if isinstance(json_ld, compat_str): @@ -1256,10 +1273,10 @@ class InfoExtractor(object): extract_interaction_statistic(e) for e in json_ld: - if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')): + if '@context' in e: item_type = e.get('@type') if expected_type is not None and expected_type != item_type: - return info + continue if item_type in ('TVEpisode', 'Episode'): episode_name = unescapeHTML(e.get('name')) info.update({ @@ -1293,11 +1310,17 @@ class InfoExtractor(object): }) elif item_type == 'VideoObject': extract_video_object(e) - continue + if 
expected_type is None: + continue + else: + break video = e.get('video') if isinstance(video, dict) and video.get('@type') == 'VideoObject': extract_video_object(video) - break + if expected_type is None: + continue + else: + break return dict((k, v) for k, v in info.items() if v is not None) @staticmethod From 633e33d682870613c906c4fcba03630ed68290b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2020 23:42:51 +0700 Subject: [PATCH 035/123] [crunchyroll] Fix and improve extraction (closes #25096, closes #25060) --- youtube_dl/extractor/crunchyroll.py | 56 ++++++++++++++++------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 85a9a577f..bc2d1fa8b 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -13,6 +13,7 @@ from ..compat import ( compat_b64decode, compat_etree_Element, compat_etree_fromstring, + compat_str, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, @@ -25,9 +26,9 @@ from ..utils import ( intlist_to_bytes, int_or_none, lowercase_escape, + merge_dicts, remove_end, sanitized_Request, - unified_strdate, urlencode_postdata, xpath_text, ) @@ -136,6 +137,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): # rtmp 'skip_download': True, }, + 'skip': 'Video gone', }, { 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1', 'info_dict': { @@ -157,11 +159,12 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): 'info_dict': { 'id': '702409', 'ext': 'mp4', - 'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant', - 'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9', + 'title': compat_str, + 'description': compat_str, 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'TV TOKYO', - 'upload_date': '20160508', + 'uploader': 'Re:Zero Partners', + 'timestamp': 1462098900, + 'upload_date': 
'20160501', }, 'params': { # m3u8 download @@ -172,12 +175,13 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): 'info_dict': { 'id': '727589', 'ext': 'mp4', - 'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!", - 'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d', + 'title': compat_str, + 'description': compat_str, 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Kadokawa Pictures Inc.', - 'upload_date': '20170118', - 'series': "KONOSUBA -God's blessing on this wonderful world!", + 'timestamp': 1484130900, + 'upload_date': '20170111', + 'series': compat_str, 'season': "KONOSUBA -God's blessing on this wonderful world! 2", 'season_number': 2, 'episode': 'Give Me Deliverance From This Judicial Injustice!', @@ -200,10 +204,11 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): 'info_dict': { 'id': '535080', 'ext': 'mp4', - 'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka', - 'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".', + 'title': compat_str, + 'description': compat_str, 'uploader': 'Marvelous AQL Inc.', - 'upload_date': '20091021', + 'timestamp': 1255512600, + 'upload_date': '20091014', }, 'params': { # Just test metadata extraction @@ -224,15 +229,17 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): # just test metadata extraction 'skip_download': True, }, + 'skip': 'Video gone', }, { # A video with a vastly different season name compared to the series name 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532', 'info_dict': { 'id': '590532', 'ext': 'mp4', - 'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test', - 'description': 'Mahiro and Nyaruko talk about official certification.', + 'title': compat_str, + 'description': compat_str, 'uploader': 'TV TOKYO', + 'timestamp': 1330956000, 'upload_date': '20120305', 'series': 'Nyarko-san: Another Crawling Chaos', 'season': 'Haiyoru! 
Nyaruani (ONA)', @@ -442,23 +449,21 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text webpage, 'language', default=None, group='lang') video_title = self._html_search_regex( - r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>', - webpage, 'video_title') + (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>', + r'<title>(.+?),\s+-\s+.+? Crunchyroll'), + webpage, 'video_title', default=None) + if not video_title: + video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage)) video_title = re.sub(r' {2,}', ' ', video_title) video_description = (self._parse_json(self._html_search_regex( r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id, webpage, 'description', default='{}'), video_id) or media_metadata).get('description') if video_description: video_description = lowercase_escape(video_description.replace(r'\r\n', '\n')) - video_upload_date = self._html_search_regex( - [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'], - webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) - if video_upload_date: - video_upload_date = unified_strdate(video_upload_date) video_uploader = self._html_search_regex( # try looking for both an uploader that's a link and one that's not [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'], - webpage, 'video_uploader', fatal=False) + webpage, 'video_uploader', default=False) formats = [] for stream in media.get('streams', []): @@ -611,14 +616,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', webpage, 'season number', default=None)) - return { + info = self._search_json_ld(webpage, video_id, default={}) + + 
return merge_dicts({ 'id': video_id, 'title': video_title, 'description': video_description, 'duration': duration, 'thumbnail': thumbnail, 'uploader': video_uploader, - 'upload_date': video_upload_date, 'series': series, 'season': season, 'season_number': season_number, @@ -626,7 +632,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'episode_number': episode_number, 'subtitles': subtitles, 'formats': formats, - } + }, info) class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): From 8e27247368ffa1f012b86219933908104aab9ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2020 23:59:25 +0700 Subject: [PATCH 036/123] [ChangeLog] Actualize [ci skip] --- ChangeLog | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/ChangeLog b/ChangeLog index f753972c4..6cd586019 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,37 @@ +version <unreleased> + +Core ++ [extractor/common] Extract multiple JSON-LD entries +* [options] Clarify doc on --exec command (#19087, #24883) +* [extractor/common] Skip malformed ISM manifest XMLs while extracting + ISM formats (#24667) + +Extractors +* [crunchyroll] Fix and improve extraction (#25096, #25060) +* [youtube] Improve player id extraction +* [youtube] Use redirected video id if any (#25063) +* [yahoo] Fix GYAO Player extraction and relax URL regular expression + (#24178, #24778) +* [tvplay] Fix Viafree extraction (#15189, #24473, #24789) +* [tenplay] Relax URL regular expression (#25001) ++ [prosiebensat1] Extract series metadata +* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948) +- [prosiebensat1] Remove 7tv.de support (#24948) +* [youtube] Fix DRM videos detection (#24736) +* [thisoldhouse] Fix video id extraction (#24548, #24549) ++ [soundcloud] Extract AAC format (#19173, #24708) +* [youtube] Skip broken multifeed videos (#24711) +* [nova:embed] Fix extraction (#24700) +* [motherless] Fix extraction 
(#24699) +* [twitch:clips] Extend URL regular expression (#24290, #24642) +* [tv4] Fix ISM formats extraction (#24667) +* [tele5] Fix extraction (#24553) ++ [mofosex] Add support for generic embeds (#24633) ++ [youporn] Add support for generic embeds ++ [spankwire] Add support for generic embeds (#24633) +* [spankwire] Fix extraction (#18924, #20648) + + version 2020.03.24 Core From 3b22f0aabe1fed0ec4b34ab3f44006b91e62a829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 May 2020 00:05:05 +0700 Subject: [PATCH 037/123] release 2020.05.03 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- README.md | 6 +++--- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 9 files changed, 18 insertions(+), 17 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 40a869113..487de9298 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. 
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.03.24** +- [ ] I've verified that I'm running youtube-dl version **2020.05.03** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.03.24 + [debug] youtube-dl version 2020.05.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 7b10df3d4..da4b17db9 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.03.24** +- [ ] I've verified that I'm running youtube-dl version **2020.05.03** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 04bbcfa68..e64e39516 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. 
Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.03.24** +- [ ] I've verified that I'm running youtube-dl version **2020.05.03** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index a9e231817..11ac95173 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.03.24** +- [ ] I've verified that I'm running youtube-dl version **2020.05.03** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.03.24 + [debug] youtube-dl version 2020.05.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 4a3d32d51..c75c2a073 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.03.24** +- [ ] I've verified that I'm running youtube-dl version **2020.05.03** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 6cd586019..200df7c03 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2020.05.03 Core + [extractor/common] Extract multiple JSON-LD entries diff --git a/README.md b/README.md index 4f54a5240..12dc00b3e 100644 --- a/README.md +++ b/README.md @@ -434,9 +434,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo either the path to the binary or its containing directory. --exec CMD Execute a command on the file after - downloading, similar to find's -exec - syntax. Example: --exec 'adb push {} - /sdcard/Music/ && rm {}' + downloading and post-processing, similar to + find's -exec syntax. 
Example: --exec 'adb + push {} /sdcard/Music/ && rm {}' --convert-subs FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 174b83bf3..843dc2dc0 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -497,6 +497,7 @@ - **MNetTV** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** + - **MofosexEmbed** - **Mojvideo** - **Morningstar**: morningstar.com - **Motherless** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5aedd3268..f933eb8ec 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.03.24' +__version__ = '2020.05.03' From 2cf485b9f81c8b801792edbc7988765b538520e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 May 2020 21:15:19 +0700 Subject: [PATCH 038/123] [puhutv] Remove no longer available HTTP formats (closes #25124) --- youtube_dl/extractor/puhutv.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index fb704a3c4..ca71665e0 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -82,17 +82,6 @@ class PuhuTVIE(InfoExtractor): urls = [] formats = [] - def add_http_from_hls(m3u8_f): - http_url = m3u8_f['url'].replace('/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4') - if http_url != m3u8_f['url']: - f = m3u8_f.copy() - f.update({ - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - 'url': http_url, - }) - formats.append(f) - for video in videos['data']['videos']: media_url = url_or_none(video.get('url')) if not media_url or media_url in urls: @@ -101,12 +90,9 @@ class PuhuTVIE(InfoExtractor): playlist = video.get('is_playlist') if (video.get('stream_type') == 'hls' and playlist is True) or 
'playlist.m3u8' in media_url: - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( media_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - for m3u8_f in m3u8_formats: - formats.append(m3u8_f) - add_http_from_hls(m3u8_f) + m3u8_id='hls', fatal=False)) continue quality = int_or_none(video.get('quality')) @@ -128,8 +114,6 @@ class PuhuTVIE(InfoExtractor): format_id += '-%sp' % quality f['format_id'] = format_id formats.append(f) - if is_hls: - add_http_from_hls(f) self._sort_formats(formats) creator = try_get( From 36301a939b8b9d3b021cb327086bc139cab6a38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2020 04:19:33 +0700 Subject: [PATCH 039/123] [utils] Improve cookie files support + Add support for UTF-8 in cookie files * Skip malformed cookie file entries instead of crashing (invalid entry len, invalid expires at) --- test/test_YoutubeDLCookieJar.py | 7 ++ test/testdata/cookies/malformed_cookies.txt | 9 +++ youtube_dl/utils.py | 82 +++++++++++++++++++-- 3 files changed, 93 insertions(+), 5 deletions(-) create mode 100644 test/testdata/cookies/malformed_cookies.txt diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index f959798de..05f48bd74 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase): assert_cookie_has_value('HTTPONLY_COOKIE') assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') + def test_malformed_cookies(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + # Cookies should be empty since all malformed cookie file entries + # will be ignored + self.assertFalse(cookiejar._cookies) + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/cookies/malformed_cookies.txt 
b/test/testdata/cookies/malformed_cookies.txt new file mode 100644 index 000000000..17bc40354 --- /dev/null +++ b/test/testdata/cookies/malformed_cookies.txt @@ -0,0 +1,9 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +# Cookie file entry with invalid number of fields - 6 instead of 7 +www.foobar.foobar FALSE / FALSE 0 COOKIE + +# Cookie file entry with invalid expires at +www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 38262bee4..112279ed7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -7,6 +7,7 @@ import base64 import binascii import calendar import codecs +import collections import contextlib import ctypes import datetime @@ -30,6 +31,7 @@ import ssl import subprocess import sys import tempfile +import time import traceback import xml.etree.ElementTree import zlib @@ -2735,14 +2737,66 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): 1. https://curl.haxx.se/docs/http-cookies.html """ _HTTPONLY_PREFIX = '#HttpOnly_' + _ENTRY_LEN = 7 + _HEADER = '''# Netscape HTTP Cookie File +# This file is generated by youtube-dl. Do not edit. + +''' + _CookieFileEntry = collections.namedtuple( + 'CookieFileEntry', + ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """ + Save cookies to a file. + + Most of the code is taken from CPython 3.8 and slightly adapted + to support cookie files with UTF-8 in both python 2 and 3. 
+ """ + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + # Store session cookies with `expires` set to 0 instead of an empty # string for cookie in self: if cookie.expires is None: cookie.expires = 0 - compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires) + + with io.open(filename, 'w', encoding='utf-8') as f: + f.write(self._HEADER) + now = time.time() + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + if cookie.secure: + secure = 'TRUE' + else: + secure = 'FALSE' + if cookie.domain.startswith('.'): + initial_dot = 'TRUE' + else: + initial_dot = 'FALSE' + if cookie.expires is not None: + expires = compat_str(cookie.expires) + else: + expires = '' + if cookie.value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. 
+ name = '' + value = cookie.name + else: + name = cookie.name + value = cookie.value + f.write( + '\t'.join([cookie.domain, initial_dot, cookie.path, + secure, expires, name, value]) + '\n') def load(self, filename=None, ignore_discard=False, ignore_expires=False): """Load cookies from a file.""" @@ -2752,12 +2806,30 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): else: raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + def prepare_line(line): + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + # comments and empty lines are fine + if line.startswith('#') or not line.strip(): + return line + cookie_list = line.split('\t') + if len(cookie_list) != self._ENTRY_LEN: + raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list)) + cookie = self._CookieFileEntry(*cookie_list) + if cookie.expires_at and not cookie.expires_at.isdigit(): + raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + return line + cf = io.StringIO() - with open(filename) as f: + with io.open(filename, encoding='utf-8') as f: for line in f: - if line.startswith(self._HTTPONLY_PREFIX): - line = line[len(self._HTTPONLY_PREFIX):] - cf.write(compat_str(line)) + try: + cf.write(prepare_line(line)) + except compat_cookiejar.LoadError as e: + write_string( + 'WARNING: skipping cookie file entry due to %s: %r\n' + % (e, line), sys.stderr) + continue cf.seek(0) self._really_load(cf, filename, ignore_discard, ignore_expires) # Session cookies are denoted by either `expires` field set to From f443feb7825acef16babbf281081b12dd53c9aa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2020 05:09:07 +0700 Subject: [PATCH 040/123] [dailymotion] Fix typo --- youtube_dl/extractor/dailymotion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 327fdb04a..b8529050c 100644 --- 
a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -32,7 +32,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): @staticmethod def _get_cookie_value(cookies, name): - cookie = cookies.get('name') + cookie = cookies.get(name) if cookie: return cookie.value From 6148b0929d780bfd9cd176988cb9e286175eb119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2020 05:54:10 +0700 Subject: [PATCH 041/123] [compat] Introduce compat_cookiejar_Cookie --- youtube_dl/compat.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index d1b86bd13..0ee9bc760 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -57,6 +57,17 @@ try: except ImportError: # Python 2 import cookielib as compat_cookiejar +if sys.version_info[0] == 2: + class compat_cookiejar_Cookie(compat_cookiejar.Cookie): + def __init__(self, version, name, value, *args, **kwargs): + if isinstance(name, compat_str): + name = name.encode() + if isinstance(value, compat_str): + value = value.encode() + compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs) +else: + compat_cookiejar_Cookie = compat_cookiejar.Cookie + try: import http.cookies as compat_cookies except ImportError: # Python 2 @@ -2987,6 +2998,7 @@ __all__ = [ 'compat_basestring', 'compat_chr', 'compat_cookiejar', + 'compat_cookiejar_Cookie', 'compat_cookies', 'compat_ctypes_WINFUNCTYPE', 'compat_etree_Element', From 83c6b7f224bf628b2cd0ddf98ddf6510d0d64d91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2020 06:00:37 +0700 Subject: [PATCH 042/123] [extractor/common] Use compat_cookiejar_Cookie for _set_cookie (closes #23256, closes #24776) To always ensure cookie name and value are bytestrings on python 2. 
--- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e9306d806..a61753b17 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -15,7 +15,7 @@ import time import math from ..compat import ( - compat_cookiejar, + compat_cookiejar_Cookie, compat_cookies, compat_etree_Element, compat_etree_fromstring, @@ -2843,7 +2843,7 @@ class InfoExtractor(object): def _set_cookie(self, domain, name, value, expire_time=None, port=None, path='/', secure=False, discard=False, rest={}, **kwargs): - cookie = compat_cookiejar.Cookie( + cookie = compat_cookiejar_Cookie( 0, name, value, port, port is not None, domain, True, domain.startswith('.'), path, True, secure, expire_time, discard, None, None, rest) From ba0582edeaf6e7e83e4bf933dbbd22951c1d7f7d Mon Sep 17 00:00:00 2001 From: hh0rva1h <61889859+hh0rva1h@users.noreply.github.com> Date: Tue, 5 May 2020 01:22:50 +0200 Subject: [PATCH 043/123] [orf] Add support for more radio stations (closes #24938) (#24968) --- youtube_dl/extractor/extractors.py | 10 +++ youtube_dl/extractor/orf.py | 139 ++++++++++++++++++++++++++++- 2 files changed, 146 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7f67256be..18e49ca29 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -808,6 +808,16 @@ from .orf import ( ORFFM4IE, ORFFM4StoryIE, ORFOE1IE, + ORFOE3IE, + ORFNOEIE, + ORFWIEIE, + ORFBGLIE, + ORFOOEIE, + ORFSTMIE, + ORFKTNIE, + ORFSBGIE, + ORFTIRIE, + ORFVBGIE, ORFIPTVIE, ) from .outsidetv import OutsideTVIE diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index d54b8ace6..700ce448c 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -162,13 +162,12 @@ class ORFTVthekIE(InfoExtractor): class ORFRadioIE(InfoExtractor): def _real_extract(self, 
url): mobj = re.match(self._VALID_URL, url) - station = mobj.group('station') show_date = mobj.group('date') show_id = mobj.group('show') data = self._download_json( 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' - % (station, show_id, show_date), show_id) + % (self._API_STATION, show_id, show_date), show_id) entries = [] for info in data['streams']: @@ -183,7 +182,7 @@ class ORFRadioIE(InfoExtractor): duration = end - start if end and start else None entries.append({ 'id': loop_stream_id.replace('.mp3', ''), - 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, loop_stream_id), + 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id), 'title': title, 'description': clean_html(data.get('subtitle')), 'duration': duration, @@ -205,6 +204,8 @@ class ORFFM4IE(ORFRadioIE): IE_NAME = 'orf:fm4' IE_DESC = 'radio FM4' _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)' + _API_STATION = 'fm4' + _LOOP_STATION = 'fm4' _TEST = { 'url': 'http://fm4.orf.at/player/20170107/4CC', @@ -223,10 +224,142 @@ class ORFFM4IE(ORFRadioIE): } +class ORFNOEIE(ORFRadioIE): + IE_NAME = 'orf:noe' + IE_DESC = 'Radio Niederösterreich' + _VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'noe' + _LOOP_STATION = 'oe2n' + + _TEST = { + 'url': 'https://noe.orf.at/player/20200423/NGM', + 'only_matching': True, + } + + +class ORFWIEIE(ORFRadioIE): + IE_NAME = 'orf:wien' + IE_DESC = 'Radio Wien' + _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'wie' + _LOOP_STATION = 'oe2w' + + _TEST = { + 'url': 'https://wien.orf.at/player/20200423/WGUM', + 'only_matching': True, + } + + +class ORFBGLIE(ORFRadioIE): + IE_NAME = 'orf:burgenland' + IE_DESC = 'Radio Burgenland' + _VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'bgl' + 
_LOOP_STATION = 'oe2b' + + _TEST = { + 'url': 'https://burgenland.orf.at/player/20200423/BGM', + 'only_matching': True, + } + + +class ORFOOEIE(ORFRadioIE): + IE_NAME = 'orf:oberoesterreich' + IE_DESC = 'Radio Oberösterreich' + _VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'ooe' + _LOOP_STATION = 'oe2o' + + _TEST = { + 'url': 'https://ooe.orf.at/player/20200423/OGMO', + 'only_matching': True, + } + + +class ORFSTMIE(ORFRadioIE): + IE_NAME = 'orf:steiermark' + IE_DESC = 'Radio Steiermark' + _VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'stm' + _LOOP_STATION = 'oe2st' + + _TEST = { + 'url': 'https://steiermark.orf.at/player/20200423/STGMS', + 'only_matching': True, + } + + +class ORFKTNIE(ORFRadioIE): + IE_NAME = 'orf:kaernten' + IE_DESC = 'Radio Kärnten' + _VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'ktn' + _LOOP_STATION = 'oe2k' + + _TEST = { + 'url': 'https://kaernten.orf.at/player/20200423/KGUMO', + 'only_matching': True, + } + + +class ORFSBGIE(ORFRadioIE): + IE_NAME = 'orf:salzburg' + IE_DESC = 'Radio Salzburg' + _VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'sbg' + _LOOP_STATION = 'oe2s' + + _TEST = { + 'url': 'https://salzburg.orf.at/player/20200423/SGUM', + 'only_matching': True, + } + + +class ORFTIRIE(ORFRadioIE): + IE_NAME = 'orf:tirol' + IE_DESC = 'Radio Tirol' + _VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'tir' + _LOOP_STATION = 'oe2t' + + _TEST = { + 'url': 'https://tirol.orf.at/player/20200423/TGUMO', + 'only_matching': True, + } + + +class ORFVBGIE(ORFRadioIE): + IE_NAME = 'orf:vorarlberg' + IE_DESC = 'Radio Vorarlberg' + _VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 
'vbg' + _LOOP_STATION = 'oe2v' + + _TEST = { + 'url': 'https://vorarlberg.orf.at/player/20200423/VGUM', + 'only_matching': True, + } + + +class ORFOE3IE(ORFRadioIE): + IE_NAME = 'orf:oe3' + IE_DESC = 'Radio Österreich 3' + _VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'oe3' + _LOOP_STATION = 'oe3' + + _TEST = { + 'url': 'https://oe3.orf.at/player/20200424/3WEK', + 'only_matching': True, + } + + class ORFOE1IE(ORFRadioIE): IE_NAME = 'orf:oe1' IE_DESC = 'Radio Österreich 1' _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' + _API_STATION = 'oe1' + _LOOP_STATION = 'oe1' _TEST = { 'url': 'http://oe1.orf.at/player/20170108/456544', From a0d742b1af7107ac0ca97f80fe6c658c8877a693 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 May 2020 11:19:19 +0100 Subject: [PATCH 044/123] [uol] fix extraction(closes #22007) --- youtube_dl/extractor/uol.py | 139 ++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 77 deletions(-) diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py index 08f0c072e..628adf219 100644 --- a/youtube_dl/extractor/uol.py +++ b/youtube_dl/extractor/uol.py @@ -2,12 +2,17 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse_urlencode, +) from ..utils import ( clean_html, int_or_none, parse_duration, + parse_iso8601, + qualities, update_url_query, - str_or_none, ) @@ -16,21 +21,25 @@ class UOLIE(InfoExtractor): _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)' _TESTS = [{ 'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931', - 'md5': '25291da27dc45e0afb5718a8603d3816', + 'md5': '4f1e26683979715ff64e4e29099cf020', 'info_dict': { 'id': '15951931', 'ext': 'mp4', 'title': 'Miss simpatia é 
encontrada morta', 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2', + 'timestamp': 1470421860, + 'upload_date': '20160805', } }, { 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326', - 'md5': 'e41a2fb7b7398a3a46b6af37b15c00c9', + 'md5': '2850a0e8dfa0a7307e04a96c5bdc5bc2', 'info_dict': { 'id': '15954259', 'ext': 'mp4', 'title': 'Incêndio destrói uma das maiores casas noturnas de Londres', 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.', + 'timestamp': 1470674520, + 'upload_date': '20160808', } }, { 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931', @@ -55,91 +64,55 @@ class UOLIE(InfoExtractor): 'only_matching': True, }] - _FORMATS = { - '2': { - 'width': 640, - 'height': 360, - }, - '5': { - 'width': 1280, - 'height': 720, - }, - '6': { - 'width': 426, - 'height': 240, - }, - '7': { - 'width': 1920, - 'height': 1080, - }, - '8': { - 'width': 192, - 'height': 144, - }, - '9': { - 'width': 568, - 'height': 320, - }, - '11': { - 'width': 640, - 'height': 360, - } - } - def _real_extract(self, url): video_id = self._match_id(url) - media_id = None - - if video_id.isdigit(): - media_id = video_id - - if not media_id: - embed_page = self._download_webpage( - 'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, - video_id, 'Downloading embed page', fatal=False) - if embed_page: - media_id = self._search_regex( - (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'), - embed_page, 'media id', default=None) - - if not media_id: - webpage = self._download_webpage(url, video_id) - media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id') video_data = self._download_json( - 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id, - media_id)['item'] + # https://api.mais.uol.com.br/apiuol/v4/player/data/[MEDIA_ID] + 'https://api.mais.uol.com.br/apiuol/v3/media/detail/' + video_id, 
+ video_id)['item'] + media_id = compat_str(video_data['mediaId']) title = video_data['title'] + ver = video_data.get('revision', 2) - query = { - 'ver': video_data.get('numRevision', 2), - 'r': 'http://mais.uol.com.br', - } - for k in ('token', 'sign'): - v = video_data.get(k) - if v: - query[k] = v - + uol_formats = self._download_json( + 'https://croupier.mais.uol.com.br/v3/formats/%s/jsonp' % media_id, + media_id) + quality = qualities(['mobile', 'WEBM', '360p', '720p', '1080p']) formats = [] - for f in video_data.get('formats', []): + for format_id, f in uol_formats.items(): + if not isinstance(f, dict): + continue f_url = f.get('url') or f.get('secureUrl') if not f_url: continue + query = { + 'ver': ver, + 'r': 'http://mais.uol.com.br', + } + for k in ('token', 'sign'): + v = f.get(k) + if v: + query[k] = v f_url = update_url_query(f_url, query) - format_id = str_or_none(f.get('id')) - if format_id == '10': - formats.extend(self._extract_m3u8_formats( - f_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) + format_id = format_id + if format_id == 'HLS': + m3u8_formats = self._extract_m3u8_formats( + f_url, media_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + encoded_query = compat_urllib_parse_urlencode(query) + for m3u8_f in m3u8_formats: + m3u8_f['extra_param_to_segment_url'] = encoded_query + m3u8_f['url'] = update_url_query(m3u8_f['url'], query) + formats.extend(m3u8_formats) continue - fmt = { + formats.append({ 'format_id': format_id, 'url': f_url, - 'source_preference': 1, - } - fmt.update(self._FORMATS.get(format_id, {})) - formats.append(fmt) - self._sort_formats(formats, ('height', 'width', 'source_preference', 'tbr', 'ext')) + 'quality': quality(format_id), + 'preference': -1, + }) + self._sort_formats(formats) tags = [] for tag in video_data.get('tags', []): @@ -148,12 +121,24 @@ class UOLIE(InfoExtractor): continue tags.append(tag_description) + thumbnails = [] + for q in ('Small', 'Medium', 'Wmedium', 'Large', 
'Wlarge', 'Xlarge'): + q_url = video_data.get('thumb' + q) + if not q_url: + continue + thumbnails.append({ + 'id': q, + 'url': q_url, + }) + return { 'id': media_id, 'title': title, - 'description': clean_html(video_data.get('desMedia')), - 'thumbnail': video_data.get('thumbnail'), - 'duration': int_or_none(video_data.get('durationSeconds')) or parse_duration(video_data.get('duration')), + 'description': clean_html(video_data.get('description')), + 'thumbnails': thumbnails, + 'duration': parse_duration(video_data.get('duration')), 'tags': tags, 'formats': formats, + 'timestamp': parse_iso8601(video_data.get('publishDate'), ' '), + 'view_count': int_or_none(video_data.get('viewsQtty')), } From d66aed88bf0671b9b149ae836b5deb914488d158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2020 20:01:25 +0700 Subject: [PATCH 045/123] [downloader/http] Finish downloading once received data length matches expected Always do this if possible, i.e. if Content-Length or expected length is known, not only in test. This will save unnecessary last extra loop trying to read 0 bytes. 
--- youtube_dl/downloader/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 3c72ea18b..970103a8d 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -299,7 +299,7 @@ class HttpFD(FileDownloader): 'elapsed': now - ctx.start_time, }) - if is_test and byte_counter == data_len: + if data_len is not None and byte_counter == data_len: break if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: From 41c1621f268182f4c8f1631cfa8705f3c651c3d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2020 21:30:27 +0700 Subject: [PATCH 046/123] [downloader/http] Request last data block of exact remaining size Always request last data block of exact size remaining to download if possible not the current block size. --- youtube_dl/downloader/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 970103a8d..5046878df 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -227,7 +227,7 @@ class HttpFD(FileDownloader): while True: try: # Download and write - data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) + data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter)) # socket.timeout is a subclass of socket.error but may not have # errno set except socket.timeout as e: From c81040f0f1cf84644fd82f5b7b56430e9c136bc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 May 2020 23:20:14 +0700 Subject: [PATCH 047/123] [iprima] Improve extraction (closes #25138) --- youtube_dl/extractor/iprima.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/iprima.py 
b/youtube_dl/extractor/iprima.py index 11bbeb592..53a550c11 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -16,12 +16,22 @@ class IPrimaIE(InfoExtractor): _GEO_BYPASS = False _TESTS = [{ - 'url': 'http://play.iprima.cz/gondici-s-r-o-33', + 'url': 'https://prima.iprima.cz/particka/92-epizoda', 'info_dict': { - 'id': 'p136534', + 'id': 'p51388', 'ext': 'mp4', - 'title': 'Gondíci s. r. o. (34)', - 'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd', + 'title': 'Partička (92)', + 'description': 'md5:859d53beae4609e6dd7796413f1b6cac', + }, + 'params': { + 'skip_download': True, # m3u8 download + }, + }, { + 'url': 'https://cnn.iprima.cz/videa/70-epizoda', + 'info_dict': { + 'id': 'p681554', + 'ext': 'mp4', + 'title': 'HLAVNÍ ZPRÁVY 3.5.2020', }, 'params': { 'skip_download': True, # m3u8 download @@ -68,9 +78,15 @@ class IPrimaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + title = self._og_search_title( + webpage, default=None) or self._search_regex( + r'<h1>([^<]+)', webpage, 'title') + video_id = self._search_regex( (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)', - r'data-product="([^"]+)">'), + r'data-product="([^"]+)">', + r'id=["\']player-(p\d+)"', + r'playerId\s*:\s*["\']player-(p\d+)'), webpage, 'real id') playerpage = self._download_webpage( @@ -125,8 +141,8 @@ class IPrimaIE(InfoExtractor): return { 'id': video_id, - 'title': self._og_search_title(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'formats': formats, - 'description': self._og_search_description(webpage), + 'description': self._og_search_description(webpage, default=None), } From 70c1610701fa647648574fd86e908d95b7fea9e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 May 2020 17:42:30 +0700 Subject: [PATCH 
048/123] [youtube] Improve signature cipher extraction (closes #25188) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5ea66c962..9f7483905 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1966,7 +1966,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url = url_or_none(fmt.get('url')) if not url: - cipher = fmt.get('cipher') + cipher = fmt.get('cipher') or fmt.get('signatureCipher') if not cipher: continue url_data = compat_parse_qs(cipher) From a03c5d5db839dadf2ceebe6ba46738725b715fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 May 2020 18:07:05 +0700 Subject: [PATCH 049/123] [ChangeLog] Actualize [ci skip] --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index 200df7c03..fd68814db 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version <unreleased> + +Core +* [downloader/http] Request last data block of exact remaining size +* [downloader/http] Finish downloading once received data length matches + expected +* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always + ensure cookie name and value are bytestrings on python 2 (#23256, #24776) ++ [compat] Introduce compat_cookiejar_Cookie +* [utils] Improve cookie files support + + Add support for UTF-8 in cookie files + * Skip malformed cookie file entries instead of crashing (invalid entry + length, invalid expires at) + +Extractors +* [youtube] Improve signature cipher extraction (#25187, #25188) +* [iprima] Improve extraction (#25138) +* [uol] Fix extraction (#22007) ++ [orf] Add support for more radio stations (#24938, #24968) +* [dailymotion] Fix typo +- [puhutv] Remove no longer available HTTP formats (#25124) + + version 2020.05.03 Core From a7aca308c4a8b78327e55d8970dc3d8b57363f40 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 May 2020 18:10:37 +0700 Subject: [PATCH 050/123] release 2020.05.08 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 10 ++++++++++ youtube_dl/version.py | 2 +- 8 files changed, 24 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 487de9298..4999154e6 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.05.03** +- [ ] I've verified that I'm running youtube-dl version **2020.05.08** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.05.03 + [debug] youtube-dl version 2020.05.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index da4b17db9..be994f368 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.05.03** +- [ ] I've verified that I'm running youtube-dl version **2020.05.08** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index e64e39516..f05326c8c 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.05.03** +- [ ] I've verified that I'm running youtube-dl version **2020.05.08** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 11ac95173..0dbb867cd 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.05.03** +- [ ] I've verified that I'm running youtube-dl version **2020.05.08** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.05.03 + [debug] youtube-dl version 2020.05.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index c75c2a073..4b31c88ed 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.05.03** +- [ ] I've verified that I'm running youtube-dl version **2020.05.08** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index fd68814db..7805c62b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2020.05.08 Core * [downloader/http] Request last data block of exact remaining size diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 843dc2dc0..35c1050e5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -620,11 +620,21 @@ - **Ooyala** - **OoyalaExternal** - **OraTV** + - **orf:burgenland**: Radio Burgenland - **orf:fm4**: radio FM4 - **orf:fm4:story**: fm4.orf.at stories - **orf:iptv**: iptv.ORF.at + - **orf:kaernten**: Radio Kärnten + - **orf:noe**: Radio Niederösterreich + - **orf:oberoesterreich**: Radio Oberösterreich - **orf:oe1**: Radio Österreich 1 + - **orf:oe3**: Radio Österreich 3 + - **orf:salzburg**: Radio Salzburg + - **orf:steiermark**: Radio Steiermark + - **orf:tirol**: Radio Tirol - **orf:tvthek**: ORF TVthek + - **orf:vorarlberg**: Radio Vorarlberg + - **orf:wien**: Radio Wien - **OsnatelTV** - **OutsideTV** - **PacktPub** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f933eb8ec..b08ee126e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.05.03' +__version__ = '2020.05.08' From acb623ab561995d1346a4341eb3936a45feecc63 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 12 May 2020 20:48:16 +0100 Subject: [PATCH 051/123] [spike] fix Bellator mgid extraction(closes #25195) --- youtube_dl/extractor/spike.py 
| 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 7c11ea7aa..aabff7a3c 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -8,15 +8,10 @@ class BellatorIE(MTVServicesInfoExtractor): _TESTS = [{ 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg', 'info_dict': { - 'id': 'b55e434e-fde1-4a98-b7cc-92003a034de4', - 'ext': 'mp4', - 'title': 'Douglas Lima vs. Paul Daley - Round 1', - 'description': 'md5:805a8dd29310fd611d32baba2f767885', - }, - 'params': { - # m3u8 download - 'skip_download': True, + 'title': 'Michael Page vs. Evangelista Cyborg', + 'description': 'md5:0d917fc00ffd72dd92814963fc6cbb05', }, + 'playlist_count': 3, }, { 'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page', 'only_matching': True, @@ -25,6 +20,9 @@ class BellatorIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.bellator.com/feeds/mrss/' _GEO_COUNTRIES = ['US'] + def _extract_mgid(self, webpage): + return self._extract_triforce_mgid(webpage) + class ParamountNetworkIE(MTVServicesInfoExtractor): _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' From 160cc06fad3feda5ff7b5a0137ed696fcc6ef4ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 14 May 2020 05:11:42 +0700 Subject: [PATCH 052/123] [bbccouk] PEP8 --- youtube_dl/extractor/bbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 901c5a54f..002c39c39 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -528,7 +528,7 @@ class BBCCoUkIE(InfoExtractor): def get_programme_id(item): def get_from_attributes(item): - for p in('identifier', 'group'): + for p in ('identifier', 'group'): value = item.get(p) if value and re.match(r'^[pb][\da-z]{7}$', 
value): return value From eadc854eae26a15e0632c5ea734c06599020be6e Mon Sep 17 00:00:00 2001 From: comsomisha <shmelev1996@mail.ru> Date: Thu, 14 May 2020 01:51:40 +0300 Subject: [PATCH 053/123] [mailru] Fix extraction (closes #24530) (#25239) --- youtube_dl/extractor/mailru.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 50234798b..65cc474db 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -128,6 +128,12 @@ class MailRuIE(InfoExtractor): 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') + headers = {} + + video_key = self._get_cookies('https://my.mail.ru').get('video_key') + if video_key: + headers['Cookie'] = 'video_key=%s' % video_key.value + formats = [] for f in video_data['videos']: video_url = f.get('url') @@ -140,6 +146,7 @@ class MailRuIE(InfoExtractor): 'url': video_url, 'format_id': format_id, 'height': height, + 'http_headers': headers, }) self._sort_formats(formats) From 56586029b29b9ba6a9726bb7e897c1bf940fd167 Mon Sep 17 00:00:00 2001 From: TotalCaesar659 <14265316+TotalCaesar659@users.noreply.github.com> Date: Thu, 14 May 2020 01:53:17 +0300 Subject: [PATCH 054/123] [README.md] flake8 HTTPS URL (#25230) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 12dc00b3e..45326c69e 100644 --- a/README.md +++ b/README.md @@ -1032,7 +1032,7 @@ After you have ensured this site is distributing its content legally, you can fo 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. 
The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. -8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): +8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): $ flake8 youtube_dl/extractor/yourextractor.py From 0d0782e98cdad0f93c8e322f52371372c3f76d25 Mon Sep 17 00:00:00 2001 From: Juan Francisco Cantero Hurtado <iam@juanfra.info> Date: Thu, 14 May 2020 00:54:42 +0200 Subject: [PATCH 055/123] [youtube] Add support for yewtu.be (#25226) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9f7483905..2cf79e74d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -388,6 +388,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?invidious\.drycat\.fr/| (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| + (?:www\.)?yewtu\.be/| (?:www\.)?yt\.elukerio\.org/| (?:www\.)?yt\.lelux\.fi/| (?:www\.)?kgg2m7yk5aybusll\.onion/| From 506a3befee9893a2e662e490466878d0750c0cfe Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 15 May 2020 14:12:31 +0100 Subject: [PATCH 056/123] 
[soundcloud] reduce API playlist page limit(closes #25274) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 422ce1626..d37c52543 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -559,7 +559,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): def _extract_playlist(self, base_url, playlist_id, playlist_title): COMMON_QUERY = { - 'limit': 2000000000, + 'limit': 80000, 'linked_partitioning': '1', } From c9f7b015218c134a0228f414f347efa0a584d5d3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 15 May 2020 15:57:06 +0100 Subject: [PATCH 057/123] [vimeo] improve format extraction and sorting(closes #25285) --- youtube_dl/extractor/vimeo.py | 43 ++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 3d243c191..80c71b173 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -140,28 +140,28 @@ class VimeoBaseInfoExtractor(InfoExtractor): }) # TODO: fix handling of 308 status code returned for live archive manifest requests + sep_pattern = r'/sep/video/' for files_type in ('hls', 'dash'): for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items(): manifest_url = cdn_data.get('url') if not manifest_url: continue format_id = '%s-%s' % (files_type, cdn_name) - if files_type == 'hls': - formats.extend(self._extract_m3u8_formats( - manifest_url, video_id, 'mp4', - 'm3u8' if is_live else 'm3u8_native', m3u8_id=format_id, - note='Downloading %s m3u8 information' % cdn_name, - fatal=False)) - elif files_type == 'dash': - mpd_pattern = r'/%s/(?:sep/)?video/' % video_id - mpd_manifest_urls = [] - if re.search(mpd_pattern, manifest_url): - for suffix, repl in (('', 
'video'), ('_sep', 'sep/video')): - mpd_manifest_urls.append((format_id + suffix, re.sub( - mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url))) - else: - mpd_manifest_urls = [(format_id, manifest_url)] - for f_id, m_url in mpd_manifest_urls: + sep_manifest_urls = [] + if re.search(sep_pattern, manifest_url): + for suffix, repl in (('', 'video'), ('_sep', 'sep/video')): + sep_manifest_urls.append((format_id + suffix, re.sub( + sep_pattern, '/%s/' % repl, manifest_url))) + else: + sep_manifest_urls = [(format_id, manifest_url)] + for f_id, m_url in sep_manifest_urls: + if files_type == 'hls': + formats.extend(self._extract_m3u8_formats( + m_url, video_id, 'mp4', + 'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id, + note='Downloading %s m3u8 information' % cdn_name, + fatal=False)) + elif files_type == 'dash': if 'json=1' in m_url: real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url') if real_m_url: @@ -170,11 +170,6 @@ class VimeoBaseInfoExtractor(InfoExtractor): m_url.replace('/master.json', '/master.mpd'), video_id, f_id, 'Downloading %s MPD information' % cdn_name, fatal=False) - for f in mpd_formats: - if f.get('vcodec') == 'none': - f['preference'] = -50 - elif f.get('acodec') == 'none': - f['preference'] = -40 formats.extend(mpd_formats) live_archive = live_event.get('archive') or {} @@ -186,6 +181,12 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'preference': 1, }) + for f in formats: + if f.get('vcodec') == 'none': + f['preference'] = -50 + elif f.get('acodec') == 'none': + f['preference'] = -40 + subtitles = {} text_tracks = config['request'].get('text_tracks') if text_tracks: From 5e75fc80831e390c34e5e3e01745070dddd1c61e Mon Sep 17 00:00:00 2001 From: Dave Loyall <dave@the-good-guys.net> Date: Tue, 19 May 2020 14:11:05 -0500 Subject: [PATCH 058/123] [redtube] Improve title extraction (#25208) --- youtube_dl/extractor/redtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index b1bde1e81..deb3ad52c 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -57,7 +57,7 @@ class RedTubeIE(InfoExtractor): if not info.get('title'): info['title'] = self._html_search_regex( - (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', + (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), webpage, 'title', group='title', default=None) or self._og_search_title(webpage) From 45684945ea58cf57ccf7b73c48995304ec98306b Mon Sep 17 00:00:00 2001 From: tlsssl <63866177+tlsssl@users.noreply.github.com> Date: Tue, 19 May 2020 19:13:06 +0000 Subject: [PATCH 059/123] [indavideo] Switch to HTTPS for API request (#25191) --- youtube_dl/extractor/indavideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 2b5b2b5b0..4c16243ec 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -58,7 +58,7 @@ class IndavideoEmbedIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id, + 'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id, video_id)['data'] title = video['title'] From 2688c1482729fef5f80e23c522c3c361d7cc3537 Mon Sep 17 00:00:00 2001 From: Rob <ankenyr@gmail.com> Date: Tue, 19 May 2020 13:21:52 -0700 Subject: [PATCH 060/123] [utils] Fix file permissions in write_json_file (closes #12471) (#25122) --- youtube_dl/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 112279ed7..d1eca3760 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1837,6 +1837,12 @@ def 
write_json_file(obj, fn): os.unlink(fn) except OSError: pass + try: + mask = os.umask(0) + os.umask(mask) + os.chmod(tf.name, 0o666 & ~mask) + except OSError: + pass os.rename(tf.name, fn) except Exception: try: From 6ae7f9a781d716f36d4633e198839d1c64c471bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 20 May 2020 03:39:41 +0700 Subject: [PATCH 061/123] [redtube] Improve formats extraction and extract m3u8 formats (closes #25311, closes #25321) --- youtube_dl/extractor/redtube.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index deb3ad52c..2d2f6a98c 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, int_or_none, merge_dicts, @@ -77,7 +78,7 @@ class RedTubeIE(InfoExtractor): }) medias = self._parse_json( self._search_regex( - r'mediaDefinition\s*:\s*(\[.+?\])', webpage, + r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage, 'media definitions', default='{}'), video_id, fatal=False) if medias and isinstance(medias, list): @@ -85,6 +86,12 @@ class RedTubeIE(InfoExtractor): format_url = url_or_none(media.get('videoUrl')) if not format_url: continue + if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + continue format_id = media.get('quality') formats.append({ 'url': format_url, From a12a7b5fb1058fa5acb5156ff14e39cc973a4747 Mon Sep 17 00:00:00 2001 From: Michael Klein <github@a98shuttle.de> Date: Tue, 19 May 2020 23:08:08 +0200 Subject: [PATCH 062/123] [ard] Improve _VALID_URL (closes #25134) (#25198) --- youtube_dl/extractor/ard.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 2f47e21c3..e23b71466 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -249,7 +249,7 @@ class ARDMediathekIE(ARDMediathekBaseIE): class ARDIE(InfoExtractor): - _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html' + _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html' _TESTS = [{ # available till 14.02.2019 'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html', @@ -263,6 +263,9 @@ class ARDIE(InfoExtractor): 'upload_date': '20180214', 'thumbnail': r're:^https?://.*\.jpg$', }, + }, { + 'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html', + 'only_matching': True, }, { 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', 'only_matching': True, From 3f46f0b4544696f0063098f2b9b74694b5972370 Mon Sep 17 00:00:00 2001 From: JordanWeatherby <47519158+JordanWeatherby@users.noreply.github.com> Date: Wed, 20 May 2020 22:30:50 +0100 Subject: [PATCH 063/123] [giantbomb] Extend _VALID_URL (#25222) --- youtube_dl/extractor/giantbomb.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/giantbomb.py b/youtube_dl/extractor/giantbomb.py index 6a1b1e96e..c6477958d 100644 --- a/youtube_dl/extractor/giantbomb.py +++ b/youtube_dl/extractor/giantbomb.py @@ -13,10 +13,10 @@ from ..utils import ( class GiantBombIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/(?:videos|shows)/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)' + _TESTS = 
[{ 'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/', - 'md5': 'c8ea694254a59246a42831155dec57ac', + 'md5': '132f5a803e7e0ab0e274d84bda1e77ae', 'info_dict': { 'id': '2300-9782', 'display_id': 'quick-look-destiny-the-dark-below', @@ -26,7 +26,10 @@ class GiantBombIE(InfoExtractor): 'duration': 2399, 'thumbnail': r're:^https?://.*\.jpg$', } - } + }, { + 'url': 'https://www.giantbomb.com/shows/ben-stranding/2970-20212', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 4cce94b5d7f70084066c2e97f2200bd3f29a19cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 May 2020 12:26:21 +0700 Subject: [PATCH 064/123] [postprocessor/ffmpeg] Embed series metadata with --add-metadata --- youtube_dl/postprocessor/ffmpeg.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index fd3f921a8..5f7298345 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -447,6 +447,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor): metadata[meta_f] = info[info_f] break + # See [1-4] for some info on media metadata/metadata supported + # by ffmpeg. + # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/ + # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata + # 3. https://kodi.wiki/view/Video_file_tagging + # 4. 
http://atomicparsley.sourceforge.net/mpeg-4files.html + add('title', ('track', 'title')) add('date', 'upload_date') add(('description', 'comment'), 'description') @@ -457,6 +464,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor): add('album') add('album_artist') add('disc', 'disc_number') + add('show', 'series') + add('season_number') + add('episode_id', ('episode', 'episode_id')) + add('episode_sort', 'episode_number') if not metadata: self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add') From 341ceccf82c5c1a7421ed068882a1729a574acde Mon Sep 17 00:00:00 2001 From: "striker.sh" <19488257+strikersh@users.noreply.github.com> Date: Tue, 26 May 2020 20:26:45 +0200 Subject: [PATCH 065/123] [youtube] Add support for more invidious instances (#25417) --- youtube_dl/extractor/youtube.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2cf79e74d..fec17987b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -391,6 +391,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?yewtu\.be/| (?:www\.)?yt\.elukerio\.org/| (?:www\.)?yt\.lelux\.fi/| + (?:www\.)?invidious\.ggc-project\.de/| + (?:www\.)?yt\.maisputain\.ovh/| + (?:www\.)?invidious\.13ad\.de/| + (?:www\.)?invidious\.toot\.koeln/| + (?:www\.)?invidious\.fdn\.fr/| + (?:www\.)?watch\.nettohikari\.com/| (?:www\.)?kgg2m7yk5aybusll\.onion/| (?:www\.)?qklhadlycap4cnod\.onion/| (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/| @@ -398,6 +404,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/| (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/| (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/| + (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/| youtube\.googleapis\.com/) # the various hostnames, with wildcard 
subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: From a04eff8edc5bca9230e4e04bb0f71a37084f34de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 29 May 2020 02:01:51 +0700 Subject: [PATCH 066/123] [ard:beta] Extend _VALID_URL (closes #25405) --- youtube_dl/extractor/ard.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index e23b71466..5b7b2dd6d 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -313,9 +313,9 @@ class ARDIE(InfoExtractor): class ARDBetaMediathekIE(ARDMediathekBaseIE): - _VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/(?P<client>[^/]+)/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?' + _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)' _TESTS = [{ - 'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita', + 'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f', 'info_dict': { 'display_id': 'die-robuste-roswita', @@ -328,6 +328,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): 'upload_date': '20191222', 'ext': 'mp4', }, + }, { + 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', + 'only_matching': True, + }, { + 'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/', + 'only_matching': True, + }, { + 'url': 
'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/', + 'only_matching': True, }, { 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/', 'only_matching': True, @@ -339,7 +348,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('video_id') - display_id = mobj.group('display_id') or video_id + display_id = mobj.group('display_id') + if display_id: + display_id = display_id.rstrip('/') + if not display_id: + display_id = video_id player_page = self._download_json( 'https://api.ardmediathek.de/public-gateway', From 2b6c67cb20b1082b46e7f1332f2b5d5c8613537c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 29 May 2020 03:28:44 +0700 Subject: [PATCH 067/123] [ChangeLog] Actualize [ci skip] --- ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7805c62b6..e174aad58 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +version <unreleased> + +Core +* [postprocessor/ffmpeg] Embed series metadata with --add-metadata +* [utils] Fix file permissions in write_json_file (#12471, #25122) + +Extractors +* [ard:beta] Extend URL regular expression (#25405) ++ [youtube] Add support for more invidious instances (#25417) +* [giantbomb] Extend URL regular expression (#25222) +* [ard] Improve URL regular expression (#25134, #25198) +* [redtube] Improve formats extraction and extract m3u8 formats (#25311, + #25321) +* [indavideo] Switch to HTTPS for API request (#25191) +* [redtube] Improve title extraction (#25208) +* [vimeo] Improve format extraction and sorting (#25285) +* [soundcloud] Reduce API playlist page limit (#25274) ++ [youtube] Add support for yewtu.be (#25226) +* [mailru] Fix extraction (#24530, #25239) +* [bellator] Fix mgid extraction (#25195) + + 
version 2020.05.08 Core From 1b1de85aace33791e353528c3e3e813064feeb99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 29 May 2020 03:33:13 +0700 Subject: [PATCH 068/123] release 2020.05.29 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- CONTRIBUTING.md | 2 +- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 4999154e6..09bf763cd 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.05.08** +- [ ] I've verified that I'm running youtube-dl version **2020.05.29** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.05.08 + [debug] youtube-dl version 2020.05.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index be994f368..dc9b67cc8 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.05.08** +- [ ] I've verified that I'm running youtube-dl version **2020.05.29** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index f05326c8c..129ca0a02 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.05.08** +- [ ] I've verified that I'm running youtube-dl version **2020.05.29** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 0dbb867cd..40e53bcae 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.05.08** +- [ ] I've verified that I'm running youtube-dl version **2020.05.29** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.05.08 + [debug] youtube-dl version 2020.05.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 4b31c88ed..619a45f19 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.05.08** +- [ ] I've verified that I'm running youtube-dl version **2020.05.29** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ac759ddc4..58ab3a4b8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -153,7 +153,7 @@ After you have ensured this site is distributing its content legally, you can fo 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. -8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): +8. 
Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): $ flake8 youtube_dl/extractor/yourextractor.py diff --git a/ChangeLog b/ChangeLog index e174aad58..c13035c89 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2020.05.29 Core * [postprocessor/ffmpeg] Embed series metadata with --add-metadata diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b08ee126e..966fb3aa9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.05.08' +__version__ = '2020.05.29' From 748fa251c0f3c70296a04671cbd770eb9e67117a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2020 11:10:31 +0700 Subject: [PATCH 069/123] [jwplatform] Improve embeds extraction (closes #25467) --- youtube_dl/extractor/jwplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 2aabd98b5..dfa07e423 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -32,7 +32,7 @@ class JWPlatformIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return re.findall( - r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})', + r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})', webpage) def _real_extract(self, url): From 05fd6a190359f7828c2977f7e0710f8c6b3b4f9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Jun 2020 20:31:51 +0700 Subject: [PATCH 070/123] [periscope] Fix untitled broadcasts (#25482) --- youtube_dl/extractor/periscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index c02e34aba..b15906390 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -18,7 +18,7 @@ class PeriscopeBaseIE(InfoExtractor): item_id, query=query) def _parse_broadcast_data(self, broadcast, video_id): - title = broadcast['status'] + title = broadcast.get('status') or 'Periscope Broadcast' uploader = broadcast.get('user_display_name') or broadcast.get('username') title = '%s - %s' % (uploader, title) if uploader else title is_live = broadcast.get('state').lower() == 'running' From 1b184e7898216d001a80410b3bdd6ecf4c58d2dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Jun 2020 20:32:57 +0700 Subject: [PATCH 071/123] [twitter:broadcast] Add untitled periscope broadcast test --- youtube_dl/extractor/twitter.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 01468981c..4284487db 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -578,6 +578,18 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): IE_NAME = 'twitter:broadcast' _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})' + _TEST = { + # untitled Periscope video + 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj', + 'info_dict': { + 'id': '1yNGaQLWpejGj', + 'ext': 'mp4', + 'title': 'Andrea May Sahouri - Periscope Broadcast', + 'uploader': 'Andrea May Sahouri', + 'uploader_id': '1PXEdBZWpGwKe', + }, + } + def _real_extract(self, url): broadcast_id = self._match_id(url) broadcast = self._call_api( From ff3dabbc3c955c75a8b80f65a3cdd92f8a0c62e4 Mon Sep 17 00:00:00 2001 From: Matej Dujava <mdujava@gmail.com> Date: Mon, 1 Jun 2020 16:11:31 +0200 Subject: [PATCH 072/123] [malltv] Add support for sk.mall.tv (#25445) --- youtube_dl/extractor/malltv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 
deletion(-) diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py index e13c2e11a..6f4fd927f 100644 --- a/youtube_dl/extractor/malltv.py +++ b/youtube_dl/extractor/malltv.py @@ -8,7 +8,7 @@ from ..utils import merge_dicts class MallTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', 'md5': '1c4a37f080e1f3023103a7b43458e518', @@ -26,6 +26,9 @@ class MallTVIE(InfoExtractor): }, { 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', 'only_matching': True, + }, { + 'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka', + 'only_matching': True, }] def _real_extract(self, url): From 0235f89a83e95aa2a4dbc5060379c9edca6760bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Jun 2020 23:33:14 +0700 Subject: [PATCH 073/123] [brightcove] Fix subtitles extraction (closes #25540) --- youtube_dl/extractor/brightcove.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 85001b3ad..462815317 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -25,9 +25,11 @@ from ..utils import ( int_or_none, parse_iso8601, smuggle_url, + str_or_none, unescapeHTML, unsmuggle_url, update_url_query, + url_or_none, clean_html, mimetype2ext, UnsupportedError, @@ -553,10 +555,16 @@ class BrightcoveNewIE(AdobePassIE): subtitles = {} for text_track in json_data.get('text_tracks', []): - if text_track.get('src'): - subtitles.setdefault(text_track.get('srclang'), []).append({ - 'url': text_track['src'], - }) + if text_track.get('kind') != 'captions': + 
continue + text_track_url = url_or_none(text_track.get('src')) + if not text_track_url: + continue + lang = (str_or_none(text_track.get('srclang')) + or str_or_none(text_track.get('label')) or 'en').lower() + subtitles.setdefault(lang, []).append({ + 'url': text_track_url, + }) is_live = False duration = float_or_none(json_data.get('duration'), 1000) From d6a72af21182d1987077ab149ff5d6a7e9f3c600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Jun 2020 23:35:57 +0700 Subject: [PATCH 074/123] [brightcove] Sort imports --- youtube_dl/extractor/brightcove.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 462815317..5c22a730d 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -5,34 +5,34 @@ import base64 import re import struct -from .common import InfoExtractor from .adobepass import AdobePassIE +from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, + compat_HTTPError, compat_parse_qs, compat_urllib_parse_urlparse, compat_urlparse, compat_xml_parse_error, - compat_HTTPError, ) from ..utils import ( - ExtractorError, + clean_html, extract_attributes, + ExtractorError, find_xpath_attr, fix_xml_ampersands, float_or_none, - js_to_json, int_or_none, + js_to_json, + mimetype2ext, parse_iso8601, smuggle_url, str_or_none, unescapeHTML, unsmuggle_url, + UnsupportedError, update_url_query, url_or_none, - clean_html, - mimetype2ext, - UnsupportedError, ) From 7cc62f40e7f4f106cf9705626f7a4ffcffbbf37f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 00:12:47 +0700 Subject: [PATCH 075/123] [twitch] Pass v5 accept header and fix thumbnails extraction (closes #25531) --- youtube_dl/extractor/twitch.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 78ee0115c..45b8a7236 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -21,6 +21,7 @@ from ..utils import ( orderedSet, parse_duration, parse_iso8601, + qualities, try_get, unified_timestamp, update_url_query, @@ -50,7 +51,10 @@ class TwitchBaseIE(InfoExtractor): def _call_api(self, path, item_id, *args, **kwargs): headers = kwargs.get('headers', {}).copy() - headers['Client-ID'] = self._CLIENT_ID + headers.update({ + 'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8', + 'Client-ID': self._CLIENT_ID, + }) kwargs['headers'] = headers response = self._download_json( '%s/%s' % (self._API_BASE, path), item_id, @@ -186,12 +190,27 @@ class TwitchItemBaseIE(TwitchBaseIE): is_live = False else: is_live = None + _QUALITIES = ('small', 'medium', 'large') + quality_key = qualities(_QUALITIES) + thumbnails = [] + preview = info.get('preview') + if isinstance(preview, dict): + for thumbnail_id, thumbnail_url in preview.items(): + thumbnail_url = url_or_none(thumbnail_url) + if not thumbnail_url: + continue + if thumbnail_id not in _QUALITIES: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'preference': quality_key(thumbnail_id), + }) return { 'id': info['_id'], 'title': info.get('title') or 'Untitled Broadcast', 'description': info.get('description'), 'duration': int_or_none(info.get('length')), - 'thumbnail': info.get('preview'), + 'thumbnails': thumbnails, 'uploader': info.get('channel', {}).get('display_name'), 'uploader_id': info.get('channel', {}).get('name'), 'timestamp': parse_iso8601(info.get('recorded_at')), From 4371c2a91bd8755ed644becc17772089fd19811d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 00:55:29 +0700 Subject: [PATCH 076/123] [twitch:stream] Fix extraction (closes #25528) --- youtube_dl/extractor/twitch.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 
11 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 45b8a7236..4cd5f0db4 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -22,6 +22,7 @@ from ..utils import ( parse_duration, parse_iso8601, qualities, + str_or_none, try_get, unified_timestamp, update_url_query, @@ -591,10 +592,18 @@ class TwitchStreamIE(TwitchBaseIE): else super(TwitchStreamIE, cls).suitable(url)) def _real_extract(self, url): - channel_id = self._match_id(url) + channel_name = self._match_id(url) + + access_token = self._call_api( + 'api/channels/%s/access_token' % channel_name, channel_name, + 'Downloading access token JSON') + + token = access_token['token'] + channel_id = compat_str(self._parse_json( + token, channel_name)['channel_id']) stream = self._call_api( - 'kraken/streams/%s?stream_type=all' % channel_id.lower(), + 'kraken/streams/%s?stream_type=all' % channel_id, channel_id, 'Downloading stream JSON').get('stream') if not stream: @@ -604,11 +613,9 @@ class TwitchStreamIE(TwitchBaseIE): # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing # an invalid m3u8 URL. Working around by use of original channel name from stream # JSON and fallback to lowercase if it's not available. 
- channel_id = stream.get('channel', {}).get('name') or channel_id.lower() - - access_token = self._call_api( - 'api/channels/%s/access_token' % channel_id, channel_id, - 'Downloading channel access token') + channel_name = try_get( + stream, lambda x: x['channel']['name'], + compat_str) or channel_name.lower() query = { 'allow_source': 'true', @@ -619,11 +626,11 @@ class TwitchStreamIE(TwitchBaseIE): 'playlist_include_framerate': 'true', 'segment_preference': '4', 'sig': access_token['sig'].encode('utf-8'), - 'token': access_token['token'].encode('utf-8'), + 'token': token.encode('utf-8'), } formats = self._extract_m3u8_formats( '%s/api/channel/hls/%s.m3u8?%s' - % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)), + % (self._USHER_BASE, channel_name, compat_urllib_parse_urlencode(query)), channel_id, 'mp4') self._prefer_source(formats) @@ -646,8 +653,8 @@ class TwitchStreamIE(TwitchBaseIE): }) return { - 'id': compat_str(stream['_id']), - 'display_id': channel_id, + 'id': str_or_none(stream.get('_id')) or channel_id, + 'display_id': channel_name, 'title': title, 'description': description, 'thumbnails': thumbnails, From 850d856bd0935404a419ca6dcf9612f8d87836e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 00:57:40 +0700 Subject: [PATCH 077/123] [twitch:stream] Expect 400 and 410 HTTP errors from API --- youtube_dl/extractor/twitch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 4cd5f0db4..e211cd4c8 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -56,7 +56,10 @@ class TwitchBaseIE(InfoExtractor): 'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8', 'Client-ID': self._CLIENT_ID, }) - kwargs['headers'] = headers + kwargs.update({ + 'headers': headers, + 'expected_status': (400, 410), + }) response = self._download_json( '%s/%s' % (self._API_BASE, path), 
item_id, *args, **compat_kwargs(kwargs)) From dba030385ae840bd4b6058cf575adb510d0f0b59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 01:35:09 +0700 Subject: [PATCH 078/123] [tele5] Prefer jwplatform over nexx (closes #25533) --- youtube_dl/extractor/tele5.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index 364556a1f..c209eb04f 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -6,14 +6,8 @@ import re from .common import InfoExtractor from .jwplatform import JWPlatformIE from .nexx import NexxIE -from ..compat import ( - compat_str, - compat_urlparse, -) -from ..utils import ( - NO_DEFAULT, - try_get, -) +from ..compat import compat_urlparse +from ..utils import NO_DEFAULT class Tele5IE(InfoExtractor): @@ -30,6 +24,21 @@ class Tele5IE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # jwplatform, nexx unavailable + 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/', + 'info_dict': { + 'id': 'WJuiOlUp', + 'ext': 'mp4', + 'upload_date': '20200603', + 'timestamp': 1591214400, + 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters', + 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [JWPlatformIE.ie_key()], }, { 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', 'only_matching': True, @@ -88,15 +97,6 @@ class Tele5IE(InfoExtractor): if not jwplatform_id: jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id') - media = self._download_json( - 'https://cdn.jwplayer.com/v2/media/' + jwplatform_id, - display_id) - nexx_id = try_get( - media, lambda x: x['playlist'][0]['nexx_id'], compat_str) - - if nexx_id: - return nexx_result(nexx_id) - return self.url_result( 'jwplatform:%s' % jwplatform_id, 
ie=JWPlatformIE.ie_key(), video_id=jwplatform_id) From dc2ed23c7f048cab1869be82215ca7e54ba994ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 01:44:36 +0700 Subject: [PATCH 079/123] [jwplatform] Add support for bypass geo restriction --- youtube_dl/extractor/jwplatform.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index dfa07e423..c34b5f5e6 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import unsmuggle_url class JWPlatformIE(InfoExtractor): @@ -36,6 +37,10 @@ class JWPlatformIE(InfoExtractor): webpage) def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass({ + 'countries': smuggled_data.get('geo_countries'), + }) video_id = self._match_id(url) json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id) return self._parse_jwplayer_data(json_data, video_id) From 108c4f6deb763794923cc4c88379784ac5bbfa70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 01:45:05 +0700 Subject: [PATCH 080/123] [tele5] Bypass geo restriction --- youtube_dl/extractor/tele5.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index c209eb04f..3e1a7a9e6 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -7,11 +7,15 @@ from .common import InfoExtractor from .jwplatform import JWPlatformIE from .nexx import NexxIE from ..compat import compat_urlparse -from ..utils import NO_DEFAULT +from ..utils import ( + NO_DEFAULT, + smuggle_url, +) class Tele5IE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)' + 
_GEO_COUNTRIES = ['DE'] _TESTS = [{ 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416', 'info_dict': { @@ -98,5 +102,7 @@ class Tele5IE(InfoExtractor): jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id') return self.url_result( - 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), - video_id=jwplatform_id) + smuggle_url( + 'jwplatform:%s' % jwplatform_id, + {'geo_countries': self._GEO_COUNTRIES}), + ie=JWPlatformIE.ie_key(), video_id=jwplatform_id) From 087c966ab6253d409688771b844b4d79e4a624c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 01:49:27 +0700 Subject: [PATCH 081/123] [ChangeLog] Actualize [ci skip] --- ChangeLog | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index c13035c89..03b05ca28 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version <unreleased> + +Extractors +* [tele5] Bypass geo restriction ++ [jwplatform] Add support for bypass geo restriction +* [tele5] Prefer jwplatform over nexx (#25533) +* [twitch:stream] Expect 400 and 410 HTTP errors from API +* [twitch:stream] Fix extraction (#25528) +* [twitch] Fix thumbnails extraction (#25531) ++ [twitch] Pass v5 Accept HTTP header (#25531) +* [brightcove] Fix subtitles extraction (#25540) ++ [malltv] Add support for sk.mall.tv (#25445) +* [periscope] Fix untitled broadcasts (#25482) +* [jwplatform] Improve embeds extraction (#25467) + + version 2020.05.29 Core From 134453d1b2c2c5e80b40082054172e642f84b1c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 01:51:39 +0700 Subject: [PATCH 082/123] release 2020.06.06 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- 
youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 09bf763cd..3fe1b1a33 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.05.29 + [debug] youtube-dl version 2020.06.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index dc9b67cc8..e9f4b880c 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 129ca0a02..bbd34ecab 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 40e53bcae..4299474fa 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.05.29 + [debug] youtube-dl version 2020.06.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 619a45f19..c9ccc7010 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 03b05ca28..f439f29e0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2020.06.06 Extractors * [tele5] Bypass geo restriction diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 966fb3aa9..30f31f888 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.05.29' +__version__ = '2020.06.06' From 000522151f614916f78462b832a7dd0ad95c1590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 02:14:35 +0700 Subject: [PATCH 083/123] [kaltura] Add support for multiple embeds on a webpage (closes #25523) --- youtube_dl/extractor/generic.py | 18 +++++++++++++++--- youtube_dl/extractor/kaltura.py | 19 +++++++++++++------ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ce8252f6a..355067a50 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1708,6 +1708,15 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # multiple kaltura embeds, nsfw + 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html', + 'info_dict': { + 'id': 'kamila-avec-video-jaime-sadomie', + 'title': "Kamila avec vídeo “J'aime sadomie”", + }, + 'playlist_count': 8, + }, { # Non-standard Vimeo embed 'url': 'https://openclassrooms.com/courses/understanding-the-web', @@ -2844,9 +2853,12 @@ 
class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - kaltura_url = KalturaIE._extract_url(webpage) - if kaltura_url: - return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) + kaltura_urls = KalturaIE._extract_urls(webpage) + if kaltura_urls: + return self.playlist_from_matches( + kaltura_urls, video_id, video_title, + getter=lambda x: smuggle_url(x, {'source_url': url}), + ie=KalturaIE.ie_key()) # Look for EaglePlatform embeds eagleplatform_url = EaglePlatformIE._extract_url(webpage) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 2d38b758b..49d13460d 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -113,9 +113,14 @@ class KalturaIE(InfoExtractor): @staticmethod def _extract_url(webpage): + urls = KalturaIE._extract_urls(webpage) + return urls[0] if urls else None + + @staticmethod + def _extract_urls(webpage): # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site - mobj = ( - re.search( + finditer = ( + re.finditer( r"""(?xs) kWidget\.(?:thumb)?[Ee]mbed\( \{.*? 
@@ -124,7 +129,7 @@ class KalturaIE(InfoExtractor): (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) """, webpage) - or re.search( + or re.finditer( r'''(?xs) (?P<q1>["']) (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* @@ -138,7 +143,7 @@ class KalturaIE(InfoExtractor): ) (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) ''', webpage) - or re.search( + or re.finditer( r'''(?xs) <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) @@ -148,7 +153,8 @@ class KalturaIE(InfoExtractor): (?P=q1) ''', webpage) ) - if mobj: + urls = [] + for mobj in finditer: embed_info = mobj.groupdict() for k, v in embed_info.items(): if v: @@ -160,7 +166,8 @@ class KalturaIE(InfoExtractor): webpage) if service_mobj: url = smuggle_url(url, {'service_url': service_mobj.group('id')}) - return url + urls.append(url) + return urls def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): params = actions[0] From 06dc6af04c88ea792c95d8b1ae637fd8a93921b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2020 04:16:31 +0700 Subject: [PATCH 084/123] [youtube] Extract chapters from JSON (closes #24819) --- test/test_youtube_chapters.py | 2 +- youtube_dl/extractor/youtube.py | 63 +++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py index 324ca8525..e69c57377 100644 --- a/test/test_youtube_chapters.py +++ b/test/test_youtube_chapters.py @@ -267,7 +267,7 @@ class TestYoutubeChapters(unittest.TestCase): for description, duration, expected_chapters in self._TEST_CASES: ie = YoutubeIE() expect_value( - self, ie._extract_chapters(description, duration), + self, 
ie._extract_chapters_from_description(description, duration), expected_chapters, None) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index fec17987b..54ec76db5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1652,8 +1652,63 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_id = mobj.group(2) return video_id + def _extract_chapters_from_json(self, webpage, video_id, duration): + if not webpage: + return + player = self._parse_json( + self._search_regex( + r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage, + 'player args', default='{}'), + video_id, fatal=False) + if not player or not isinstance(player, dict): + return + watch_next_response = player.get('watch_next_response') + if not isinstance(watch_next_response, compat_str): + return + response = self._parse_json(watch_next_response, video_id, fatal=False) + if not response or not isinstance(response, dict): + return + chapters_list = try_get( + response, + lambda x: x['playerOverlays'] + ['playerOverlayRenderer'] + ['decoratedPlayerBarRenderer'] + ['decoratedPlayerBarRenderer'] + ['playerBar'] + ['chapteredPlayerBarRenderer'] + ['chapters'], + list) + if not chapters_list: + return + + def chapter_time(chapter): + return float_or_none( + try_get( + chapter, + lambda x: x['chapterRenderer']['timeRangeStartMillis'], + int), + scale=1000) + chapters = [] + for next_num, chapter in enumerate(chapters_list, start=1): + start_time = chapter_time(chapter) + if start_time is None: + continue + end_time = (chapter_time(chapters_list[next_num]) + if next_num < len(chapters_list) else duration) + if end_time is None: + continue + title = try_get( + chapter, lambda x: x['chapterRenderer']['title']['simpleText'], + compat_str) + chapters.append({ + 'start_time': start_time, + 'end_time': end_time, + 'title': title, + }) + return chapters + @staticmethod - def _extract_chapters(description, duration): + def 
_extract_chapters_from_description(description, duration): if not description: return None chapter_lines = re.findall( @@ -1687,6 +1742,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }) return chapters + def _extract_chapters(self, webpage, description, video_id, duration): + return (self._extract_chapters_from_json(webpage, video_id, duration) + or self._extract_chapters_from_description(description, duration)) + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -2324,7 +2383,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): errnote='Unable to download video annotations', fatal=False, data=urlencode_postdata({xsrf_field_name: xsrf_token})) - chapters = self._extract_chapters(description_original, video_duration) + chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration) # Look for the DASH manifest if self._downloader.params.get('youtube_include_dash_manifest', True): From 46d2733f9215832031a0be6431c8ce11c255cc4b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 14 Jun 2020 13:17:51 +0200 Subject: [PATCH 085/123] [facebook] Support single-video ID links I stumbled upon this at https://www.facebook.com/bwfbadminton/posts/10157127020046316 . No idea how prevalent it is yet. 
--- youtube_dl/extractor/facebook.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index ce64e2683..610d66745 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -466,15 +466,18 @@ class FacebookIE(InfoExtractor): return info_dict if '/posts/' in url: - entries = [ - self.url_result('facebook:%s' % vid, FacebookIE.ie_key()) - for vid in self._parse_json( - self._search_regex( - r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', - webpage, 'video ids', group='ids'), - video_id)] + video_id_json = self._search_regex( + r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids', + default='') + if video_id_json: + entries = [ + self.url_result('facebook:%s' % vid, FacebookIE.ie_key()) + for vid in self._parse_json(video_id_json, video_id)] + return self.playlist_result(entries, video_id) - return self.playlist_result(entries, video_id) + # Single Video? 
+ video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id') + return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key()) else: _, info_dict = self._extract_from_url( self._VIDEO_PAGE_TEMPLATE % video_id, From ec9e8b79bd2829e78f1be62eecd919c7afc7c937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 16 Jun 2020 01:59:46 +0700 Subject: [PATCH 086/123] [youtube] Fix playlist and feed extraction (closes #25675) --- youtube_dl/extractor/youtube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 54ec76db5..e01c27438 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -70,6 +70,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}' + _YOUTUBE_CLIENT_HEADERS = { + 'x-youtube-client-name': '1', + 'x-youtube-client-version': '1.20200609.04.02', + } + def _set_language(self): self._set_cookie( '.youtube.com', 'PREF', 'f1=50000000&hl=en', @@ -301,7 +306,8 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): 'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''), - transform_source=uppercase_escape) + transform_source=uppercase_escape, + headers=self._YOUTUBE_CLIENT_HEADERS) break except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503): @@ -3250,7 +3256,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): more = self._download_json( 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE, 'Downloading page #%s' % page_num, - transform_source=uppercase_escape) + transform_source=uppercase_escape, + headers=self._YOUTUBE_CLIENT_HEADERS) content_html = more['content_html'] more_widget_html = more['load_more_widget_html'] From 
6fcbdc54a0a5e6ac3a20a7d8c6e053905c43005c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jul 2020 18:27:19 +0700 Subject: [PATCH 087/123] [wistia] Restrict embed regex (closes #25969) --- youtube_dl/extractor/wistia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 168e5e901..77febd2eb 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -56,7 +56,7 @@ class WistiaIE(InfoExtractor): urls.append(unescapeHTML(match.group('url'))) for match in re.finditer( r'''(?sx) - <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2 + <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1 ''', webpage): urls.append('wistia:%s' % match.group('id')) for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage): From 39db658554be7f8b6aa3f7d105162694cab22c11 Mon Sep 17 00:00:00 2001 From: MRWITEK <mrvvitek@gmail.com> Date: Tue, 14 Jul 2020 14:01:15 +0300 Subject: [PATCH 088/123] [youtube] Improve description extraction (closes #25937) (#25980) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ef08bf8cb..c27f2cd95 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1930,7 +1930,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ''', replace_url, video_description) video_description = clean_html(video_description) else: - video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription') + video_description = video_details.get('shortDescription') or self._html_search_meta('description', video_webpage) if not smuggled_data.get('force_singlefeed', False): if not self._downloader.params.get('noplaylist'): From 
339a68004557e20b31807faf47d443f6ea78dcc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 28 Jul 2020 05:04:50 +0700 Subject: [PATCH 089/123] [youtube] Fix sigfunc name extraction (closes #26134, closes #26135, closes #26136, closes #26137) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c27f2cd95..b35bf03aa 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1384,7 +1384,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._search_regex( (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', + r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # Obsolete patterns r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', From 5b65f659f6c4659300d94160be3acd63041e6944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 28 Jul 2020 05:07:54 +0700 Subject: [PATCH 090/123] [ChangeLog] Actualize [ci skip] --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index 07d6ccd69..a49904c89 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version <unreleased> + +Extractors +* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137) +* [youtube] Improve description extraction (#25937, #25980) +* [wistia] Restrict embed regular expression (#25969) +* [youtube] Prevent excess HTTP 301 (#25786) ++ [youtube:playlists] Extend URL regular expression (#25810) ++ [bellmedia] Add support for 
cp24.com clip URLs (#25764) +* [brightcove] Improve embed detection (#25674) + + version 2020.06.16.1 Extractors From 4aa9a9a20cd5bdd1ccf227787e51e03513f16f80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 28 Jul 2020 05:13:03 +0700 Subject: [PATCH 091/123] release 2020.07.28 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d29d5366f..f2260db46 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.16.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.06.16.1 + [debug] youtube-dl version 2020.07.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index ee882f98c..8bc05c4ba 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.16.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 23033fe13..98348e0cd 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.16.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 597531330..86706f528 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.16.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.06.16.1 + [debug] youtube-dl version 2020.07.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 5cfcb9318..52c2709f9 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.06.16.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index a49904c89..bf515f784 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2020.07.28 Extractors * [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6b88eb38c..17101fa47 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.06.16.1' +__version__ = '2020.07.28' From 32c4b0b725bd129d69dc5984935a2248149f98ab Mon Sep 17 00:00:00 2001 From: tfvlrue <35318734+tfvlrue@users.noreply.github.com> Date: Sat, 12 Sep 2020 05:35:11 -0400 Subject: [PATCH 092/123] [soundcloud] Reduce pagination limit to fix 502 Bad Gateway errors when listing a user's tracks. (#26557) Per the documentation here https://developers.soundcloud.com/blog/offset-pagination-deprecated the maximum limit is 200, so let's respect that (even if a higher value sometimes works). 
Co-authored-by: tfvlrue <tfvlrue> --- youtube_dl/extractor/soundcloud.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index d37c52543..a2fddf6d9 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -558,8 +558,10 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): def _extract_playlist(self, base_url, playlist_id, playlist_title): + # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200. + # https://developers.soundcloud.com/blog/offset-pagination-deprecated COMMON_QUERY = { - 'limit': 80000, + 'limit': 200, 'linked_partitioning': '1', } From 0cb11dfc86dc68a81819552b115fbfc0e4656f06 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 12 Sep 2020 19:20:53 +0100 Subject: [PATCH 093/123] [redbulltv] Add support for new redbull.com TV URLs(closes #22037)(closes #22063) --- youtube_dl/extractor/extractors.py | 2 + youtube_dl/extractor/redbulltv.py | 110 +++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bc502979d..881001638 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -922,7 +922,9 @@ from .rbmaradio import RBMARadioIE from .rds import RDSIE from .redbulltv import ( RedBullTVIE, + RedBullEmbedIE, RedBullTVRrnContentIE, + RedBullIE, ) from .reddit import ( RedditIE, diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py index dbe1aaded..06945bd0c 100644 --- a/youtube_dl/extractor/redbulltv.py +++ b/youtube_dl/extractor/redbulltv.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( @@ -10,7 +12,7 @@ from 
..utils import ( class RedBullTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live)/(?P<id>AP-\w+)' + _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?P<id>AP-\w+)' _TESTS = [{ # film 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', @@ -29,8 +31,8 @@ class RedBullTVIE(InfoExtractor): 'id': 'AP-1PMHKJFCW1W11', 'ext': 'mp4', 'title': 'Grime - Hashtags S2E4', - 'description': 'md5:b5f522b89b72e1e23216e5018810bb25', - 'duration': 904.6, + 'description': 'md5:5546aa612958c08a98faaad4abce484d', + 'duration': 904, }, 'params': { 'skip_download': True, @@ -44,11 +46,15 @@ class RedBullTVIE(InfoExtractor): }, { 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11', 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11', + 'only_matching': True, }] - def _real_extract(self, url): - video_id = self._match_id(url) - + def extract_info(self, video_id): session = self._download_json( 'https://api.redbull.tv/v3/session', video_id, note='Downloading access token', query={ @@ -105,24 +111,104 @@ class RedBullTVIE(InfoExtractor): 'subtitles': subtitles, } + def _real_extract(self, url): + video_id = self._match_id(url) + return self.extract_info(video_id) + + +class RedBullEmbedIE(RedBullTVIE): + _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})' + _TESTS = [{ + # HLS manifest accessible only using assetId + 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT', + 'only_matching': True, + }] + _VIDEO_ESSENSE_TMPL = '''... 
on %s { + videoEssence { + attributes + } + }''' + + def _real_extract(self, url): + rrn_id = self._match_id(url) + asset_id = self._download_json( + 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', + rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'}, + query={ + 'query': '''{ + resource(id: "%s", enforceGeoBlocking: false) { + %s + %s + } +}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'), + })['data']['resource']['videoEssence']['attributes']['assetId'] + return self.extract_info(asset_id) + class RedBullTVRrnContentIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)/(?:video|live)/rrn:content:[^:]+:(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/tv/(?:video|live|film)/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _TESTS = [{ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras', 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) + region, lang, rrn_id = re.search(self._VALID_URL, url).groups() + rrn_id += ':%s-%s' % (lang, region.upper()) + return self.url_result( + 'https://www.redbull.com/embed/' + rrn_id, + RedBullEmbedIE.ie_key(), rrn_id) - webpage = self._download_webpage(url, display_id) - video_url = self._og_search_url(webpage) +class RedBullIE(InfoExtractor): + _VALID_URL = 
r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/(?P<type>(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04', + 'md5': 'db8271a7200d40053a1809ed0dd574ff', + 'info_dict': { + 'id': 'AA-1MT8DQWA91W14', + 'ext': 'mp4', + 'title': 'Grime - Hashtags S2E4', + 'description': 'md5:5546aa612958c08a98faaad4abce484d', + }, + }, { + 'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william', + 'only_matching': True, + }] + + def _real_extract(self, url): + region, lang, filter_type, display_id = re.search(self._VALID_URL, url).groups() + if filter_type == 'episodes': + filter_type = 'episode-videos' + elif filter_type == 'live': + filter_type = 'live-videos' + + rrn_id = self._download_json( + 'https://www.redbull.com/v3/api/graphql/v1/v3/query/%s-%s' % (lang, region.upper()), + display_id, query={ + 'filter[type]': filter_type, + 'filter[uriSlug]': display_id, + 'rb3Schema': 'v1:hero', + })['data']['id'] return self.url_result( - video_url, ie=RedBullTVIE.ie_key(), - video_id=RedBullTVIE._match_id(video_url)) + 'https://www.redbull.com/embed/' + rrn_id, + RedBullEmbedIE.ie_key(), rrn_id) From f07a98da579df6a8c71684628923118fdf5769b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 13 Sep 2020 11:26:11 +0100 Subject: [PATCH 094/123] [redbulltv] improve support for rebull.com TV localized 
URLS(#22063) --- youtube_dl/extractor/redbulltv.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py index 06945bd0c..3aae79f5d 100644 --- a/youtube_dl/extractor/redbulltv.py +++ b/youtube_dl/extractor/redbulltv.py @@ -192,7 +192,14 @@ class RedBullIE(InfoExtractor): }, { 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william', 'only_matching': True, + }, { + # only available on the int-en website so a fallback is need for the API + # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero + 'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia', + 'only_matching': True, }] + _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr'] + _LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe'] def _real_extract(self, url): region, lang, filter_type, display_id = re.search(self._VALID_URL, url).groups() @@ -201,8 +208,16 @@ class RedBullIE(InfoExtractor): elif filter_type == 'live': filter_type = 'live-videos' + regions = [region.upper()] + if region != 'int': + if region in self._LAT_FALLBACK_MAP: + regions.append('LAT') + if lang in self._INT_FALLBACK_LIST: + regions.append('INT') + locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions]) + rrn_id = self._download_json( - 'https://www.redbull.com/v3/api/graphql/v1/v3/query/%s-%s' % (lang, region.upper()), + 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale, display_id, query={ 'filter[type]': filter_type, 'filter[uriSlug]': display_id, From 614deef199c0a0c9ac0a0b2355f77cb70ebf504e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 13 Sep 2020 18:59:37 +0700 Subject: [PATCH 095/123] [svtplay] Fix id extraction (closes #26576) --- youtube_dl/extractor/svt.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py 
b/youtube_dl/extractor/svt.py index 8e9ec2ca3..2f6887d86 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -231,7 +231,9 @@ class SVTPlayIE(SVTPlayBaseIE): if not svt_id: svt_id = self._search_regex( (r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', - r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"'), + r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)', + r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"', + r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'), webpage, 'video id') return self._extract_by_video_id(svt_id, webpage) From 6b2868b2334981d821cbb933ed19da8d9ad1ad7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 13 Sep 2020 20:43:50 +0700 Subject: [PATCH 096/123] [googledrive] Use redirect URLs for source format (closes #18877, closes #23919, closes #24689, closes #26565) --- youtube_dl/extractor/googledrive.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 589e4d5c3..f2cc57e44 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -220,19 +220,27 @@ class GoogleDriveIE(InfoExtractor): 'id': video_id, 'export': 'download', }) - urlh = self._request_webpage( - source_url, video_id, note='Requesting source file', - errnote='Unable to request source file', fatal=False) + + def request_source_file(source_url, kind): + return self._request_webpage( + source_url, video_id, note='Requesting %s file' % kind, + errnote='Unable to request %s file' % kind, fatal=False) + urlh = request_source_file(source_url, 'source') if urlh: - def add_source_format(src_url): + def add_source_format(urlh): formats.append({ - 'url': src_url, + # Use redirect URLs as download URLs in order to calculate + # correct cookies in _calc_cookies. 
+ # Using original URLs may result in redirect loop due to + # google.com's cookies mistakenly used for googleusercontent.com + # redirect URLs (see #23919). + 'url': urlh.geturl(), 'ext': determine_ext(title, 'mp4').lower(), 'format_id': 'source', 'quality': 1, }) if urlh.headers.get('Content-Disposition'): - add_source_format(source_url) + add_source_format(urlh) else: confirmation_webpage = self._webpage_read_content( urlh, url, video_id, note='Downloading confirmation page', @@ -242,9 +250,12 @@ class GoogleDriveIE(InfoExtractor): r'confirm=([^&"\']+)', confirmation_webpage, 'confirmation code', fatal=False) if confirm: - add_source_format(update_url_query(source_url, { + confirmed_source_url = update_url_query(source_url, { 'confirm': confirm, - })) + }) + urlh = request_source_file(confirmed_source_url, 'confirmed source') + if urlh and urlh.headers.get('Content-Disposition'): + add_source_format(urlh) if not formats: reason = self._search_regex( From bae063a5d95f434fc877d55fd8edfbb7266dbf60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 13 Sep 2020 21:07:25 +0700 Subject: [PATCH 097/123] [srgssr] Extend _VALID_URL (closes #26555, closes #26556, closes #26578) --- youtube_dl/extractor/srgssr.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 170dce87f..f63a1359a 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -114,7 +114,7 @@ class SRGSSRPlayIE(InfoExtractor): [^/]+/(?P<type>video|audio)/[^?]+| popup(?P<type_2>video|audio)player ) - \?id=(?P<id>[0-9a-f\-]{36}|\d+) + \?.*?\b(?:id=|urn=urn:[^:]+:video:)(?P<id>[0-9a-f\-]{36}|\d+) ''' _TESTS = [{ @@ -175,6 +175,12 @@ class SRGSSRPlayIE(InfoExtractor): }, { 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01', 'only_matching': True, + }, { + 'url': 
'https://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?urn=urn:srf:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5', + 'only_matching': True, + }, { + 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260', + 'only_matching': True, }] def _real_extract(self, url): From 08ac256a3f727bcdd084ff08ecd5f847cf26e0eb Mon Sep 17 00:00:00 2001 From: Daniel Peukert <dan.peukert@gmail.com> Date: Sun, 13 Sep 2020 16:23:21 +0200 Subject: [PATCH 098/123] [youtube] Fix empty description extraction (#26575) (closes #26006) --- youtube_dl/extractor/youtube.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6ae2e58c1..02f3ab61a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1264,7 +1264,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'skip_download': True, }, - } + }, + { + # empty description results in an empty string + 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k', + 'info_dict': { + 'id': 'x41yOUIvK2k', + 'ext': 'mp4', + 'title': 'IMG 3456', + 'description': '', + 'upload_date': '20170613', + 'uploader_id': 'ElevageOrVert', + 'uploader': 'ElevageOrVert', + }, + 'params': { + 'skip_download': True, + }, + }, ] def __init__(self, *args, **kwargs): @@ -1931,7 +1947,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ''', replace_url, video_description) video_description = clean_html(video_description) else: - video_description = video_details.get('shortDescription') or self._html_search_meta('description', video_webpage) + video_description = video_details.get('shortDescription') + if video_description is None: + video_description = self._html_search_meta('description', video_webpage) if not smuggled_data.get('force_singlefeed', False): if not self._downloader.params.get('noplaylist'): From 1fa97b61c4978c0a78c1b69a1c458295a12eab3a Mon Sep 17 00:00:00 2001 From: Derek Land 
<d.d.land@hhs.nl> Date: Sun, 13 Sep 2020 16:38:16 +0200 Subject: [PATCH 099/123] [rtlnl] Extend _VALID_URL (#26549) (closes #25821) --- youtube_dl/extractor/rtlnl.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index fadca8c17..cf4dc85db 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -14,12 +14,26 @@ class RtlNlIE(InfoExtractor): _VALID_URL = r'''(?x) https?://(?:(?:www|static)\.)? (?: - rtlxl\.nl/[^\#]*\#!/[^/]+/| + rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/| rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/) ) (?P<id>[0-9a-f-]+)''' _TESTS = [{ + # new URL schema + 'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f', + 'md5': '490428f1187b60d714f34e1f2e3af0b6', + 'info_dict': { + 'id': '0bd1384d-d970-3086-98bb-5c104e10c26f', + 'ext': 'mp4', + 'title': 'RTL Nieuws', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'timestamp': 1593293400, + 'upload_date': '20200627', + 'duration': 661.08, + }, + }, { + # old URL schema 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416', 'md5': '473d1946c1fdd050b2c0161a4b13c373', 'info_dict': { @@ -31,6 +45,7 @@ class RtlNlIE(InfoExtractor): 'upload_date': '20160429', 'duration': 1167.96, }, + 'skip': '404', }, { # best format available a3t 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', From 01df3020f7557be5a69cb8439438c20b9707856c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 13 Sep 2020 21:42:06 +0700 Subject: [PATCH 100/123] [rtlnl] Extend _VALID_URL for new embed URL schema --- youtube_dl/extractor/rtlnl.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py 
index cf4dc85db..9eaa06f25 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -15,7 +15,8 @@ class RtlNlIE(InfoExtractor): https?://(?:(?:www|static)\.)? (?: rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/| - rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/) + rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)| + embed\.rtl\.nl/\#uuid= ) (?P<id>[0-9a-f-]+)''' @@ -91,6 +92,10 @@ class RtlNlIE(InfoExtractor): }, { 'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl', 'only_matching': True, + }, { + # new embed URL schema + 'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', + 'only_matching': True, }] def _real_extract(self, url): From 07ed2799c4a99cbc7b7f25d5e95eb4d91bdbe897 Mon Sep 17 00:00:00 2001 From: Alex Merkel <mail@alexmerkel.com> Date: Thu, 18 Jun 2020 22:36:44 +0200 Subject: [PATCH 101/123] [postprocessor/embedthumbnail] Add support for non jpeg/png thumbnails (closes #25687) --- youtube_dl/postprocessor/embedthumbnail.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 56be914b8..e2002ab0b 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -41,6 +41,28 @@ class EmbedThumbnailPP(FFmpegPostProcessor): 'Skipping embedding the thumbnail because the file is missing.') return [], info + # Check for mislabeled webp file + with open(encodeFilename(thumbnail_filename), "rb") as f: + b = f.read(16) + if b'\x57\x45\x42\x50' in b: # Binary for WEBP + [thumbnail_filename_path, thumbnail_filename_extension] = os.path.splitext(thumbnail_filename) + if not thumbnail_filename_extension == ".webp": + webp_thumbnail_filename = thumbnail_filename_path + ".webp" + 
os.rename(encodeFilename(thumbnail_filename), encodeFilename(webp_thumbnail_filename)) + thumbnail_filename = webp_thumbnail_filename + + # If not a jpg or png thumbnail, convert it to jpg using ffmpeg + if not os.path.splitext(thumbnail_filename)[1].lower() in ['.jpg', '.png']: + jpg_thumbnail_filename = os.path.splitext(thumbnail_filename)[0] + ".jpg" + jpg_thumbnail_filename = os.path.join(os.path.dirname(jpg_thumbnail_filename), os.path.basename(jpg_thumbnail_filename).replace('%', '_')) # ffmpeg interprets % as image sequence + + self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % thumbnail_filename) + + self.run_ffmpeg(thumbnail_filename, jpg_thumbnail_filename, ['-bsf:v', 'mjpeg2jpeg']) + + os.remove(encodeFilename(thumbnail_filename)) + thumbnail_filename = jpg_thumbnail_filename + if info['ext'] == 'mp3': options = [ '-c', 'copy', '-map', '0', '-map', '1', From 198ac72e3a4b999aabb624aefcd49b906f9dfd12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 14 Sep 2020 03:28:31 +0700 Subject: [PATCH 102/123] [postprocessor/embedthumbnail] Fix issues (closes #25717) * Fix WebP with wrong extension processing * Fix embedding of thumbnails with % character in path --- youtube_dl/postprocessor/embedthumbnail.py | 49 +++++++++++++--------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index e2002ab0b..5a3359588 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -13,6 +13,7 @@ from ..utils import ( encodeFilename, PostProcessingError, prepend_extension, + replace_extension, shell_quote ) @@ -41,27 +42,37 @@ class EmbedThumbnailPP(FFmpegPostProcessor): 'Skipping embedding the thumbnail because the file is missing.') return [], info - # Check for mislabeled webp file - with open(encodeFilename(thumbnail_filename), "rb") as f: - b = f.read(16) - if 
b'\x57\x45\x42\x50' in b: # Binary for WEBP - [thumbnail_filename_path, thumbnail_filename_extension] = os.path.splitext(thumbnail_filename) - if not thumbnail_filename_extension == ".webp": - webp_thumbnail_filename = thumbnail_filename_path + ".webp" - os.rename(encodeFilename(thumbnail_filename), encodeFilename(webp_thumbnail_filename)) - thumbnail_filename = webp_thumbnail_filename + def is_webp(path): + with open(encodeFilename(path), 'rb') as f: + b = f.read(12) + return b[0:4] == b'RIFF' and b[8:] == b'WEBP' - # If not a jpg or png thumbnail, convert it to jpg using ffmpeg - if not os.path.splitext(thumbnail_filename)[1].lower() in ['.jpg', '.png']: - jpg_thumbnail_filename = os.path.splitext(thumbnail_filename)[0] + ".jpg" - jpg_thumbnail_filename = os.path.join(os.path.dirname(jpg_thumbnail_filename), os.path.basename(jpg_thumbnail_filename).replace('%', '_')) # ffmpeg interprets % as image sequence + # Correct extension for WebP file with wrong extension (see #25687, #25717) + _, thumbnail_ext = os.path.splitext(thumbnail_filename) + if thumbnail_ext: + thumbnail_ext = thumbnail_ext[1:].lower() + if thumbnail_ext != 'webp' and is_webp(thumbnail_filename): + self._downloader.to_screen( + '[ffmpeg] Correcting extension to webp and escaping path for thumbnail "%s"' % thumbnail_filename) + thumbnail_webp_filename = replace_extension(thumbnail_filename, 'webp') + os.rename(encodeFilename(thumbnail_filename), encodeFilename(thumbnail_webp_filename)) + thumbnail_filename = thumbnail_webp_filename + thumbnail_ext = 'webp' - self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % thumbnail_filename) - - self.run_ffmpeg(thumbnail_filename, jpg_thumbnail_filename, ['-bsf:v', 'mjpeg2jpeg']) - - os.remove(encodeFilename(thumbnail_filename)) - thumbnail_filename = jpg_thumbnail_filename + # Convert unsupported thumbnail formats to JPEG (see #25687, #25717) + if thumbnail_ext not in ['jpg', 'png']: + # NB: % is supposed to be escaped with %% but this 
does not work + # for input files so working around with standard substitution + escaped_thumbnail_filename = thumbnail_filename.replace('%', '#') + os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename)) + escaped_thumbnail_jpg_filename = replace_extension(escaped_thumbnail_filename, 'jpg') + self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % escaped_thumbnail_filename) + self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_jpg_filename, ['-bsf:v', 'mjpeg2jpeg']) + os.remove(encodeFilename(escaped_thumbnail_filename)) + thumbnail_jpg_filename = replace_extension(thumbnail_filename, 'jpg') + # Rename back to unescaped for further processing + os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename)) + thumbnail_filename = thumbnail_jpg_filename if info['ext'] == 'mp3': options = [ From 9d5351f7439a7f461f4158198e3f0ce12d20c39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 14 Sep 2020 03:35:18 +0700 Subject: [PATCH 103/123] [ChangeLog] Actualize [ci skip] --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index 86b0e8ccb..041cf7113 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version <unreleased> + +Core ++ [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails + (#25687, #25717) + +Extractors +* [rtlnl] Extend URL regular expression (#26549, #25821) +* [youtube] Fix empty description extraction (#26575, #26006) +* [srgssr] Extend URL regular expression (#26555, #26556, #26578) +* [googledrive] Use redirect URLs for source format (#18877, #23919, #24689, + #26565) +* [svtplay] Fix id extraction (#26576) +* [redbulltv] Improve support for rebull.com TV localized URLs (#22063) ++ [redbulltv] Add support for new redbull.com TV URLs (#22037, #22063) +* [soundcloud:pagedplaylist] Reduce pagination limit (#26557) + + version 2020.09.06 Core 
From 76fbd329dcc97e17c89399a906d9ff470ba25b7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 14 Sep 2020 03:37:36 +0700 Subject: [PATCH 104/123] release 2020.09.14 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 8 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index f05aa66e6..352263789 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.09.06 + [debug] youtube-dl version 2020.09.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 29beaf437..fa6509be3 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index f96b8d2bb..70b0f2f19 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 3a175aa4d..ec17e4a33 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.09.06 + [debug] youtube-dl version 2020.09.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 4977079de..6ac963206 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 041cf7113..4143ec2fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2020.09.14 Core + [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5c4e1d58c..367545a96 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -717,6 +717,8 @@ - **RayWenderlichCourse** - **RBMARadio** - **RDS**: RDS.ca + - **RedBull** + - **RedBullEmbed** - **RedBullTV** - **RedBullTVRrnContent** - **Reddit** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 45b4d3291..5625b8324 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.09.06' +__version__ = '2020.09.14' From eb37dd2e640d95b45eaa99121b61bd37663d7b3c Mon Sep 17 00:00:00 2001 From: Ori Avtalion <ori@avtalion.name> Date: Thu, 17 Sep 2020 23:15:44 +0300 Subject: [PATCH 105/123] [downloader/http] Retry download when urlopen times out (#26603) (refs #10935) --- youtube_dl/downloader/http.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 5046878df..e14ddce58 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -106,7 +106,12 @@ class HttpFD(FileDownloader): set_range(request, range_start, range_end) # Establish connection try: - ctx.data = self.ydl.urlopen(request) + try: + ctx.data = self.ydl.urlopen(request) + 
except (compat_urllib_error.URLError, ) as err: + if isinstance(err.reason, socket.timeout): + raise RetryDownload(err) + raise err # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range From 59ba99573a760550935cfc3f7fd83310f9d5f847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 18 Sep 2020 03:32:54 +0700 Subject: [PATCH 106/123] [downloader/http] Improve timeout detection when reading block of data (refs #10935) --- youtube_dl/downloader/http.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index e14ddce58..6ef26548d 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -238,9 +238,11 @@ class HttpFD(FileDownloader): except socket.timeout as e: retry(e) except socket.error as e: - if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT): - raise - retry(e) + # SSLError on python 2 (inherits socket.error) may have + # no errno set but this error message + if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message') == 'The read operation timed out': + retry(e) + raise byte_counter += len(data_block) From 177d707220d96b9cc8d08d8dedc7d07004505d30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 18 Sep 2020 03:41:16 +0700 Subject: [PATCH 107/123] [extractor/common] Handle ssl.CertificateError in _request_webpage (closes #26601) ssl.CertificateError is raised on some python versions <= 3.7.x --- youtube_dl/extractor/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a61753b17..f740ddad1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -10,6 
+10,7 @@ import os import random import re import socket +import ssl import sys import time import math @@ -623,9 +624,12 @@ class InfoExtractor(object): url_or_request = update_url_query(url_or_request, query) if data is not None or headers: url_or_request = sanitized_Request(url_or_request, data, headers) + exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error] + if hasattr(ssl, 'CertificateError'): + exceptions.append(ssl.CertificateError) try: return self._downloader.urlopen(url_or_request) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + except tuple(exceptions) as err: if isinstance(err, compat_urllib_error.HTTPError): if self.__can_accept_status_code(err, expected_status): # Retain reference to error to prevent file object from From 36bf3f2d2cce6b4aa41e955b929277848cca0534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20P=C3=B6schel?= <basicmaster@users.noreply.github.com> Date: Fri, 18 Sep 2020 00:26:56 +0200 Subject: [PATCH 108/123] [downloader/hls] Fix incorrect end byte in Range HTTP header for media segments with EXT-X-BYTERANGE (#24512) (closes #14748) The end of the byte range is the first byte that is NOT part of the to be downloaded range. So don't include it into the requested HTTP download range, as this additional byte leads to a broken TS packet and subsequently to e.g. visible video corruption. Fixes #14748. 
--- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 84bc34928..0f2c06f40 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -141,7 +141,7 @@ class HlsFD(FragmentFD): count = 0 headers = info_dict.get('http_headers', {}) if byte_range: - headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end']) + headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) while count <= fragment_retries: try: success, frag_content = self._download_fragment( From fa52659eb68f3d5d8608766c89ed38d0049227b9 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Fri, 18 Sep 2020 18:59:19 -0400 Subject: [PATCH 109/123] [pornhub] Fix view count extraction (#26621) (refs #26614) --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3567a3283..c64c870dc 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -341,7 +341,7 @@ class PornHubIE(PornHubBaseIE): webpage, 'uploader', fatal=False) view_count = self._extract_count( - r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') + r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view') like_count = self._extract_count( r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') dislike_count = self._extract_count( From 571b1dd76cc31ff74338e3b6d43aa62cf1055fcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 19 Sep 2020 06:13:42 +0700 Subject: [PATCH 110/123] [extractor/common] Extract author as uploader for VideoObject in _json_ld --- youtube_dl/extractor/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f740ddad1..c9b8b6337 100644 --- 
a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1268,6 +1268,7 @@ class InfoExtractor(object): 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), + 'uploader': str_or_none(e.get('author')), 'filesize': float_or_none(e.get('contentSize')), 'tbr': int_or_none(e.get('bitrate')), 'width': int_or_none(e.get('width')), From d81280607e90ae510949b717782d197375224c76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 19 Sep 2020 06:33:17 +0700 Subject: [PATCH 111/123] [extractor/common] Relax interaction count extraction in _json_ld --- youtube_dl/extractor/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c9b8b6337..021945a89 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -68,6 +68,7 @@ from ..utils import ( sanitized_Request, sanitize_filename, str_or_none, + str_to_int, strip_or_none, unescapeHTML, unified_strdate, @@ -1248,7 +1249,10 @@ class InfoExtractor(object): interaction_type = is_e.get('interactionType') if not isinstance(interaction_type, compat_str): continue - interaction_count = int_or_none(is_e.get('userInteractionCount')) + # For interaction count some sites provide string instead of + # an integer (as per spec) with non digit characters (e.g. 
",") + # so extracting count with more relaxed str_to_int + interaction_count = str_to_int(is_e.get('userInteractionCount')) if interaction_count is None: continue count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1]) From 7d3470617e867d6c83288f4d4854fdadfa762d9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 19 Sep 2020 06:34:34 +0700 Subject: [PATCH 112/123] [pornhub] Extract metadata from JSON-LD (closes #26614) --- youtube_dl/extractor/pornhub.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index c64c870dc..529f3f711 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -17,6 +17,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + merge_dicts, NO_DEFAULT, orderedSet, remove_quotes, @@ -59,13 +60,14 @@ class PornHubIE(PornHubBaseIE): ''' _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', - 'md5': '1e19b41231a02eba417839222ac9d58e', + 'md5': 'a6391306d050e4547f62b3f485dd9ba9', 'info_dict': { 'id': '648719015', 'ext': 'mp4', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'uploader': 'Babes', 'upload_date': '20130628', + 'timestamp': 1372447216, 'duration': 361, 'view_count': int, 'like_count': int, @@ -82,8 +84,8 @@ class PornHubIE(PornHubBaseIE): 'id': '1331683002', 'ext': 'mp4', 'title': '重庆婷婷女王足交', - 'uploader': 'Unknown', 'upload_date': '20150213', + 'timestamp': 1423804862, 'duration': 1753, 'view_count': int, 'like_count': int, @@ -121,6 +123,7 @@ class PornHubIE(PornHubBaseIE): 'params': { 'skip_download': True, }, + 'skip': 'This video has been disabled', }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, @@ -338,7 +341,7 @@ class PornHubIE(PornHubBaseIE): video_uploader = self._html_search_regex( r'(?s)From: 
.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', - webpage, 'uploader', fatal=False) + webpage, 'uploader', default=None) view_count = self._extract_count( r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view') @@ -356,7 +359,11 @@ class PornHubIE(PornHubBaseIE): if div: return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div) - return { + info = self._search_json_ld(webpage, video_id, default={}) + # description provided in JSON-LD is irrelevant + info['description'] = None + + return merge_dicts({ 'id': video_id, 'uploader': video_uploader, 'upload_date': upload_date, @@ -372,7 +379,7 @@ class PornHubIE(PornHubBaseIE): 'tags': extract_list('tags'), 'categories': extract_list('categories'), 'subtitles': subtitles, - } + }, info) class PornHubPlaylistBaseIE(PornHubBaseIE): From 17f46d0aded14ad41d16033514ed67475c07a5ff Mon Sep 17 00:00:00 2001 From: Patrick Dessalle <patrick@dessalle.be> Date: Wed, 28 Aug 2019 19:04:57 +0200 Subject: [PATCH 113/123] [telequebec] Add support for brightcove videos (closes #25833) --- youtube_dl/extractor/telequebec.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index c82c94b3a..3adea7bc5 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -12,6 +12,8 @@ from ..utils import ( class TeleQuebecBaseIE(InfoExtractor): + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s' + @staticmethod def _limelight_result(media_id): return { @@ -21,6 +23,13 @@ class TeleQuebecBaseIE(InfoExtractor): 'ie_key': 'LimelightMedia', } + def _brightcove_result(self, brightcove_id): + return self.url_result( + smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['CA']}), + 'BrightcoveNew', brightcove_id) + class TeleQuebecIE(TeleQuebecBaseIE): 
_VALID_URL = r'''(?x) @@ -37,7 +46,7 @@ class TeleQuebecIE(TeleQuebecBaseIE): 'id': '577116881b4b439084e6b1cf4ef8b1b3', 'ext': 'mp4', 'title': 'Un petit choc et puis repart!', - 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374', + 'description': 'md5:067bc84bd6afecad85e69d1000730907', }, 'params': { 'skip_download': True, @@ -58,7 +67,10 @@ class TeleQuebecIE(TeleQuebecBaseIE): 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id, media_id)['media'] - info = self._limelight_result(media_data['streamInfo']['sourceId']) + if media_data['streamInfo']['source'] == 'Brightcove': + info = self._brightcove_result(media_data['streamInfo']['sourceId']) + elif media_data['streamInfo']['source'] == 'Limelight': + info = self._limelight_result(media_data['streamInfo']['sourceId']) info.update({ 'title': media_data.get('title'), 'description': try_get( From e9f7247cb0f268e5b0e2df1a23f901e669560505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 19 Sep 2020 07:52:42 +0700 Subject: [PATCH 114/123] [telequebec] Fix issues (closes #26368) --- youtube_dl/extractor/telequebec.py | 55 +++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 3adea7bc5..b4c485b9b 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -12,23 +12,24 @@ from ..utils import ( class TeleQuebecBaseIE(InfoExtractor): - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s' + @staticmethod + def _result(url, ie_key): + return { + '_type': 'url_transparent', + 'url': smuggle_url(url, {'geo_countries': ['CA']}), + 'ie_key': ie_key, + } @staticmethod def _limelight_result(media_id): - return { - '_type': 'url_transparent', - 'url': smuggle_url( - 'limelight:media:' + media_id, {'geo_countries': ['CA']}), - 'ie_key': 'LimelightMedia', - } + return 
TeleQuebecBaseIE._result( + 'limelight:media:' + media_id, 'LimelightMedia') - def _brightcove_result(self, brightcove_id): - return self.url_result( - smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, - {'geo_countries': ['CA']}), - 'BrightcoveNew', brightcove_id) + @staticmethod + def _brightcove_result(brightcove_id): + return TeleQuebecBaseIE._result( + 'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s' + % brightcove_id, 'BrightcoveNew') class TeleQuebecIE(TeleQuebecBaseIE): @@ -51,6 +52,22 @@ class TeleQuebecIE(TeleQuebecBaseIE): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout', + 'info_dict': { + 'id': '6167180337001', + 'ext': 'mp4', + 'title': 'Le soleil', + 'description': 'md5:64289c922a8de2abbe99c354daffde02', + 'uploader_id': '6150020952001', + 'upload_date': '20200625', + 'timestamp': 1593090307, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'add_ie': ['BrightcoveNew'], }, { # no description 'url': 'http://zonevideo.telequebec.tv/media/30261', @@ -67,10 +84,14 @@ class TeleQuebecIE(TeleQuebecBaseIE): 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id, media_id)['media'] - if media_data['streamInfo']['source'] == 'Brightcove': - info = self._brightcove_result(media_data['streamInfo']['sourceId']) - elif media_data['streamInfo']['source'] == 'Limelight': - info = self._limelight_result(media_data['streamInfo']['sourceId']) + source_id = media_data['streamInfo']['sourceId'] + source = (try_get( + media_data, lambda x: x['streamInfo']['source'], + compat_str) or 'limelight').lower() + if source == 'brightcove': + info = self._brightcove_result(source_id) + else: + info = self._limelight_result(source_id) info.update({ 'title': media_data.get('title'), 'description': try_get( From c485e44d2e6bff51247adbf976ca4d744f144f46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Sun, 20 Sep 2020 10:05:00 +0700 Subject: [PATCH 115/123] [twitch] Switch streams to GraphQL and refactor (closes #26535) --- youtube_dl/extractor/twitch.py | 142 +++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index eadc48c6d..ab6654432 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -24,7 +24,6 @@ from ..utils import ( parse_duration, parse_iso8601, qualities, - str_or_none, try_get, unified_timestamp, update_url_query, @@ -337,19 +336,27 @@ def _make_video_result(node): class TwitchGraphQLBaseIE(TwitchBaseIE): _PAGE_LIMIT = 100 - def _download_gql(self, video_id, op, variables, sha256_hash, note, fatal=True): + _OPERATION_HASHES = { + 'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14', + 'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb', + 'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777', + 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84', + 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e', + 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01', + 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', + } + + def _download_gql(self, video_id, ops, note, fatal=True): + for op in ops: + op['extensions'] = { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': self._OPERATION_HASHES[op['operationName']], + } + } return self._download_json( 'https://gql.twitch.tv/gql', video_id, note, - data=json.dumps({ - 'operationName': op, - 'variables': variables, - 'extensions': { - 'persistedQuery': { - 'version': 1, - 'sha256Hash': sha256_hash, - } - } - }).encode(), + data=json.dumps(ops).encode(), headers={ 'Content-Type': 
'text/plain;charset=UTF-8', 'Client-ID': self._CLIENT_ID, @@ -369,14 +376,15 @@ class TwitchCollectionIE(TwitchGraphQLBaseIE): }] _OPERATION_NAME = 'CollectionSideBar' - _SHA256_HASH = '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14' def _real_extract(self, url): collection_id = self._match_id(url) collection = self._download_gql( - collection_id, self._OPERATION_NAME, - {'collectionID': collection_id}, self._SHA256_HASH, - 'Downloading collection GraphQL')['data']['collection'] + collection_id, [{ + 'operationName': self._OPERATION_NAME, + 'variables': {'collectionID': collection_id}, + }], + 'Downloading collection GraphQL')[0]['data']['collection'] title = collection.get('title') entries = [] for edge in collection['items']['edges']: @@ -403,14 +411,16 @@ class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE): if cursor: variables['cursor'] = cursor page = self._download_gql( - channel_name, self._OPERATION_NAME, variables, - self._SHA256_HASH, + channel_name, [{ + 'operationName': self._OPERATION_NAME, + 'variables': variables, + }], 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num), fatal=False) if not page: break edges = try_get( - page, lambda x: x['data']['user'][entries_key]['edges'], list) + page, lambda x: x[0]['data']['user'][entries_key]['edges'], list) if not edges: break for edge in edges: @@ -553,7 +563,6 @@ class TwitchVideosIE(TwitchPlaylistBaseIE): 'views': 'Popular', } - _SHA256_HASH = 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb' _OPERATION_NAME = 'FilterableVideoTower_Videos' _ENTRY_KIND = 'video' _EDGE_KIND = 'VideoEdge' @@ -622,7 +631,6 @@ class TwitchVideosClipsIE(TwitchPlaylistBaseIE): # NB: values other than 20 result in skipped videos _PAGE_LIMIT = 20 - _SHA256_HASH = 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777' _OPERATION_NAME = 'ClipsCards__User' _ENTRY_KIND = 'clip' _EDGE_KIND = 'ClipEdge' @@ -680,7 +688,6 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): 
'playlist_mincount': 3, }] - _SHA256_HASH = '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84' _OPERATION_NAME = 'ChannelCollectionsContent' _ENTRY_KIND = 'collection' _EDGE_KIND = 'CollectionsItemEdge' @@ -717,7 +724,7 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): playlist_title='%s - Collections' % channel_name) -class TwitchStreamIE(TwitchBaseIE): +class TwitchStreamIE(TwitchGraphQLBaseIE): IE_NAME = 'twitch:stream' _VALID_URL = r'''(?x) https?:// @@ -774,28 +781,43 @@ class TwitchStreamIE(TwitchBaseIE): else super(TwitchStreamIE, cls).suitable(url)) def _real_extract(self, url): - channel_name = self._match_id(url) + channel_name = self._match_id(url).lower() - access_token = self._download_access_token(channel_name) + gql = self._download_gql( + channel_name, [{ + 'operationName': 'StreamMetadata', + 'variables': {'channelLogin': channel_name}, + }, { + 'operationName': 'ComscoreStreamingQuery', + 'variables': { + 'channel': channel_name, + 'clipSlug': '', + 'isClip': False, + 'isLive': True, + 'isVodOrCollection': False, + 'vodID': '', + }, + }, { + 'operationName': 'VideoPreviewOverlay', + 'variables': {'login': channel_name}, + }], + 'Downloading stream GraphQL') - token = access_token['token'] - channel_id = self._extract_channel_id(token, channel_name) + user = gql[0]['data']['user'] - stream = self._call_api( - 'kraken/streams/%s?stream_type=all' % channel_id, - channel_id, 'Downloading stream JSON').get('stream') + if not user: + raise ExtractorError( + '%s does not exist' % channel_name, expected=True) + + stream = user['stream'] if not stream: - raise ExtractorError('%s is offline' % channel_id, expected=True) + raise ExtractorError('%s is offline' % channel_name, expected=True) - # Channel name may be typed if different case than the original channel name - # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing - # an invalid m3u8 URL. 
Working around by use of original channel name from stream - # JSON and fallback to lowercase if it's not available. - channel_name = try_get( - stream, lambda x: x['channel']['name'], - compat_str) or channel_name.lower() + access_token = self._download_access_token(channel_name) + token = access_token['token'] + stream_id = stream.get('id') or channel_name query = { 'allow_source': 'true', 'allow_audio_only': 'true', @@ -808,41 +830,39 @@ class TwitchStreamIE(TwitchBaseIE): 'token': token.encode('utf-8'), } formats = self._extract_m3u8_formats( - '%s/api/channel/hls/%s.m3u8?%s' - % (self._USHER_BASE, channel_name, compat_urllib_parse_urlencode(query)), - channel_id, 'mp4') + '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name), + stream_id, 'mp4', query=query) self._prefer_source(formats) view_count = stream.get('viewers') - timestamp = parse_iso8601(stream.get('created_at')) + timestamp = unified_timestamp(stream.get('createdAt')) - channel = stream['channel'] - title = self._live_title(channel.get('display_name') or channel.get('name')) - description = channel.get('status') + sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {} + uploader = sq_user.get('displayName') + description = try_get( + sq_user, lambda x: x['broadcastSettings']['title'], compat_str) - thumbnails = [] - for thumbnail_key, thumbnail_url in stream['preview'].items(): - m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key) - if not m: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) + thumbnail = url_or_none(try_get( + gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'], + compat_str)) + + title = uploader or channel_name + stream_type = stream.get('type') + if stream_type in ['rerun', 'live']: + title += ' (%s)' % stream_type return { - 'id': str_or_none(stream.get('_id')) or channel_id, + 'id': stream_id, 'display_id': channel_name, - 'title': title, + 
'title': self._live_title(title), 'description': description, - 'thumbnails': thumbnails, - 'uploader': channel.get('display_name'), - 'uploader_id': channel.get('name'), + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': channel_name, 'timestamp': timestamp, 'view_count': view_count, 'formats': formats, - 'is_live': True, + 'is_live': stream_type == 'live', } From 743a342abe0d41cd0fe992731e8b08c60afbe5dd Mon Sep 17 00:00:00 2001 From: nixxo <c.nixxo@gmail.com> Date: Sun, 20 Sep 2020 06:39:42 +0200 Subject: [PATCH 116/123] [redtube] Extend _VALID_URL (#26506) --- youtube_dl/extractor/redtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 2d2f6a98c..a1ca791ca 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -15,7 +15,7 @@ from ..utils import ( class RedTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://www.redtube.com/66418', 'md5': 'fc08071233725f26b8f014dba9590005', @@ -31,6 +31,9 @@ class RedTubeIE(InfoExtractor): }, { 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', 'only_matching': True, + }, { + 'url': 'http://it.redtube.com/66418', + 'only_matching': True, }] @staticmethod From 4419ace2985fb2c05854dbc11ecdd35607677603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Sep 2020 12:23:38 +0700 Subject: [PATCH 117/123] [ChangeLog] Actualize [ci skip] --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4143ec2fb..7610cab17 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version <unreleased> + +Core +* [extractor/common] Relax interaction count extraction in _json_ld ++ [extractor/common] 
Extract author as uploader for VideoObject in _json_ld +* [downloader/hls] Fix incorrect end byte in Range HTTP header for + media segments with EXT-X-BYTERANGE (#14748, #24512) +* [extractor/common] Handle ssl.CertificateError in _request_webpage (#26601) +* [downloader/http] Improve timeout detection when reading block of data + (#10935) +* [downloader/http] Retry download when urlopen times out (#10935, #26603) + +Extractors +* [redtube] Extend URL regular expression (#26506) +* [twitch] Refactor +* [twitch:stream] Switch to GraphQL and fix reruns (#26535) ++ [telequebec] Add support for brightcove videos (#25833) +* [pornhub] Extract metadata from JSON-LD (#26614) +* [pornhub] Fix view count extraction (#26621, #26614) + + version 2020.09.14 Core From 8c744aa72b7462cab0ad5690f686855e442f0d7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Sep 2020 12:30:45 +0700 Subject: [PATCH 118/123] release 2020.09.20 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 352263789..ce0319fe2 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.09.14 + [debug] youtube-dl version 2020.09.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index fa6509be3..a4002603c 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this 
check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 70b0f2f19..3f8b6ce2e 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. 
- Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index ec17e4a33..d880c225a 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. 
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.09.14 + [debug] youtube-dl version 2020.09.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 6ac963206..dd5fb5144 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. 
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 7610cab17..9b52b7bd2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2020.09.20 Core * [extractor/common] Relax interaction count extraction in _json_ld diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5625b8324..709e5c74c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.09.14' +__version__ = '2020.09.20' From 07974b90b224fe1bf0bbedf45f66e9813de82157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Sep 2020 06:44:14 +0700 Subject: [PATCH 119/123] [downloader/http] Fix access to not yet opened stream in retry --- youtube_dl/downloader/http.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 6ef26548d..04da14d91 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -223,9 +223,10 @@ class HttpFD(FileDownloader): def retry(e): to_stdout = ctx.tmpfilename == '-' - if not to_stdout: - ctx.stream.close() - ctx.stream = None + if ctx.stream is not None: + if not to_stdout: + ctx.stream.close() + ctx.stream = None ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename)) raise RetryDownload(e) From dd7c84f5d39fe818c086a91ff91129c0c684b781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Sep 2020 07:01:59 +0700 
Subject: [PATCH 120/123] [downloader/http] Properly handle missing message in SSLError (closes #26646) --- youtube_dl/downloader/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 04da14d91..96379caf1 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -241,7 +241,7 @@ class HttpFD(FileDownloader): except socket.error as e: # SSLError on python 2 (inherits socket.error) may have # no errno set but this error message - if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message') == 'The read operation timed out': + if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out': retry(e) raise From e9ba8548d3e898924be569e35254ea4b703bfcf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Sep 2020 06:36:07 +0700 Subject: [PATCH 121/123] [README.md] Fix autonumber sequence description (refs #26686) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 45326c69e..cd8856828 100644 --- a/README.md +++ b/README.md @@ -545,7 +545,7 @@ The basic usage is not to set any template arguments when downloading a single f - `extractor` (string): Name of the extractor - `extractor_key` (string): Key name of the extractor - `epoch` (numeric): Unix epoch when creating the file - - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero + - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start` - `playlist` (string): Name or id of the playlist that contains the video - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist - `playlist_id` (string): Playlist identifier From 52d5296dc2f832ddfa0bc413607bd16114e9453c Mon Sep 17 
00:00:00 2001 From: Surkal <Surkal@users.noreply.github.com> Date: Thu, 24 Sep 2020 01:46:58 +0200 Subject: [PATCH 122/123] [iprima] Improve video id extraction (#26507) (closes #26494) --- youtube_dl/extractor/iprima.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 53a550c11..648ae6741 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -86,7 +86,8 @@ class IPrimaIE(InfoExtractor): (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)', r'data-product="([^"]+)">', r'id=["\']player-(p\d+)"', - r'playerId\s*:\s*["\']player-(p\d+)'), + r'playerId\s*:\s*["\']player-(p\d+)', + r'\bvideos\s*=\s*["\'](p\d+)'), webpage, 'real id') playerpage = self._download_webpage( From a6e3b9a97b8df45944979d1851fe06d345ac5775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Sep 2020 07:36:38 +0700 Subject: [PATCH 123/123] [expressen] Add support for di.se (closes #26670) --- youtube_dl/extractor/expressen.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/expressen.py b/youtube_dl/extractor/expressen.py index f79365038..dc8b855d2 100644 --- a/youtube_dl/extractor/expressen.py +++ b/youtube_dl/extractor/expressen.py @@ -15,7 +15,7 @@ from ..utils import ( class ExpressenIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - (?:www\.)?expressen\.se/ + (?:www\.)?(?:expressen|di)\.se/ (?:(?:tvspelare/video|videoplayer/embed)/)? 
tv/(?:[^/]+/)* (?P<id>[^/?#&]+) @@ -42,13 +42,16 @@ class ExpressenIE(InfoExtractor): }, { 'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', 'only_matching': True, + }, { + 'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', + 'only_matching': True, }] @staticmethod def _extract_urls(webpage): return [ mobj.group('url') for mobj in re.finditer( - r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1', + r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1', webpage)] def _real_extract(self, url):