From e0dde1d8e28cee673e4362a4141a21326937999d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 2 May 2019 10:46:29 +0100 Subject: [PATCH 1/6] [fox] fix Uplynk PrePlay error handling under python 2(#20925) --- youtube_dl/extractor/fox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index f1fbaa0fc..04f4bdba6 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -100,7 +100,7 @@ class FOXIE(AdobePassIE): try: m3u8_url = self._download_json(release_url, video_id)['playURL'] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.status == 403: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: error = self._parse_json(e.cause.read().decode(), video_id) if error.get('exception') == 'GeoLocationBlocked': self.raise_geo_restricted(countries=['US']) From 2533f5b6918da1c09939bc9d5c051f56c26be86a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 May 2019 03:11:25 +0700 Subject: [PATCH 2/6] [hotstar] Move to API v2 (closes #20931) --- youtube_dl/extractor/hotstar.py | 87 +++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 8de9c4faf..79d5bbb2e 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -4,40 +4,59 @@ from __future__ import unicode_literals import hashlib import hmac import time +import uuid from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( determine_ext, ExtractorError, int_or_none, + str_or_none, try_get, + url_or_none, ) class HotStarBaseIE(InfoExtractor): _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' - def _call_api(self, path, video_id, query_name='contentId'): + def _call_api_impl(self, 
path, video_id, query): st = int(time.time()) exp = st + 6000 auth = 'st=%d~exp=%d~acl=/*' % (st, exp) auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() response = self._download_json( - 'https://api.hotstar.com/' + path, - video_id, headers={ + 'https://api.hotstar.com/' + path, video_id, headers={ 'hotstarauth': auth, 'x-country-code': 'IN', 'x-platform-code': 'JIO', - }, query={ - query_name: video_id, - 'tas': 10000, - }) + }, query=query) if response['statusCode'] != 'OK': raise ExtractorError( response['body']['message'], expected=True) return response['body']['results'] + def _call_api(self, path, video_id, query_name='contentId'): + return self._call_api_impl(path, video_id, { + query_name: video_id, + 'tas': 10000, + }) + + def _call_api_v2(self, path, video_id): + return self._call_api_impl( + '%s/in/contents/%s' % (path, video_id), video_id, { + 'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash', + 'client': 'mweb', + 'clientVersion': '6.18.0', + 'deviceId': compat_str(uuid.uuid4()), + 'osName': 'Windows', + 'osVersion': '10', + }) + class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' @@ -68,6 +87,10 @@ class HotStarIE(HotStarBaseIE): }, { 'url': 'http://www.hotstar.com/1000000515', 'only_matching': True, + }, { + # only available via api v2 + 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847', + 'only_matching': True, }] _GEO_BYPASS = False @@ -95,26 +118,40 @@ class HotStarIE(HotStarBaseIE): raise ExtractorError('This video is DRM protected.', expected=True) formats = [] - format_data = self._call_api('h/v1/play', video_id)['item'] - format_url = format_data['playbackUrl'] - ext = determine_ext(format_url) - if ext == 'm3u8': + geo_restricted = False + playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets'] + for playback_set in playback_sets: + if not isinstance(playback_set, dict): + continue + format_url = 
url_or_none(playback_set.get('playbackUrl')) + if not format_url: + continue + tags = str_or_none(playback_set.get('tagsCombination')) or '' + if tags and 'encryption:plain' not in tags: + continue + ext = determine_ext(format_url) try: - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id='hls')) + if 'package:hls' in tags or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id='hls')) + elif 'package:dash' in tags or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash')) + elif ext == 'f4m': + # produce broken files + pass + else: + formats.append({ + 'url': format_url, + 'width': int_or_none(playback_set.get('width')), + 'height': int_or_none(playback_set.get('height')), + }) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - self.raise_geo_restricted(countries=['IN']) - raise - elif ext == 'f4m': - # produce broken files - pass - else: - formats.append({ - 'url': format_url, - 'width': int_or_none(format_data.get('width')), - 'height': int_or_none(format_data.get('height')), - }) + geo_restricted = True + continue + if not formats and geo_restricted: + self.raise_geo_restricted(countries=['IN']) self._sort_formats(formats) return { From c9856648db6060a2f4aefda95646b3965e1858c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 May 2019 03:26:40 +0700 Subject: [PATCH 3/6] [4tube] Update token hosts (closes #20918) --- youtube_dl/extractor/fourtube.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index a9a1f911e..be4e81342 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -22,8 +22,6 @@ from ..utils import ( class FourTubeBaseIE(InfoExtractor): - _TKN_HOST = 'tkn.kodicdn.com' - def _extract_formats(self, url, video_id, media_id, sources): token_url 
= 'https://%s/%s/desktop/%s' % ( self._TKN_HOST, media_id, '+'.join(sources)) @@ -120,6 +118,7 @@ class FourTubeIE(FourTubeBaseIE): IE_NAME = '4tube' _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video' + _TKN_HOST = 'token.4tube.com' _TESTS = [{ 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', 'md5': '6516c8ac63b03de06bc8eac14362db4f', @@ -149,6 +148,7 @@ class FourTubeIE(FourTubeBaseIE): class FuxIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' _URL_TEMPLATE = 'https://www.fux.com/video/%s/video' + _TKN_HOST = 'token.fux.com' _TESTS = [{ 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow', 'info_dict': { @@ -280,6 +280,7 @@ class PornTubeIE(FourTubeBaseIE): class PornerBrosIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)' _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s' + _TKN_HOST = 'token.pornerbros.com' _TESTS = [{ 'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369', 'md5': '6516c8ac63b03de06bc8eac14362db4f', From 876fed6bf32c623ca55ece31b675cebec75f05f1 Mon Sep 17 00:00:00 2001 From: mtilbury <26613468+mtilbury@users.noreply.github.com> Date: Sat, 4 May 2019 16:26:30 -0700 Subject: [PATCH 4/6] [francetvinfo] Extend video id extraction (closes #20619) (#20740) --- youtube_dl/extractor/francetv.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 20f449c5c..81b468c7d 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -371,12 +371,13 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): self.url_result(dailymotion_url, DailymotionIE.ie_key()) for dailymotion_url in 
dailymotion_urls]) - video_id, catalogue = self._search_regex( - (r'id-video=([^@]+@[^"]+)', + video_id = self._search_regex( + (r'player\.load[^;]+src:\s*["\']([^"\']+)', + r'id-video=([^@]+@[^"]+)', r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'), - webpage, 'video id').split('@') + webpage, 'video id') - return self._make_url_result(video_id, catalogue) + return self._make_url_result(video_id) class FranceTVInfoSportIE(FranceTVBaseInfoExtractor): From f8c55c6664e0d279ed01702b2af2ba5ee290ee4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 May 2019 01:12:32 +0700 Subject: [PATCH 5/6] [youtube:entrylistbase] Retry on 5xx HTTP errors (#20965) --- youtube_dl/extractor/youtube.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9d542f893..4002dcfdd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, + compat_HTTPError, compat_kwargs, compat_parse_qs, compat_urllib_parse_unquote, @@ -288,10 +289,25 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): if not mobj: break - more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), playlist_id, - 'Downloading page #%s' % page_num, - transform_source=uppercase_escape) + count = 0 + retries = 3 + while count <= retries: + try: + # Downloading page may result in intermittent 5xx HTTP error + # that is usually worked around with a retry + more = self._download_json( + 'https://youtube.com/%s' % mobj.group('more'), playlist_id, + 'Downloading page #%s%s' + % (page_num, ' (retry #%d)' % count if count else ''), + transform_source=uppercase_escape) + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503): + count += 1 + if 
count <= retries: + continue + raise + content_html = more['content_html'] if not content_html.strip(): # Some webpages show a "Load more" button but they don't From 71ebd35d5003cfc5f4c8518249e03e1da0e620b4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 7 May 2019 10:16:51 +0100 Subject: [PATCH 6/6] [sky] add support for news.sky.com (closes #13055) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/{skysports.py => sky.py} | 57 +++++++++++++------ 2 files changed, 43 insertions(+), 19 deletions(-) rename youtube_dl/extractor/{skysports.py => sky.py} (54%) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 392b1f92b..0d0732dcb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1033,7 +1033,10 @@ from .skynewsarabia import ( SkyNewsArabiaIE, SkyNewsArabiaArticleIE, ) -from .skysports import SkySportsIE +from .sky import ( + SkyNewsIE, + SkySportsIE, +) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE diff --git a/youtube_dl/extractor/skysports.py b/youtube_dl/extractor/sky.py similarity index 54% rename from youtube_dl/extractor/skysports.py rename to youtube_dl/extractor/sky.py index efcbb36a9..ea30d6e62 100644 --- a/youtube_dl/extractor/skysports.py +++ b/youtube_dl/extractor/sky.py @@ -10,34 +10,25 @@ from ..utils import ( ) -class SkySportsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', - 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec', - 'info_dict': { - 'id': '10328419', - 'ext': 'mp4', - 'title': 'Bale: It\'s our time to shine', - 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d', - }, - 'add_ie': ['Ooyala'], - } - +class SkyBaseIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) 
video_data = extract_attributes(self._search_regex( - r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data')) + r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)', + webpage, 'video data')) video_url = 'ooyala:%s' % video_data['data-video-id'] if video_data.get('data-token-required') == 'true': - token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {} + token_fetch_options = self._parse_json(video_data.get( + 'data-token-fetch-options', '{}'), video_id, fatal=False) or {} token_fetch_url = token_fetch_options.get('url') if token_fetch_url: - embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False) + embed_token = self._download_webpage(urljoin( + url, token_fetch_url), video_id, fatal=False) if embed_token: - video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')}) + video_url = smuggle_url( + video_url, {'embed_token': embed_token.strip('"')}) return { '_type': 'url_transparent', @@ -47,3 +38,33 @@ class SkySportsIE(InfoExtractor): 'description': strip_or_none(self._og_search_description(webpage)), 'ie_key': 'Ooyala', } + + +class SkySportsIE(SkyBaseIE): + _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', + 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec', + 'info_dict': { + 'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ', + 'ext': 'mp4', + 'title': 'Bale: It\'s our time to shine', + 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d', + }, + 'add_ie': ['Ooyala'], + } + + +class SkyNewsIE(SkyBaseIE): + _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962', + 'md5': 'd6327e581473cea9976a3236ded370cd', + 'info_dict': { + 'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM', + 'ext': 'mp4', + 'title': 'Russian plane inspected after deadly fire', + 'description': 'The Russian Investigative Committee has 
released video of the wreckage of a passenger plane which caught fire near Moscow.', + }, + 'add_ie': ['Ooyala'], + }