From 2533f5b6918da1c09939bc9d5c051f56c26be86a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 May 2019 03:11:25 +0700 Subject: [PATCH 01/49] [hotstar] Move to API v2 (closes #20931) --- youtube_dl/extractor/hotstar.py | 87 +++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 8de9c4faf..79d5bbb2e 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -4,40 +4,59 @@ from __future__ import unicode_literals import hashlib import hmac import time +import uuid from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( determine_ext, ExtractorError, int_or_none, + str_or_none, try_get, + url_or_none, ) class HotStarBaseIE(InfoExtractor): _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' - def _call_api(self, path, video_id, query_name='contentId'): + def _call_api_impl(self, path, video_id, query): st = int(time.time()) exp = st + 6000 auth = 'st=%d~exp=%d~acl=/*' % (st, exp) auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() response = self._download_json( - 'https://api.hotstar.com/' + path, - video_id, headers={ + 'https://api.hotstar.com/' + path, video_id, headers={ 'hotstarauth': auth, 'x-country-code': 'IN', 'x-platform-code': 'JIO', - }, query={ - query_name: video_id, - 'tas': 10000, - }) + }, query=query) if response['statusCode'] != 'OK': raise ExtractorError( response['body']['message'], expected=True) return response['body']['results'] + def _call_api(self, path, video_id, query_name='contentId'): + return self._call_api_impl(path, video_id, { + query_name: video_id, + 'tas': 10000, + }) + + def _call_api_v2(self, path, video_id): + return self._call_api_impl( + '%s/in/contents/%s' % (path, video_id), video_id, { + 'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash', + 'client': 'mweb', + 'clientVersion': '6.18.0', + 'deviceId': compat_str(uuid.uuid4()), + 'osName': 'Windows', + 'osVersion': '10', + }) + class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' @@ -68,6 +87,10 @@ class HotStarIE(HotStarBaseIE): }, { 'url': 'http://www.hotstar.com/1000000515', 'only_matching': True, + }, { + # only available via api v2 + 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847', + 'only_matching': True, }] _GEO_BYPASS = False @@ -95,26 +118,40 @@ class HotStarIE(HotStarBaseIE): raise ExtractorError('This video is DRM protected.', expected=True) formats = [] - format_data = self._call_api('h/v1/play', video_id)['item'] - format_url = format_data['playbackUrl'] - ext = determine_ext(format_url) - if ext == 'm3u8': + geo_restricted = False + playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets'] + for playback_set in playback_sets: + if not isinstance(playback_set, dict): + continue + format_url = url_or_none(playback_set.get('playbackUrl')) + if not format_url: + continue + tags = str_or_none(playback_set.get('tagsCombination')) or '' + if tags and 'encryption:plain' not in tags: + continue + ext = determine_ext(format_url) try: - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id='hls')) + if 'package:hls' in tags or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id='hls')) + elif 'package:dash' in tags or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash')) + elif ext == 'f4m': + # produce broken files + pass + else: + formats.append({ + 'url': format_url, + 'width': int_or_none(playback_set.get('width')), + 'height': int_or_none(playback_set.get('height')), + }) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - self.raise_geo_restricted(countries=['IN']) - raise - elif ext == 'f4m': - # produce broken files - pass - else: - formats.append({ - 'url': format_url, - 'width': int_or_none(format_data.get('width')), - 'height': int_or_none(format_data.get('height')), - }) + geo_restricted = True + continue + if not formats and geo_restricted: + self.raise_geo_restricted(countries=['IN']) self._sort_formats(formats) return { From c9856648db6060a2f4aefda95646b3965e1858c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 May 2019 03:26:40 +0700 Subject: [PATCH 02/49] [4tube] Update token hosts (closes #20918) --- youtube_dl/extractor/fourtube.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index a9a1f911e..be4e81342 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -22,8 +22,6 @@ from ..utils import ( class FourTubeBaseIE(InfoExtractor): - _TKN_HOST = 'tkn.kodicdn.com' - def _extract_formats(self, url, video_id, media_id, sources): token_url = 'https://%s/%s/desktop/%s' % ( self._TKN_HOST, media_id, '+'.join(sources)) @@ -120,6 +118,7 @@ class FourTubeIE(FourTubeBaseIE): IE_NAME = '4tube' _VALID_URL = r'https?://(?:(?Pwww|m)\.)?4tube\.com/(?:videos|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video' + _TKN_HOST = 'token.4tube.com' _TESTS = [{ 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', 'md5': '6516c8ac63b03de06bc8eac14362db4f', @@ -149,6 +148,7 @@ class FourTubeIE(FourTubeBaseIE): class FuxIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?fux\.com/(?:video|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' _URL_TEMPLATE = 'https://www.fux.com/video/%s/video' + _TKN_HOST = 'token.fux.com' _TESTS = [{ 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow', 'info_dict': { @@ -280,6 +280,7 @@ class PornTubeIE(FourTubeBaseIE): class PornerBrosIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?pornerbros\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s' + _TKN_HOST = 'token.pornerbros.com' _TESTS = [{ 'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369', 'md5': '6516c8ac63b03de06bc8eac14362db4f', From 876fed6bf32c623ca55ece31b675cebec75f05f1 Mon Sep 17 00:00:00 2001 From: mtilbury <26613468+mtilbury@users.noreply.github.com> Date: Sat, 4 May 2019 16:26:30 -0700 Subject: [PATCH 03/49] [francetvinfo] Extend video id extraction (closes #20619) (#20740) --- youtube_dl/extractor/francetv.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 20f449c5c..81b468c7d 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -371,12 +371,13 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): self.url_result(dailymotion_url, DailymotionIE.ie_key()) for dailymotion_url in dailymotion_urls]) - video_id, catalogue = self._search_regex( - (r'id-video=([^@]+@[^"]+)', + video_id = self._search_regex( + (r'player\.load[^;]+src:\s*["\']([^"\']+)', + r'id-video=([^@]+@[^"]+)', r']+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'), - webpage, 'video id').split('@') + webpage, 'video id') - return self._make_url_result(video_id, catalogue) + return self._make_url_result(video_id) class FranceTVInfoSportIE(FranceTVBaseInfoExtractor): From f8c55c6664e0d279ed01702b2af2ba5ee290ee4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 May 2019 01:12:32 +0700 Subject: [PATCH 04/49] [youtube:entrylistbase] Retry on 5xx HTTP errors (#20965) --- youtube_dl/extractor/youtube.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9d542f893..4002dcfdd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, + compat_HTTPError, compat_kwargs, compat_parse_qs, compat_urllib_parse_unquote, @@ -288,10 +289,25 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): if not mobj: break - more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), playlist_id, - 'Downloading page #%s' % page_num, - transform_source=uppercase_escape) + count = 0 + retries = 3 + while count <= retries: + try: + # Downloading page may result in intermittent 5xx HTTP error + # that is usually worked around with a retry + more = self._download_json( + 'https://youtube.com/%s' % mobj.group('more'), playlist_id, + 'Downloading page #%s%s' + % (page_num, ' (retry #%d)' % count if count else ''), + transform_source=uppercase_escape) + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503): + count += 1 + if count <= retries: + continue + raise + content_html = more['content_html'] if not content_html.strip(): # Some webpages show a "Load more" button but they don't From 71ebd35d5003cfc5f4c8518249e03e1da0e620b4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 7 May 2019 10:16:51 +0100 Subject: [PATCH 05/49] [sky] add support for news.sky.com (closes #13055) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/{skysports.py => sky.py} | 57 +++++++++++++------ 2 files changed, 43 insertions(+), 19 deletions(-) rename youtube_dl/extractor/{skysports.py => sky.py} (54%) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 392b1f92b..0d0732dcb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1033,7 +1033,10 @@ from .skynewsarabia import ( SkyNewsArabiaIE, SkyNewsArabiaArticleIE, ) -from .skysports import SkySportsIE +from .sky import ( + SkyNewsIE, + SkySportsIE, +) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE diff --git a/youtube_dl/extractor/skysports.py b/youtube_dl/extractor/sky.py similarity index 54% rename from youtube_dl/extractor/skysports.py rename to youtube_dl/extractor/sky.py index efcbb36a9..ea30d6e62 100644 --- a/youtube_dl/extractor/skysports.py +++ b/youtube_dl/extractor/sky.py @@ -10,34 +10,25 @@ from ..utils import ( ) -class SkySportsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P[0-9]+)' - _TEST = { - 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', - 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec', - 'info_dict': { - 'id': '10328419', - 'ext': 'mp4', - 'title': 'Bale: It\'s our time to shine', - 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d', - }, - 'add_ie': ['Ooyala'], - } - +class SkyBaseIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_data = extract_attributes(self._search_regex( - r'(]+>)', webpage, 'video data')) + r'(]+>)', + webpage, 'video data')) video_url = 'ooyala:%s' % video_data['data-video-id'] if video_data.get('data-token-required') == 'true': - token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {} + token_fetch_options = self._parse_json(video_data.get( + 'data-token-fetch-options', '{}'), video_id, fatal=False) or {} token_fetch_url = token_fetch_options.get('url') if token_fetch_url: - embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False) + embed_token = self._download_webpage(urljoin( + url, token_fetch_url), video_id, fatal=False) if embed_token: - video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')}) + video_url = smuggle_url( + video_url, {'embed_token': embed_token.strip('"')}) return { '_type': 'url_transparent', @@ -47,3 +38,33 @@ class SkySportsIE(InfoExtractor): 'description': strip_or_none(self._og_search_description(webpage)), 'ie_key': 'Ooyala', } + + +class SkySportsIE(SkyBaseIE): + _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', + 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec', + 'info_dict': { + 'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ', + 'ext': 'mp4', + 'title': 'Bale: It\'s our time to shine', + 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d', + }, + 'add_ie': ['Ooyala'], + } + + +class SkyNewsIE(SkyBaseIE): + _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P[0-9]+)' + _TEST = { + 'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962', + 'md5': 'd6327e581473cea9976a3236ded370cd', + 'info_dict': { + 'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM', + 'ext': 'mp4', + 'title': 'Russian plane inspected after deadly fire', + 'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.', + }, + 'add_ie': ['Ooyala'], + } From b45a9e698e900cf3628963b77e5149e65857fdaf Mon Sep 17 00:00:00 2001 From: pypy Date: Thu, 9 May 2019 02:58:47 +0900 Subject: [PATCH 06/49] [youtube] Fix channel id extraction (closes #20982) (#21003) --- youtube_dl/extractor/youtube.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4002dcfdd..da202b9bc 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2100,8 +2100,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: self._downloader.report_warning('unable to extract uploader nickname') - channel_id = self._html_search_meta( - 'channelId', video_webpage, 'channel id') + channel_id = ( + str_or_none(video_details.get('channelId')) or + self._html_search_meta( + 'channelId', video_webpage, 'channel id', default=None) or + self._search_regex( + r'data-channel-external-id=(["\'])(?P(?:(?!\1).)+)\1', + video_webpage, 'channel id', default=None, group='id')) channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None # thumbnail image From a5b92d3590def85aee73d2968875e9a9cc916f26 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 9 May 2019 04:13:31 +0100 Subject: [PATCH 07/49] [yahoo:gyao] extend _VALID_URL(closes #21008) --- youtube_dl/extractor/yahoo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 86ba7d3c9..6576c1d69 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -526,7 +526,7 @@ class YahooGyaOPlayerIE(InfoExtractor): class YahooGyaOIE(InfoExtractor): IE_NAME = 'yahoo:gyao' - _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/p|streaming\.yahoo\.co\.jp/p/y)/(?P\d+/v\d+)' + _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title/[^/]+)|streaming\.yahoo\.co\.jp/p/y)/(?P\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _TESTS = [{ 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/', 'info_dict': { @@ -536,6 +536,9 @@ class YahooGyaOIE(InfoExtractor): }, { 'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/', 'only_matching': True, + }, { + 'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf', + 'only_matching': True, }] def _real_extract(self, url): From 027ffdca0d3174963a4269ce8de8519cfed7a12c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 May 2019 08:36:10 +0700 Subject: [PATCH 08/49] [youtube] Use sp field value for signature field name (closes #18841, closes #18927, closes #21028) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index da202b9bc..8619f3838 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1987,7 +1987,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): signature = self._decrypt_signature( encrypted_sig, video_id, player_url, age_gate) - url += '&signature=' + signature + sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature' + url += '&%s=%s' % (sp, signature) if 'ratebypass' not in url: url += '&ratebypass=yes' From 4eec112740910621a7fd9c50158fb2388649d8b7 Mon Sep 17 00:00:00 2001 From: ealgase Date: Fri, 10 May 2019 14:35:57 -0400 Subject: [PATCH 09/49] [openload] Add support for verystream.com (closes #20701) (#20967) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/generic.py | 11 +++- youtube_dl/extractor/openload.py | 86 ++++++++++++++++++++++-------- 3 files changed, 78 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0d0732dcb..3037b5a45 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -833,7 +833,10 @@ from .ooyala import ( OoyalaIE, OoyalaExternalIE, ) -from .openload import OpenloadIE +from .openload import ( + OpenloadIE, + VerystreamIE, +) from .ora import OraTVIE from .orf import ( ORFTVthekIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6f48b04da..495fa4975 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -89,7 +89,10 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE -from .openload import OpenloadIE +from .openload import ( + OpenloadIE, + VerystreamIE, +) from .videopress import VideoPressIE from .rutube import RutubeIE from .limelight import LimelightBaseIE @@ -3017,6 +3020,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( openload_urls, video_id, video_title, ie=OpenloadIE.ie_key()) + # Look for Verystream embeds + verystream_urls = VerystreamIE._extract_urls(webpage) + if verystream_urls: + return self.playlist_from_matches( + verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key()) + # Look for VideoPress embeds videopress_urls = VideoPressIE._extract_urls(webpage) if videopress_urls: diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 6a8ef67bd..f77296f42 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -254,7 +254,10 @@ class OpenloadIE(InfoExtractor): (?:f|embed)/ (?P[a-zA-Z0-9-_]+) ''' % _DOMAINS - + _EMBED_WORD = 'embed' + _STREAM_WORD = 'f' + _REDIR_WORD = 'stream' + _URL_IDS = ('streamurl', 'streamuri', 'streamurj') _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', 'md5': 'bf1c059b004ebc7a256f89408e65c36e', @@ -1948,11 +1951,16 @@ class OpenloadIE(InfoExtractor): '69.0.3497.28', ) - @staticmethod - def _extract_urls(webpage): + @classmethod + def _extract_urls(cls, webpage): return re.findall( - r']+src=["\']((?:https?://)?%s/embed/[a-zA-Z0-9-_]+)' - % OpenloadIE._DOMAINS, webpage) + r']+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' + % (cls._DOMAINS, cls._EMBED_WORD), webpage) + + def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + phantom = PhantomJSwrapper(self, required_version='2.0') + webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) + return webpage def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -1964,9 +1972,9 @@ class OpenloadIE(InfoExtractor): 'User-Agent': self._USER_AGENT_TPL % random.choice(self._CHROME_VERSIONS), } - for path in ('embed', 'f'): + for path in (self._EMBED_WORD, self._STREAM_WORD): page_url = url_pattern % path - last = path == 'f' + last = path == self._STREAM_WORD webpage = self._download_webpage( page_url, video_id, 'Downloading %s webpage' % path, headers=headers, fatal=last) @@ -1978,21 +1986,20 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True, video_id=video_id) break - phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) - - decoded_id = (get_element_by_id('streamurl', webpage) or - get_element_by_id('streamuri', webpage) or - get_element_by_id('streamurj', webpage) or - self._search_regex( - (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<', - r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)', - r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<', - r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<', - r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, - 'stream URL')) - - video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id) + webpage = self._extract_decrypted_page(page_url, webpage, video_id, headers) + for element_id in self._URL_IDS: + decoded_id = get_element_by_id(element_id, webpage) + if decoded_id: + break + if not decoded_id: + decoded_id = self._search_regex( + (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<', + r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)', + r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<', + r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<', + r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, + 'stream URL') + video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id) title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, @@ -2012,3 +2019,38 @@ class OpenloadIE(InfoExtractor): 'subtitles': subtitles, 'http_headers': headers, } + + +class VerystreamIE(OpenloadIE): + IE_NAME = 'verystream' + + _DOMAINS = r'(?:verystream\.com)' + _VALID_URL = r'''(?x) + https?:// + (?P + (?:www\.)? + %s + )/ + (?:stream|e)/ + (?P[a-zA-Z0-9-_]+) + ''' % _DOMAINS + _EMBED_WORD = 'e' + _STREAM_WORD = 'stream' + _REDIR_WORD = 'gettoken' + _URL_IDS = ('videolink', ) + _TESTS = [{ + 'url': 'https://verystream.com/stream/c1GWQ9ngBBx/', + 'md5': 'd3e8c5628ccb9970b65fd65269886795', + 'info_dict': { + 'id': 'c1GWQ9ngBBx', + 'ext': 'mp4', + 'title': 'Big Buck Bunny.mp4', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, { + 'url': 'https://verystream.com/e/c1GWQ9ngBBx/', + 'only_matching': True, + }] + + def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + return webpage # for Verystream, the webpage is already decrypted From fd35d8cdfdc77ca6ec6d87677fe0d00df0cbb22a Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Fri, 10 May 2019 20:42:32 +0200 Subject: [PATCH 10/49] =?UTF-8?q?[utils]=20Transliterate=20"=C3=BE"=20as?= =?UTF-8?q?=20"th"=20(#20897)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Despite visual similarity "þ" is unrelated to "p". It is normally transliterated as "th": $ echo þ-Þ | iconv -t ASCII//TRANSLIT th-TH --- test/test_utils.py | 2 +- youtube_dl/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index ca6d832a4..9ef0e422b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -183,7 +183,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_filename( 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True), - 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy') + 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 71713f63a..99ee54942 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -125,8 +125,8 @@ KNOWN_EXTENSIONS = ( # needed for sanitizing filenames in restricted mode ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', - itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], - 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) + itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'], + 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y'))) DATE_FORMATS = ( '%d %B %Y', From 68b92aa1b476d3d5cdd98fe11b211171753b712c Mon Sep 17 00:00:00 2001 From: biwubo <45994985+biwubo@users.noreply.github.com> Date: Fri, 10 May 2019 20:12:45 +0100 Subject: [PATCH 11/49] [gfycat] Add support for URLs with tags (closes #20696) (#20731) --- youtube_dl/extractor/gfycat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index c1b36a59b..eb6f85836 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -11,7 +11,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P[^-/?#]+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -47,6 +47,9 @@ class GfycatIE(InfoExtractor): }, { 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull', 'only_matching': True + }, { + 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball', + 'only_matching': True }] def _real_extract(self, url): From ab116745020f2edd30de34e8ad7800209cdc4c72 Mon Sep 17 00:00:00 2001 From: Michael Tilbury Date: Sun, 14 Apr 2019 18:30:46 -0400 Subject: [PATCH 12/49] [byutv] Add support for DVR videos (closes #20574) Fix code style on brackets (flake8) Add more information to test info_dict --- youtube_dl/extractor/byutv.py | 58 +++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 4bf4efe1f..1ec56f42a 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + url_basename, + parse_duration, +) class BYUtvIE(InfoExtractor): @@ -22,6 +26,18 @@ class BYUtvIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['Ooyala'], + }, { + 'url': 'https://www.byutv.org/player/a5467e14-c7f2-46f9-b3c2-cb31a56749c6/byu-soccer-w-argentina-vs-byu-4419', + 'info_dict': { + 'id': 'a5467e14-c7f2-46f9-b3c2-cb31a56749c6', + 'display_id': 'byu-soccer-w-argentina-vs-byu-4419', + 'ext': 'mp4', + 'title': 'Argentina vs. BYU (4/4/19)', + 'duration': 7543.0, + }, + 'params': { + 'skip_download': True + }, }, { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', 'only_matching': True, @@ -33,9 +49,8 @@ class BYUtvIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id - ep = self._download_json( + info = self._download_json( 'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id, query={ 'contentid': video_id, @@ -44,15 +59,32 @@ class BYUtvIE(InfoExtractor): }, headers={ 'x-byutv-context': 'web$US', 'x-byutv-platformkey': 'xsaaw9c7y5', - })['ooyalaVOD'] + }) - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ep['providerId'], - 'id': video_id, - 'display_id': display_id, - 'title': ep.get('title'), - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - } + ep = info.get('ooyalaVOD') + if ep: + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ep['providerId'], + 'id': video_id, + 'display_id': mobj.group('display_id') or video_id, + 'title': ep.get('title'), + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + } + else: + ep = info['dvr'] + formats = self._extract_m3u8_formats( + ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native' + ) + self._sort_formats(formats) + return { + 'formats': formats, + 'id': video_id, + 'display_id': url_basename(url), + 'title': ep['title'], + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + 'duration': parse_duration(ep.get('length')), + } From 0db2b275dd574af2adff49fbbf99ee164b60e4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 May 2019 03:05:34 +0700 Subject: [PATCH 13/49] [byutv] Improve extraction and update DVR test (closes #20676) --- youtube_dl/extractor/byutv.py | 56 ++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 1ec56f42a..562c83af9 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -3,15 +3,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - url_basename, - parse_duration, -) +from ..utils import parse_duration class BYUtvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' _TESTS = [{ + # ooyalaVOD 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'info_dict': { 'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH', @@ -27,13 +25,15 @@ class BYUtvIE(InfoExtractor): }, 'add_ie': ['Ooyala'], }, { - 'url': 'https://www.byutv.org/player/a5467e14-c7f2-46f9-b3c2-cb31a56749c6/byu-soccer-w-argentina-vs-byu-4419', + # dvr + 'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2', 'info_dict': { - 'id': 'a5467e14-c7f2-46f9-b3c2-cb31a56749c6', - 'display_id': 'byu-soccer-w-argentina-vs-byu-4419', + 'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451', + 'display_id': 'byu-softball-pacific-vs-byu-41219---game-2', 'ext': 'mp4', - 'title': 'Argentina vs. BYU (4/4/19)', - 'duration': 7543.0, + 'title': 'Pacific vs. BYU (4/12/19)', + 'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3', + 'duration': 11645, }, 'params': { 'skip_download': True @@ -49,10 +49,11 @@ class BYUtvIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id info = self._download_json( - 'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id, - query={ + 'https://api.byutv.org/api3/catalog/getvideosforcontent', + display_id, query={ 'contentid': video_id, 'channel': 'byutv', 'x-byutv-context': 'web$US', @@ -68,23 +69,24 @@ class BYUtvIE(InfoExtractor): 'ie_key': 'Ooyala', 'url': 'ooyala:%s' % ep['providerId'], 'id': video_id, - 'display_id': mobj.group('display_id') or video_id, + 'display_id': display_id, 'title': ep.get('title'), 'description': ep.get('description'), 'thumbnail': ep.get('imageThumbnail'), } - else: - ep = info['dvr'] - formats = self._extract_m3u8_formats( - ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native' - ) - self._sort_formats(formats) - return { - 'formats': formats, - 'id': video_id, - 'display_id': url_basename(url), - 'title': ep['title'], - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - 'duration': parse_duration(ep.get('length')), - } + + ep = info['dvr'] + title = ep['title'] + formats = self._extract_m3u8_formats( + ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + 'duration': parse_duration(ep.get('length')), + 'formats': formats, + } From 169f8d0fe151f5175ae436152ea3c815d7f290ce Mon Sep 17 00:00:00 2001 From: davex25 Date: Fri, 10 May 2019 15:09:00 -0500 Subject: [PATCH 14/49] [cloudflarestream] Add support for videodelivery.net (#21049) --- youtube_dl/extractor/cloudflarestream.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cloudflarestream.py b/youtube_dl/extractor/cloudflarestream.py index e6d92cca2..8ff2c6531 100644 --- a/youtube_dl/extractor/cloudflarestream.py +++ b/youtube_dl/extractor/cloudflarestream.py @@ -10,8 +10,8 @@ class CloudflareStreamIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:watch\.)?cloudflarestream\.com/| - embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo= + (?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/| + embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo= ) (?P[\da-f]+) ''' @@ -31,6 +31,9 @@ class CloudflareStreamIE(InfoExtractor): }, { 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd', 'only_matching': True, + }, { + 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e', + 'only_matching': True, }] @staticmethod @@ -38,7 +41,7 @@ class CloudflareStreamIE(InfoExtractor): return [ mobj.group('url') for mobj in re.finditer( - r']+\bsrc=(["\'])(?P(?:https?:)?//embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1', webpage)] def _real_extract(self, url): From cd37ef44e9af0b050ecf18455cf43cecd08347e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 May 2019 03:11:53 +0700 Subject: [PATCH 15/49] [commonmistakes] Fix E117 --- youtube_dl/extractor/commonmistakes.py | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py index 06d9148e0..7e12499b1 100644 --- a/youtube_dl/extractor/commonmistakes.py +++ b/youtube_dl/extractor/commonmistakes.py @@ -32,19 +32,19 @@ class CommonMistakesIE(InfoExtractor): class UnicodeBOMIE(InfoExtractor): - IE_DESC = False - _VALID_URL = r'(?P\ufeff)(?P.*)$' + IE_DESC = False + _VALID_URL = r'(?P\ufeff)(?P.*)$' - # Disable test for python 3.2 since BOM is broken in re in this version - # (see https://github.com/ytdl-org/youtube-dl/issues/9751) - _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{ - 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', - 'only_matching': True, - }] + # Disable test for python 3.2 since BOM is broken in re in this version + # (see https://github.com/ytdl-org/youtube-dl/issues/9751) + _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{ + 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', + 'only_matching': True, + }] - def _real_extract(self, url): - real_url = self._match_id(url) - self.report_warning( - 'Your URL starts with a Byte Order Mark (BOM). ' - 'Removing the BOM and looking for "%s" ...' % real_url) - return self.url_result(real_url) + def _real_extract(self, url): + real_url = self._match_id(url) + self.report_warning( + 'Your URL starts with a Byte Order Mark (BOM). ' + 'Removing the BOM and looking for "%s" ...' % real_url) + return self.url_result(real_url) From d23e85515a8f58e276e8ac07bf1fa19f4f1aaec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 May 2019 03:14:31 +0700 Subject: [PATCH 16/49] [tvnow] Fix W605 --- youtube_dl/extractor/tvnow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvnow.py b/youtube_dl/extractor/tvnow.py index 3c6a60c39..ecd0fb937 100644 --- a/youtube_dl/extractor/tvnow.py +++ b/youtube_dl/extractor/tvnow.py @@ -207,7 +207,7 @@ class TVNowNewBaseIE(InfoExtractor): return result -""" +r""" TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it when api.tvnow.de is shut down. This version can't bypass premium checks though. class TVNowIE(TVNowNewBaseIE): From 3089bc748c0fe72a0361bce3f5e2fbab25175236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 May 2019 03:56:22 +0700 Subject: [PATCH 17/49] Fix W504 and disable W503 (closes #20863) --- devscripts/check-porn.py | 8 ++--- setup.cfg | 2 +- test/test_aes.py | 8 ++--- test/test_swfinterp.py | 4 +-- youtube_dl/YoutubeDL.py | 42 ++++++++++++------------- youtube_dl/__init__.py | 16 +++++----- youtube_dl/compat.py | 6 ++-- youtube_dl/downloader/common.py | 10 +++--- youtube_dl/downloader/f4m.py | 8 ++--- youtube_dl/downloader/fragment.py | 4 +-- youtube_dl/downloader/hls.py | 8 ++--- youtube_dl/downloader/http.py | 14 ++++----- youtube_dl/extractor/addanime.py | 6 ++-- youtube_dl/extractor/blinkx.py | 4 +-- youtube_dl/extractor/common.py | 40 +++++++++++------------ youtube_dl/extractor/dailymail.py | 4 +-- youtube_dl/extractor/dctp.py | 4 +-- youtube_dl/extractor/expressen.py | 4 +-- youtube_dl/extractor/frontendmasters.py | 4 +-- youtube_dl/extractor/generic.py | 14 ++++----- youtube_dl/extractor/heise.py | 4 +-- youtube_dl/extractor/hitbox.py | 4 +-- youtube_dl/extractor/hitrecord.py | 4 +-- youtube_dl/extractor/hketv.py | 12 +++---- youtube_dl/extractor/hrti.py | 4 +-- youtube_dl/extractor/infoq.py | 6 ++-- youtube_dl/extractor/iqiyi.py | 6 ++-- youtube_dl/extractor/itv.py | 8 ++--- youtube_dl/extractor/kaltura.py | 8 ++--- youtube_dl/extractor/karrierevideos.py | 4 +-- youtube_dl/extractor/motherless.py | 4 +-- youtube_dl/extractor/ndtv.py | 4 +-- youtube_dl/extractor/nextmedia.py | 4 +-- youtube_dl/extractor/niconico.py | 26 +++++++-------- youtube_dl/extractor/nrk.py | 4 +-- youtube_dl/extractor/ooyala.py | 4 +-- youtube_dl/extractor/openload.py | 6 ++-- youtube_dl/extractor/podomatic.py | 4 +-- youtube_dl/extractor/ruutu.py | 4 +-- youtube_dl/extractor/sbs.py | 4 +-- youtube_dl/extractor/vevo.py | 4 +-- youtube_dl/extractor/vk.py | 4 +-- youtube_dl/extractor/yandexvideo.py | 6 ++-- youtube_dl/extractor/youku.py | 4 +-- youtube_dl/extractor/youtube.py | 18 +++++------ youtube_dl/extractor/zattoo.py | 4 +-- youtube_dl/postprocessor/ffmpeg.py | 10 +++--- youtube_dl/postprocessor/xattrpp.py | 4 +-- youtube_dl/utils.py | 34 ++++++++++---------- 49 files changed, 211 insertions(+), 211 deletions(-) diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py index 72b2ee422..740f04de0 100644 --- a/devscripts/check-porn.py +++ b/devscripts/check-porn.py @@ -45,12 +45,12 @@ for test in gettestcases(): RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST) - if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or - test['info_dict']['age_limit'] != 18): + if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] + or test['info_dict']['age_limit'] != 18): print('\nPotential missing age_limit check: {0}'.format(test['name'])) - elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and - test['info_dict']['age_limit'] == 18): + elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] + and test['info_dict']['age_limit'] == 18): print('\nPotential false negative: {0}'.format(test['name'])) else: diff --git a/setup.cfg b/setup.cfg index af9a554c6..da78a9c47 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,4 +3,4 @@ universal = True [flake8] exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv -ignore = E402,E501,E731,E741 +ignore = E402,E501,E731,E741,W503 diff --git a/test/test_aes.py b/test/test_aes.py index 78a28751b..cc89fb6ab 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -44,16 +44,16 @@ class TestAES(unittest.TestCase): def test_decrypt_text(self): password = intlist_to_bytes(self.key).decode('utf-8') encrypted = base64.b64encode( - intlist_to_bytes(self.iv[:8]) + - b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' + intlist_to_bytes(self.iv[:8]) + + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' ).decode('utf-8') decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) password = intlist_to_bytes(self.key).decode('utf-8') encrypted = base64.b64encode( - intlist_to_bytes(self.iv[:8]) + - b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' + intlist_to_bytes(self.iv[:8]) + + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' ).decode('utf-8') decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py index f1e899819..9f18055e6 100644 --- a/test/test_swfinterp.py +++ b/test/test_swfinterp.py @@ -34,8 +34,8 @@ def _make_testfunc(testfile): def test_func(self): as_file = os.path.join(TEST_DIR, testfile) swf_file = os.path.join(TEST_DIR, test_id + '.swf') - if ((not os.path.exists(swf_file)) or - os.path.getmtime(swf_file) < os.path.getmtime(as_file)): + if ((not os.path.exists(swf_file)) + or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): # Recompile try: subprocess.check_call([ diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 57f52f888..3e832fec2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -400,9 +400,9 @@ class YoutubeDL(object): else: raise - if (sys.platform != 'win32' and - sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and - not params.get('restrictfilenames', False)): + if (sys.platform != 'win32' + and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] + and not params.get('restrictfilenames', False)): # Unicode filesystem API will throw errors (#1474, #13027) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' @@ -440,9 +440,9 @@ class YoutubeDL(object): if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] if idxs: correct_argv = ( - ['youtube-dl'] + - [a for i, a in enumerate(argv) if i not in idxs] + - ['--'] + [argv[i] for i in idxs] + ['youtube-dl'] + + [a for i, a in enumerate(argv) if i not in idxs] + + ['--'] + [argv[i] for i in idxs] ) self.report_warning( 'Long argument string detected. ' @@ -850,8 +850,8 @@ class YoutubeDL(object): if result_type in ('url', 'url_transparent'): ie_result['url'] = sanitize_url(ie_result['url']) extract_flat = self.params.get('extract_flat', False) - if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or - extract_flat is True): + if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) + or extract_flat is True): if self.params.get('forcejson', False): self.to_stdout(json.dumps(ie_result)) return ie_result @@ -1619,9 +1619,9 @@ class YoutubeDL(object): # https://github.com/ytdl-org/youtube-dl/issues/10083). incomplete_formats = ( # All formats are video-only or - all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or + all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) # all formats are audio-only - all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) + or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) ctx = { 'formats': formats, @@ -1947,8 +1947,8 @@ class YoutubeDL(object): else: assert fixup_policy in ('ignore', 'never') - if (info_dict.get('requested_formats') is None and - info_dict.get('container') == 'm4a_dash'): + if (info_dict.get('requested_formats') is None + and info_dict.get('container') == 'm4a_dash'): if fixup_policy == 'warn': self.report_warning( '%s: writing DASH m4a. ' @@ -1967,9 +1967,9 @@ class YoutubeDL(object): else: assert fixup_policy in ('ignore', 'never') - if (info_dict.get('protocol') == 'm3u8_native' or - info_dict.get('protocol') == 'm3u8' and - self.params.get('hls_prefer_native')): + if (info_dict.get('protocol') == 'm3u8_native' + or info_dict.get('protocol') == 'm3u8' + and self.params.get('hls_prefer_native')): if fixup_policy == 'warn': self.report_warning('%s: malformed AAC bitstream detected.' % ( info_dict['id'])) @@ -1995,10 +1995,10 @@ class YoutubeDL(object): def download(self, url_list): """Download a given list of URLs.""" outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) - if (len(url_list) > 1 and - outtmpl != '-' and - '%' not in outtmpl and - self.params.get('max_downloads') != 1): + if (len(url_list) > 1 + and outtmpl != '-' + and '%' not in outtmpl + and self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) for url in url_list: @@ -2143,8 +2143,8 @@ class YoutubeDL(object): if res: res += ', ' res += '%s container' % fdict['container'] - if (fdict.get('vcodec') is not None and - fdict.get('vcodec') != 'none'): + if (fdict.get('vcodec') is not None + and fdict.get('vcodec') != 'none'): if res: res += ', ' res += fdict['vcodec'] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9d4859bcf..165c975dd 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -230,14 +230,14 @@ def _real_main(argv=None): if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True - outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or - (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or - (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or - (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or - (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or - (opts.useid and '%(id)s.%(ext)s') or - (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or - DEFAULT_OUTTMPL) + outtmpl = ((opts.outtmpl is not None and opts.outtmpl) + or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') + or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') + or (opts.useid and '%(id)s.%(ext)s') + or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') + or DEFAULT_OUTTMPL) if not os.path.splitext(outtmpl)[1] and opts.extractaudio: parser.error('Cannot download a video and extract audio into the same' ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 7992a23ca..c75ab131b 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2649,9 +2649,9 @@ else: try: args = shlex.split('中文') - assert (isinstance(args, list) and - isinstance(args[0], compat_str) and - args[0] == '中文') + assert (isinstance(args, list) + and isinstance(args[0], compat_str) + and args[0] == '中文') compat_shlex_split = shlex.split except (AssertionError, UnicodeEncodeError): # Working around shlex issue with unicode strings on some python 2 diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 5979833c0..646d7f779 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -330,15 +330,15 @@ class FileDownloader(object): """ nooverwrites_and_exists = ( - self.params.get('nooverwrites', False) and - os.path.exists(encodeFilename(filename)) + self.params.get('nooverwrites', False) + and os.path.exists(encodeFilename(filename)) ) if not hasattr(filename, 'write'): continuedl_and_exists = ( - self.params.get('continuedl', True) and - os.path.isfile(encodeFilename(filename)) and - not self.params.get('nopart', False) + self.params.get('continuedl', True) + and os.path.isfile(encodeFilename(filename)) + and not self.params.get('nopart', False) ) # Check file already present diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 9b15a0e15..8dd3c2eeb 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -238,8 +238,8 @@ def write_metadata_tag(stream, metadata): def remove_encrypted_media(media): - return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and - 'drmAdditionalHeaderSetId' not in e.attrib, + return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib + and 'drmAdditionalHeaderSetId' not in e.attrib, media)) @@ -267,8 +267,8 @@ class F4mFD(FragmentFD): media = doc.findall(_add_ns('media')) if not media: self.report_error('No media found') - for e in (doc.findall(_add_ns('drmAdditionalHeader')) + - doc.findall(_add_ns('drmAdditionalHeaderSet'))): + for e in (doc.findall(_add_ns('drmAdditionalHeader')) + + doc.findall(_add_ns('drmAdditionalHeaderSet'))): # If id attribute is missing it's valid for all media nodes # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute if 'id' not in e.attrib: diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 917f6dc01..f2e5733b6 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -219,8 +219,8 @@ class FragmentFD(FileDownloader): frag_total_bytes = s.get('total_bytes') or 0 if not ctx['live']: estimated_size = ( - (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / - (state['fragment_index'] + 1) * total_frags) + (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) + / (state['fragment_index'] + 1) * total_frags) state['total_bytes_estimate'] = estimated_size if s['status'] == 'finished': diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 419e73576..b59aad73f 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -76,12 +76,12 @@ class HlsFD(FragmentFD): return fd.real_download(filename, info_dict) def is_ad_fragment_start(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or - s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s + or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) def is_ad_fragment_end(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or - s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s + or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) media_frags = 0 ad_frags = 0 diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 08670ee3c..3c72ea18b 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -46,8 +46,8 @@ class HttpFD(FileDownloader): is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( - info_dict.get('downloader_options', {}).get('http_chunk_size') or - self.params.get('http_chunk_size') or 0) + info_dict.get('downloader_options', {}).get('http_chunk_size') + or self.params.get('http_chunk_size') or 0) ctx.open_mode = 'wb' ctx.resume_len = 0 @@ -123,11 +123,11 @@ class HttpFD(FileDownloader): content_len = int_or_none(content_range_m.group(3)) accept_content_len = ( # Non-chunked download - not ctx.chunk_size or + not ctx.chunk_size # Chunked download and requested piece or # its part is promised to be served - content_range_end == range_end or - content_len < range_end) + or content_range_end == range_end + or content_len < range_end) if accept_content_len: ctx.data_len = content_len return @@ -152,8 +152,8 @@ class HttpFD(FileDownloader): raise else: # Examine the reported length - if (content_length is not None and - (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): + if (content_length is not None + and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 9f8a71262..5e7c0724e 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -59,9 +59,9 @@ class AddAnimeIE(InfoExtractor): parsed_url = compat_urllib_parse_urlparse(url) av_val = av_res + len(parsed_url.netloc) confirm_url = ( - parsed_url.scheme + '://' + parsed_url.netloc + - action + '?' + - compat_urllib_parse_urlencode({ + parsed_url.scheme + '://' + parsed_url.netloc + + action + '?' + + compat_urllib_parse_urlencode({ 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) self._download_webpage( confirm_url, video_id, diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 3b8eabe8f..db5e12b21 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -32,8 +32,8 @@ class BlinkxIE(InfoExtractor): video_id = self._match_id(url) display_id = video_id[:8] - api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + - 'video=%s' % video_id) + api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + + 'video=%s' % video_id) data_json = self._download_webpage(api_url, display_id) data = json.loads(data_json)['api']['results'][0] duration = None diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 59ad455c1..23b4f372a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -542,11 +542,11 @@ class InfoExtractor(object): raise ExtractorError('An extractor error has occurred.', cause=e) def __maybe_fake_ip_and_retry(self, countries): - if (not self._downloader.params.get('geo_bypass_country', None) and - self._GEO_BYPASS and - self._downloader.params.get('geo_bypass', True) and - not self._x_forwarded_for_ip and - countries): + if (not self._downloader.params.get('geo_bypass_country', None) + and self._GEO_BYPASS + and self._downloader.params.get('geo_bypass', True) + and not self._x_forwarded_for_ip + and countries): country_code = random.choice(countries) self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._x_forwarded_for_ip: @@ -682,8 +682,8 @@ class InfoExtractor(object): def __check_blocked(self, content): first_block = content[:512] - if ('Access to this site is blocked' in content and - 'Websense' in first_block): + if ('Access to this site is blocked' in content + and 'Websense' in first_block): msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' blocked_iframe = self._html_search_regex( r'