From 662087e491912be66dbf6866df3846608fce6904 Mon Sep 17 00:00:00 2001 From: thezero Date: Sun, 28 Jul 2019 23:08:39 +0200 Subject: [PATCH 1/5] [sproutvideo] Add new extractor (closes #7935) --- youtube_dl/downloader/__init__.py | 3 ++ youtube_dl/downloader/fragment.py | 3 ++ youtube_dl/downloader/hls.py | 18 ++++--- youtube_dl/downloader/http.py | 6 ++- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 17 ++++++ youtube_dl/extractor/sproutvideo.py | 80 +++++++++++++++++++++++++++++ 7 files changed, 120 insertions(+), 8 deletions(-) create mode 100644 youtube_dl/extractor/sproutvideo.py diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 2e485df9d..28cc6a363 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}): if ed.can_download(info_dict): return ed + if info_dict.get('force_hlsdl') is True: + return HlsFD + if protocol.startswith('m3u8') and info_dict.get('is_live'): return FFmpegFD diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 02f35459e..449795bc7 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -121,6 +121,8 @@ class FragmentFD(FileDownloader): del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): + if 'hls' not in ctx: + ctx['hls'] = False if 'live' not in ctx: ctx['live'] = False if not ctx['live']: @@ -143,6 +145,7 @@ class FragmentFD(FileDownloader): 'retries': self.params.get('retries', 0), 'nopart': self.params.get('nopart', False), 'test': self.params.get('test', False), + 'hls': ctx['hls'], } ) tmpfilename = self.temp_name(ctx['filename']) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 84bc34928..84bfc4a87 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -105,6 +105,7 @@ class HlsFD(FragmentFD): 'filename': filename, 
'total_frags': media_frags, 'ad_frags': ad_frags, + 'hls': '#EXT-X-KEY:METHOD=AES-128' in s, } self._prepare_and_start_frag_download(ctx) @@ -113,10 +114,15 @@ class HlsFD(FragmentFD): skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) test = self.params.get('test', False) - extra_query = None + extra_segment_query = None + extra_key_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url: - extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url) + extra_segment_query = compat_urlparse.parse_qs(extra_param_to_segment_url) + extra_key_query = compat_urlparse.parse_qs(extra_param_to_segment_url) + extra_param_to_key_url = info_dict.get('extra_param_to_key_url') + if extra_param_to_key_url: + extra_key_query = compat_urlparse.parse_qs(extra_param_to_key_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} @@ -136,8 +142,8 @@ class HlsFD(FragmentFD): line if re.match(r'^https?://', line) else compat_urlparse.urljoin(man_url, line)) - if extra_query: - frag_url = update_url_query(frag_url, extra_query) + if extra_segment_query: + frag_url = update_url_query(frag_url, extra_segment_query) count = 0 headers = info_dict.get('http_headers', {}) if byte_range: @@ -187,8 +193,8 @@ class HlsFD(FragmentFD): if not re.match(r'^https?://', decrypt_info['URI']): decrypt_info['URI'] = compat_urlparse.urljoin( man_url, decrypt_info['URI']) - if extra_query: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if extra_key_query: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_key_query) if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 3c72ea18b..431483bc4 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -45,7 +45,8 @@ class HttpFD(FileDownloader): 
headers.update(add_headers) is_test = self.params.get('test', False) - chunk_size = self._TEST_FILE_SIZE if is_test else ( + is_hls = self.params.get('hls', False) + chunk_size = self._TEST_FILE_SIZE if is_test and not is_hls else ( info_dict.get('downloader_options', {}).get('http_chunk_size') or self.params.get('http_chunk_size') or 0) @@ -194,7 +195,8 @@ class HttpFD(FileDownloader): # However, for a test we still would like to download just a piece of a file. # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control # block size when downloading a file. - if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): + # If we are using HLS we cannot cut the fragment because it will break the decryption. + if is_test and not is_hls and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): data_len = self._TEST_FILE_SIZE if data_len is not None: diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e407ab3d9..42ee3ff65 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1053,6 +1053,7 @@ from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE +from .sproutvideo import SproutVideoIE from .srgssr import ( SRGSSRIE, SRGSSRPlayIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ce8252f6a..13dc6f34b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -119,6 +119,7 @@ from .expressen import ExpressenIE from .zype import ZypeIE from .odnoklassniki import OdnoklassnikiIE from .kinja import KinjaEmbedIE +from .sproutvideo import SproutVideoIE class GenericIE(InfoExtractor): @@ -2142,6 +2143,18 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # SproutVideo iframe in page + 'url': 
'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs', + 'info_dict': { + 'id': '4c9dddb01910e3c9c4', + 'ext': 'mp4', + 'title': 'Adrien Labaeye : Berlin, des communautés aux communs', + }, + 'params': { + 'skip_download': True, + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -3201,6 +3214,10 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( zype_urls, video_id, video_title, ie=ZypeIE.ie_key()) + sproutvideo_url = SproutVideoIE._extract_url(webpage) + if sproutvideo_url: + return self.url_result(sproutvideo_url) + # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: diff --git a/youtube_dl/extractor/sproutvideo.py b/youtube_dl/extractor/sproutvideo.py new file mode 100644 index 000000000..99cf3f727 --- /dev/null +++ b/youtube_dl/extractor/sproutvideo.py @@ -0,0 +1,80 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..compat import ( + compat_b64decode, + compat_urllib_parse_urlencode, +) + + +class SproutVideoIE(InfoExtractor): + _VALID_URL = r'(?:https?:|)//videos.sproutvideo.com/embed/(?P[a-f0-9]+)/[a-f0-9]+\??.*' + _TEST = { + 'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3', + 'md5': '1343ce1a6cb39d67889bfa07c7b02b0e', + 'info_dict': { + 'id': '4c9dddb01910e3c9c4', + 'ext': 'mp4', + 'title': 'Adrien Labaeye : Berlin, des communautés aux communs', + } + } + + @staticmethod + def _extract_url(webpage): + sproutvideo = re.search( + r'(?:%s)[\'\"]' % SproutVideoIE._VALID_URL, webpage) + if sproutvideo: + return sproutvideo.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + data = self._search_regex(r']+>var dat = \'([^\']+)\';', webpage, 'data') + data_decoded = compat_b64decode(data).decode('utf-8') + parsed_data = 
self._parse_json(data_decoded, video_id) + + # https://github.com/ytdl-org/youtube-dl/issues/16996#issuecomment-406901324 + # signature->m for manifests + # signature->k for keys + # signature->t for segments + m_sig = self._policy_to_qs(parsed_data, 'm') + k_sig = self._policy_to_qs(parsed_data, 'k') + t_sig = self._policy_to_qs(parsed_data, 't') + + url = "https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}" + url = url.format(parsed_data['base'], + parsed_data['s3_user_hash'], + parsed_data['s3_video_hash'], + m_sig) + + formats = self._extract_m3u8_formats(url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + self._sort_formats(formats) + + for i in range(len(formats)): + formats[i]['url'] = "{}?{}".format(formats[i]['url'], m_sig) + + return { + 'id': video_id, + 'title': parsed_data['title'], + 'formats': formats, + 'force_hlsdl': True, # currently FFmpeg is not supported + 'extra_param_to_segment_url': t_sig, + 'extra_param_to_key_url': k_sig + } + + def _format_qsdata(self, qs_data): + parsed_dict = dict() + for key in qs_data: + parsed_dict[key.replace('CloudFront-', '')] = qs_data[key] + return parsed_dict + + def _policy_to_qs(self, policy, key): + sig = self._format_qsdata(policy['signatures'][key]) + sig['sessionID'] = policy['sessionID'] + return compat_urllib_parse_urlencode(sig, doseq=True) From 36071a8d06a61e461ef5aad3fa3bf583be97f13d Mon Sep 17 00:00:00 2001 From: thezero Date: Mon, 5 Aug 2019 22:49:13 +0200 Subject: [PATCH 2/5] [sproutvideo] Fix extractor --- youtube_dl/downloader/__init__.py | 3 -- youtube_dl/downloader/fragment.py | 3 -- youtube_dl/downloader/hls.py | 7 +++-- youtube_dl/downloader/http.py | 6 ++-- youtube_dl/extractor/sproutvideo.py | 49 ++++++++++++++++------------- 5 files changed, 34 insertions(+), 34 deletions(-) diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 28cc6a363..2e485df9d 100644 --- a/youtube_dl/downloader/__init__.py +++ 
b/youtube_dl/downloader/__init__.py @@ -43,9 +43,6 @@ def get_suitable_downloader(info_dict, params={}): if ed.can_download(info_dict): return ed - if info_dict.get('force_hlsdl') is True: - return HlsFD - if protocol.startswith('m3u8') and info_dict.get('is_live'): return FFmpegFD diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 449795bc7..02f35459e 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -121,8 +121,6 @@ class FragmentFD(FileDownloader): del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): - if 'hls' not in ctx: - ctx['hls'] = False if 'live' not in ctx: ctx['live'] = False if not ctx['live']: @@ -145,7 +143,6 @@ class FragmentFD(FileDownloader): 'retries': self.params.get('retries', 0), 'nopart': self.params.get('nopart', False), 'test': self.params.get('test', False), - 'hls': ctx['hls'], } ) tmpfilename = self.temp_name(ctx['filename']) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 84bfc4a87..80d006fb6 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -105,7 +105,6 @@ class HlsFD(FragmentFD): 'filename': filename, 'total_frags': media_frags, 'ad_frags': ad_frags, - 'hls': '#EXT-X-KEY:METHOD=AES-128' in s, } self._prepare_and_start_frag_download(ctx) @@ -176,8 +175,10 @@ class HlsFD(FragmentFD): iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read() - frag_content = AES.new( - decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) + # We don't decrypt fragments during the test + if not test: + frag_content = AES.new( + decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) self._append_fragment(ctx, frag_content) # We only download the first fragment during the test if test: diff --git 
a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 431483bc4..3c72ea18b 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -45,8 +45,7 @@ class HttpFD(FileDownloader): headers.update(add_headers) is_test = self.params.get('test', False) - is_hls = self.params.get('hls', False) - chunk_size = self._TEST_FILE_SIZE if is_test and not is_hls else ( + chunk_size = self._TEST_FILE_SIZE if is_test else ( info_dict.get('downloader_options', {}).get('http_chunk_size') or self.params.get('http_chunk_size') or 0) @@ -195,8 +194,7 @@ class HttpFD(FileDownloader): # However, for a test we still would like to download just a piece of a file. # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control # block size when downloading a file. - # If we are using HLS we cannot cut the fragment because it will break the decryption. - if is_test and not is_hls and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): + if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): data_len = self._TEST_FILE_SIZE if data_len is not None: diff --git a/youtube_dl/extractor/sproutvideo.py b/youtube_dl/extractor/sproutvideo.py index 99cf3f727..1bf87ea9a 100644 --- a/youtube_dl/extractor/sproutvideo.py +++ b/youtube_dl/extractor/sproutvideo.py @@ -12,10 +12,11 @@ from ..compat import ( class SproutVideoIE(InfoExtractor): - _VALID_URL = r'(?:https?:|)//videos.sproutvideo.com/embed/(?P[a-f0-9]+)/[a-f0-9]+\??.*' + _NOSCHEMA_URL = r'//videos.sproutvideo.com/embed/(?P[a-f0-9]+)/[a-f0-9]+' + _VALID_URL = r'https?:%s' % _NOSCHEMA_URL _TEST = { 'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3', - 'md5': '1343ce1a6cb39d67889bfa07c7b02b0e', + 'md5': 'fbc675bb97437e797d11d14d99563f50', 'info_dict': { 'id': '4c9dddb01910e3c9c4', 'ext': 'mp4', @@ -26,9 +27,14 @@ class SproutVideoIE(InfoExtractor): @staticmethod def _extract_url(webpage): sproutvideo = re.search( - r'(?:%s)[\'\"]' % 
SproutVideoIE._VALID_URL, webpage) + r'(?:(?:https?:|)%s[^\'\"]+)[\'\"]' % SproutVideoIE._NOSCHEMA_URL, webpage) if sproutvideo: - return sproutvideo.group('url') + video_url = sproutvideo.group('url') + # Fix the video URL if the iframe doesn't have a defined schema + if video_url[:2] == '//': + video_url = 'https:' + video_url + return video_url + def _real_extract(self, url): video_id = self._match_id(url) @@ -42,30 +48,31 @@ class SproutVideoIE(InfoExtractor): # signature->m for manifests # signature->k for keys # signature->t for segments - m_sig = self._policy_to_qs(parsed_data, 'm') - k_sig = self._policy_to_qs(parsed_data, 'k') - t_sig = self._policy_to_qs(parsed_data, 't') + m_sign = self._policy_to_qs(parsed_data, 'm') + k_sign = self._policy_to_qs(parsed_data, 'k') + t_sign = self._policy_to_qs(parsed_data, 't') - url = "https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}" - url = url.format(parsed_data['base'], - parsed_data['s3_user_hash'], - parsed_data['s3_video_hash'], - m_sig) + resource_url = 'https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}' + resource_url = resource_url.format(parsed_data['base'], + parsed_data['s3_user_hash'], + parsed_data['s3_video_hash'], + m_sign) - formats = self._extract_m3u8_formats(url, video_id, 'mp4', 'm3u8_native', + formats = self._extract_m3u8_formats(resource_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) self._sort_formats(formats) - for i in range(len(formats)): - formats[i]['url'] = "{}?{}".format(formats[i]['url'], m_sig) + for entry in formats: + entry.update({ + 'url': '{0}?{1}'.format(entry['url'], m_sign), + 'extra_param_to_segment_url': t_sign, + 'extra_param_to_key_url': k_sign, + }) return { 'id': video_id, 'title': parsed_data['title'], 'formats': formats, - 'force_hlsdl': True, # currently FFmpeg is not supported - 'extra_param_to_segment_url': t_sig, - 'extra_param_to_key_url': k_sig } def _format_qsdata(self, qs_data): @@ -75,6 +82,6 @@ 
class SproutVideoIE(InfoExtractor): return parsed_dict def _policy_to_qs(self, policy, key): - sig = self._format_qsdata(policy['signatures'][key]) - sig['sessionID'] = policy['sessionID'] - return compat_urllib_parse_urlencode(sig, doseq=True) + sign = self._format_qsdata(policy['signatures'][key]) + sign['sessionID'] = policy['sessionID'] + return compat_urllib_parse_urlencode(sign, doseq=True) From c956f4a736a85fcb6de852e3156b2e5ffff31e3e Mon Sep 17 00:00:00 2001 From: thezero Date: Tue, 6 Aug 2019 20:32:00 +0200 Subject: [PATCH 3/5] [sproutvideo] Download multiple video in a single webpage --- youtube_dl/extractor/generic.py | 7 ++++--- youtube_dl/extractor/sproutvideo.py | 15 +++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 13dc6f34b..b7b64a0be 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3214,9 +3214,10 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( zype_urls, video_id, video_title, ie=ZypeIE.ie_key()) - sproutvideo_url = SproutVideoIE._extract_url(webpage) - if sproutvideo_url: - return self.url_result(sproutvideo_url) + sproutvideo_urls = SproutVideoIE._extract_urls(webpage) + if sproutvideo_urls: + return self.playlist_from_matches( + sproutvideo_urls, video_id, video_title, ie=SproutVideoIE.ie_key()) # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') diff --git a/youtube_dl/extractor/sproutvideo.py b/youtube_dl/extractor/sproutvideo.py index 1bf87ea9a..85f58d3ba 100644 --- a/youtube_dl/extractor/sproutvideo.py +++ b/youtube_dl/extractor/sproutvideo.py @@ -25,16 +25,11 @@ class SproutVideoIE(InfoExtractor): } @staticmethod - def _extract_url(webpage): - sproutvideo = re.search( - r'(?:(?:https?:|)%s[^\'\"]+)[\'\"]' % SproutVideoIE._NOSCHEMA_URL, webpage) - if sproutvideo: - video_url = sproutvideo.group('url') - # Fix the video URL if the 
iframe doesn't have a defined schema - if video_url[:2] == '//': - video_url = 'https:' + video_url - return video_url - + def _extract_urls(webpage): + # Fix the video URL if the iframe doesn't have a defined schema + return [sprout.group('url') for sprout in re.finditer( + r'(?:(?:https?:|)%s[^\'\"]+)[\'\"]' % SproutVideoIE._NOSCHEMA_URL, + webpage)] def _real_extract(self, url): video_id = self._match_id(url) From 511ad55d78235cce7c0a9a0a2fe81c7658b4b767 Mon Sep 17 00:00:00 2001 From: thezero Date: Thu, 9 Apr 2020 01:09:00 +0200 Subject: [PATCH 4/5] [sproutvideo] improve HLS download, fix video detection --- youtube_dl/downloader/hls.py | 18 ++++++++++++----- youtube_dl/extractor/sproutvideo.py | 31 ++++++++++++++--------------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 80d006fb6..56b173309 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -64,7 +64,7 @@ class HlsFD(FragmentFD): s = urlh.read().decode('utf-8', 'ignore') if not self.can_download(s, info_dict): - if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): + if info_dict.get('extra_param_to_segment_url') or info_dict.get('extra_param_to_key_url'): self.report_error('pycrypto not found. 
Please install it.') return False self.report_warning( @@ -115,13 +115,17 @@ class HlsFD(FragmentFD): extra_segment_query = None extra_key_query = None + extra_key_url = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + extra_param_to_key_url = info_dict.get('extra_param_to_key_url') if extra_param_to_segment_url: extra_segment_query = compat_urlparse.parse_qs(extra_param_to_segment_url) extra_key_query = compat_urlparse.parse_qs(extra_param_to_segment_url) - extra_param_to_key_url = info_dict.get('extra_param_to_key_url') if extra_param_to_key_url: - extra_key_query = compat_urlparse.parse_qs(extra_param_to_key_url) + if extra_param_to_key_url.startswith('http'): + extra_key_url = extra_param_to_key_url + else: + extra_key_query = compat_urlparse.parse_qs(extra_param_to_key_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} @@ -174,8 +178,10 @@ class HlsFD(FragmentFD): if decrypt_info['METHOD'] == 'AES-128': iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( - self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read() - # We don't decrypt fragments during the test + self._prepare_url(info_dict, decrypt_info['URI'])).read() + # Since "self._TEST_FILE_SIZE" is set to 10241 bytes, only those will be downloaded for the first fragment + # In case a fragment is bigger than 10241 bytes, the fragment will be cropped so AES-CBC decryption will fail. + # For this reason we can't decrypt fragments during the tests. 
if not test: frag_content = AES.new( decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) @@ -196,6 +202,8 @@ class HlsFD(FragmentFD): man_url, decrypt_info['URI']) if extra_key_query: decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_key_query) + elif extra_key_url: + decrypt_info['URI'] = extra_key_url if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): diff --git a/youtube_dl/extractor/sproutvideo.py b/youtube_dl/extractor/sproutvideo.py index 85f58d3ba..4e805ccde 100644 --- a/youtube_dl/extractor/sproutvideo.py +++ b/youtube_dl/extractor/sproutvideo.py @@ -12,7 +12,7 @@ from ..compat import ( class SproutVideoIE(InfoExtractor): - _NOSCHEMA_URL = r'//videos.sproutvideo.com/embed/(?P[a-f0-9]+)/[a-f0-9]+' + _NOSCHEMA_URL = r'//videos\.sproutvideo\.com/embed/(?P[a-f0-9]+)/[a-f0-9]+' _VALID_URL = r'https?:%s' % _NOSCHEMA_URL _TEST = { 'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3', @@ -28,14 +28,14 @@ class SproutVideoIE(InfoExtractor): def _extract_urls(webpage): # Fix the video URL if the iframe doesn't have a defined schema return [sprout.group('url') for sprout in re.finditer( - r'(?:(?:https?:|)%s[^\'\"]+)[\'\"]' % SproutVideoIE._NOSCHEMA_URL, + r']+src=[\'"](?P(?:https?:|)%s[^\'"]+)[\'"]' % SproutVideoIE._NOSCHEMA_URL, webpage)] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - data = self._search_regex(r']+>var dat = \'([^\']+)\';', webpage, 'data') + data = self._search_regex(r"var\s+dat\s+=\s+'([^']+)';", webpage, 'data') data_decoded = compat_b64decode(data).decode('utf-8') parsed_data = self._parse_json(data_decoded, video_id) @@ -43,18 +43,15 @@ class SproutVideoIE(InfoExtractor): # signature->m for manifests # signature->k for keys # signature->t for segments - m_sign = self._policy_to_qs(parsed_data, 'm') - k_sign = self._policy_to_qs(parsed_data, 'k') - t_sign = 
self._policy_to_qs(parsed_data, 't') + m_sign = SproutVideoIE._policy_to_qs(parsed_data, 'm') + k_sign = SproutVideoIE._policy_to_qs(parsed_data, 'k') + t_sign = SproutVideoIE._policy_to_qs(parsed_data, 't') - resource_url = 'https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}' - resource_url = resource_url.format(parsed_data['base'], - parsed_data['s3_user_hash'], - parsed_data['s3_video_hash'], - m_sign) + resource_url = 'https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}'.format( + parsed_data['base'], parsed_data['s3_user_hash'], parsed_data['s3_video_hash'], m_sign) - formats = self._extract_m3u8_formats(resource_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) + formats = self._extract_m3u8_formats( + resource_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) self._sort_formats(formats) for entry in formats: @@ -70,13 +67,15 @@ class SproutVideoIE(InfoExtractor): 'formats': formats, } - def _format_qsdata(self, qs_data): + @staticmethod + def _format_qsdata(qs_data): parsed_dict = dict() for key in qs_data: parsed_dict[key.replace('CloudFront-', '')] = qs_data[key] return parsed_dict - def _policy_to_qs(self, policy, key): - sign = self._format_qsdata(policy['signatures'][key]) + @staticmethod + def _policy_to_qs(policy, key): + sign = SproutVideoIE._format_qsdata(policy['signatures'][key]) sign['sessionID'] = policy['sessionID'] return compat_urllib_parse_urlencode(sign, doseq=True) From 0d4e58840bcdc0d032fe80635ef92caee303f00f Mon Sep 17 00:00:00 2001 From: thezero Date: Thu, 9 Apr 2020 01:22:25 +0200 Subject: [PATCH 5/5] [vzaar] fix hls downloader key_url --- youtube_dl/extractor/vzaar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index b7d02fca3..c1ffa46a4 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -97,7 +97,7 @@ class VzaarIE(InfoExtractor): 
m3u8_id='hls', fatal=False) if hls_aes: for f in m3u8_formats: - f['_decryption_key_url'] = url_templ % ('goose', '') + qs + f['extra_param_to_key_url'] = url_templ % ('goose', '') + qs formats.extend(m3u8_formats) self._sort_formats(formats)