[sproutvideo] improve HLS download, fix video detection

This commit is contained in:
thezero 2020-04-09 01:09:00 +02:00
parent c956f4a736
commit 511ad55d78
2 changed files with 28 additions and 21 deletions

View File

@ -64,7 +64,7 @@ class HlsFD(FragmentFD):
s = urlh.read().decode('utf-8', 'ignore') s = urlh.read().decode('utf-8', 'ignore')
if not self.can_download(s, info_dict): if not self.can_download(s, info_dict):
if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): if info_dict.get('extra_param_to_segment_url') or info_dict.get('extra_param_to_key_url'):
self.report_error('pycrypto not found. Please install it.') self.report_error('pycrypto not found. Please install it.')
return False return False
self.report_warning( self.report_warning(
@ -115,13 +115,17 @@ class HlsFD(FragmentFD):
extra_segment_query = None extra_segment_query = None
extra_key_query = None extra_key_query = None
extra_key_url = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
extra_param_to_key_url = info_dict.get('extra_param_to_key_url')
if extra_param_to_segment_url: if extra_param_to_segment_url:
extra_segment_query = compat_urlparse.parse_qs(extra_param_to_segment_url) extra_segment_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
extra_key_query = compat_urlparse.parse_qs(extra_param_to_segment_url) extra_key_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
extra_param_to_key_url = info_dict.get('extra_param_to_key_url')
if extra_param_to_key_url: if extra_param_to_key_url:
extra_key_query = compat_urlparse.parse_qs(extra_param_to_key_url) if extra_param_to_key_url.startswith('http'):
extra_key_url = extra_param_to_key_url
else:
extra_key_query = compat_urlparse.parse_qs(extra_param_to_key_url)
i = 0 i = 0
media_sequence = 0 media_sequence = 0
decrypt_info = {'METHOD': 'NONE'} decrypt_info = {'METHOD': 'NONE'}
@ -174,8 +178,10 @@ class HlsFD(FragmentFD):
if decrypt_info['METHOD'] == 'AES-128': if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read() self._prepare_url(info_dict, decrypt_info['URI'])).read()
# We don't decrypt fragments during the test # Since "self._TEST_FILE_SIZE" is set to 10241 bytes, only those will be downloaded for the first fragment
# In case a fragment is bigger than 10241 bytes, the fragment will be cropped so AES-CBC decryption will fail.
# For this reason we can't decrypt fragments during the tests.
if not test: if not test:
frag_content = AES.new( frag_content = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
@ -196,6 +202,8 @@ class HlsFD(FragmentFD):
man_url, decrypt_info['URI']) man_url, decrypt_info['URI'])
if extra_key_query: if extra_key_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_key_query) decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_key_query)
elif extra_key_url:
decrypt_info['URI'] = extra_key_url
if decrypt_url != decrypt_info['URI']: if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None decrypt_info['KEY'] = None
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):

View File

@ -12,7 +12,7 @@ from ..compat import (
class SproutVideoIE(InfoExtractor): class SproutVideoIE(InfoExtractor):
_NOSCHEMA_URL = r'//videos.sproutvideo.com/embed/(?P<id>[a-f0-9]+)/[a-f0-9]+' _NOSCHEMA_URL = r'//videos\.sproutvideo\.com/embed/(?P<id>[a-f0-9]+)/[a-f0-9]+'
_VALID_URL = r'https?:%s' % _NOSCHEMA_URL _VALID_URL = r'https?:%s' % _NOSCHEMA_URL
_TEST = { _TEST = {
'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3', 'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
@ -28,14 +28,14 @@ class SproutVideoIE(InfoExtractor):
def _extract_urls(webpage): def _extract_urls(webpage):
# Fix the video URL if the iframe doesn't have a defined schema # Fix the video URL if the iframe doesn't have a defined schema
return [sprout.group('url') for sprout in re.finditer( return [sprout.group('url') for sprout in re.finditer(
r'(?:<iframe\s+class=[\'\"]sproutvideo-player.*src|href)=[\'\"](?P<url>(?:https?:|)%s[^\'\"]+)[\'\"]' % SproutVideoIE._NOSCHEMA_URL, r'<iframe[^>]+src=[\'"](?P<url>(?:https?:|)%s[^\'"]+)[\'"]' % SproutVideoIE._NOSCHEMA_URL,
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
data = self._search_regex(r'<script[^>]+>var dat = \'([^\']+)\';</script>', webpage, 'data') data = self._search_regex(r"var\s+dat\s+=\s+'([^']+)';", webpage, 'data')
data_decoded = compat_b64decode(data).decode('utf-8') data_decoded = compat_b64decode(data).decode('utf-8')
parsed_data = self._parse_json(data_decoded, video_id) parsed_data = self._parse_json(data_decoded, video_id)
@ -43,18 +43,15 @@ class SproutVideoIE(InfoExtractor):
# signature->m for manifests # signature->m for manifests
# signature->k for keys # signature->k for keys
# signature->t for segments # signature->t for segments
m_sign = self._policy_to_qs(parsed_data, 'm') m_sign = SproutVideoIE._policy_to_qs(parsed_data, 'm')
k_sign = self._policy_to_qs(parsed_data, 'k') k_sign = SproutVideoIE._policy_to_qs(parsed_data, 'k')
t_sign = self._policy_to_qs(parsed_data, 't') t_sign = SproutVideoIE._policy_to_qs(parsed_data, 't')
resource_url = 'https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}' resource_url = 'https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}'.format(
resource_url = resource_url.format(parsed_data['base'], parsed_data['base'], parsed_data['s3_user_hash'], parsed_data['s3_video_hash'], m_sign)
parsed_data['s3_user_hash'],
parsed_data['s3_video_hash'],
m_sign)
formats = self._extract_m3u8_formats(resource_url, video_id, 'mp4', entry_protocol='m3u8_native', formats = self._extract_m3u8_formats(
m3u8_id='hls', fatal=False) resource_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
self._sort_formats(formats) self._sort_formats(formats)
for entry in formats: for entry in formats:
@ -70,13 +67,15 @@ class SproutVideoIE(InfoExtractor):
'formats': formats, 'formats': formats,
} }
def _format_qsdata(self, qs_data): @staticmethod
def _format_qsdata(qs_data):
parsed_dict = dict() parsed_dict = dict()
for key in qs_data: for key in qs_data:
parsed_dict[key.replace('CloudFront-', '')] = qs_data[key] parsed_dict[key.replace('CloudFront-', '')] = qs_data[key]
return parsed_dict return parsed_dict
def _policy_to_qs(self, policy, key): @staticmethod
sign = self._format_qsdata(policy['signatures'][key]) def _policy_to_qs(policy, key):
sign = SproutVideoIE._format_qsdata(policy['signatures'][key])
sign['sessionID'] = policy['sessionID'] sign['sessionID'] = policy['sessionID']
return compat_urllib_parse_urlencode(sign, doseq=True) return compat_urllib_parse_urlencode(sign, doseq=True)