This commit is contained in:
TheZ3ro 2020-10-23 11:20:01 -04:00 committed by GitHub
commit 776e31f7aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 126 additions and 11 deletions

View File

@ -64,7 +64,7 @@ class HlsFD(FragmentFD):
s = urlh.read().decode('utf-8', 'ignore') s = urlh.read().decode('utf-8', 'ignore')
if not self.can_download(s, info_dict): if not self.can_download(s, info_dict):
if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): if info_dict.get('extra_param_to_segment_url') or info_dict.get('extra_param_to_key_url'):
self.report_error('pycrypto not found. Please install it.') self.report_error('pycrypto not found. Please install it.')
return False return False
self.report_warning( self.report_warning(
@ -113,10 +113,19 @@ class HlsFD(FragmentFD):
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
test = self.params.get('test', False) test = self.params.get('test', False)
extra_query = None extra_segment_query = None
extra_key_query = None
extra_key_url = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
extra_param_to_key_url = info_dict.get('extra_param_to_key_url')
if extra_param_to_segment_url: if extra_param_to_segment_url:
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url) extra_segment_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
extra_key_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
if extra_param_to_key_url:
if extra_param_to_key_url.startswith('http'):
extra_key_url = extra_param_to_key_url
else:
extra_key_query = compat_urlparse.parse_qs(extra_param_to_key_url)
i = 0 i = 0
media_sequence = 0 media_sequence = 0
decrypt_info = {'METHOD': 'NONE'} decrypt_info = {'METHOD': 'NONE'}
@ -136,8 +145,8 @@ class HlsFD(FragmentFD):
line line
if re.match(r'^https?://', line) if re.match(r'^https?://', line)
else compat_urlparse.urljoin(man_url, line)) else compat_urlparse.urljoin(man_url, line))
if extra_query: if extra_segment_query:
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_segment_query)
count = 0 count = 0
headers = info_dict.get('http_headers', {}) headers = info_dict.get('http_headers', {})
if byte_range: if byte_range:
@ -169,9 +178,13 @@ class HlsFD(FragmentFD):
if decrypt_info['METHOD'] == 'AES-128': if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read() self._prepare_url(info_dict, decrypt_info['URI'])).read()
frag_content = AES.new( # Since "self._TEST_FILE_SIZE" is set to 10241 bytes, only those will be downloaded for the first fragment
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) # In case a fragment is bigger then 10241 bytes, the fragment will be cropped so AES-CBC decryption will fail.
# For this reason we can't decrypt fragments during the tests.
if not test:
frag_content = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
self._append_fragment(ctx, frag_content) self._append_fragment(ctx, frag_content)
# We only download the first fragment during the test # We only download the first fragment during the test
if test: if test:
@ -187,8 +200,10 @@ class HlsFD(FragmentFD):
if not re.match(r'^https?://', decrypt_info['URI']): if not re.match(r'^https?://', decrypt_info['URI']):
decrypt_info['URI'] = compat_urlparse.urljoin( decrypt_info['URI'] = compat_urlparse.urljoin(
man_url, decrypt_info['URI']) man_url, decrypt_info['URI'])
if extra_query: if extra_key_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_key_query)
elif extra_key_url:
decrypt_info['URI'] = extra_key_url
if decrypt_url != decrypt_info['URI']: if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None decrypt_info['KEY'] = None
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):

View File

@ -1065,6 +1065,7 @@ from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE from .sportdeutschland import SportDeutschlandIE
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE
from .sproutvideo import SproutVideoIE
from .srgssr import ( from .srgssr import (
SRGSSRIE, SRGSSRIE,
SRGSSRPlayIE, SRGSSRPlayIE,

View File

@ -119,6 +119,7 @@ from .expressen import ExpressenIE
from .zype import ZypeIE from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .sproutvideo import SproutVideoIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2151,6 +2152,18 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
# SproutVideo iframe in page
'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
'info_dict': {
'id': '4c9dddb01910e3c9c4',
'ext': 'mp4',
'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
},
'params': {
'skip_download': True,
},
},
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@ -3213,6 +3226,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
zype_urls, video_id, video_title, ie=ZypeIE.ie_key()) zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
sproutvideo_urls = SproutVideoIE._extract_urls(webpage)
if sproutvideo_urls:
return self.playlist_from_matches(
sproutvideo_urls, video_id, video_title, ie=SproutVideoIE.ie_key())
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries: if entries:

View File

@ -0,0 +1,81 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_urllib_parse_urlencode,
)
class SproutVideoIE(InfoExtractor):
    # Embed URLs may appear schema-less in iframes, so keep the schema-free
    # core pattern separate and prepend 'https?:' for _VALID_URL.
    _NOSCHEMA_URL = r'//videos\.sproutvideo\.com/embed/(?P<id>[a-f0-9]+)/[a-f0-9]+'
    _VALID_URL = r'https?:%s' % _NOSCHEMA_URL
    _TEST = {
        'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
        'md5': 'fbc675bb97437e797d11d14d99563f50',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
        }
    }

    @staticmethod
    def _extract_urls(webpage):
        # Collect iframe src attributes pointing at SproutVideo embeds;
        # the pattern also accepts schema-less ('//videos...') URLs.
        iframe_re = r'<iframe[^>]+src=[\'"](?P<url>(?:https?:|)%s[^\'"]+)[\'"]' % SproutVideoIE._NOSCHEMA_URL
        return [match.group('url') for match in re.finditer(iframe_re, webpage)]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The embed page carries a base64-encoded JSON policy blob in
        # a "var dat = '...';" statement.
        encoded_policy = self._search_regex(r"var\s+dat\s+=\s+'([^']+)';", webpage, 'data')
        policy = self._parse_json(
            compat_b64decode(encoded_policy).decode('utf-8'), video_id)

        # https://github.com/ytdl-org/youtube-dl/issues/16996#issuecomment-406901324
        # signature->m signs manifests, ->k signs keys, ->t signs segments
        m_sign = SproutVideoIE._policy_to_qs(policy, 'm')
        k_sign = SproutVideoIE._policy_to_qs(policy, 'k')
        t_sign = SproutVideoIE._policy_to_qs(policy, 't')

        manifest_url = 'https://{0}.videos.sproutvideo.com/{1}/{2}/video/index.m3u8?{3}'.format(
            policy['base'], policy['s3_user_hash'], policy['s3_video_hash'], m_sign)
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        # Each resource class needs its own signature appended downstream.
        for fmt in formats:
            fmt.update({
                'url': '{0}?{1}'.format(fmt['url'], m_sign),
                'extra_param_to_segment_url': t_sign,
                'extra_param_to_key_url': k_sign,
            })

        return {
            'id': video_id,
            'title': policy['title'],
            'formats': formats,
        }

    @staticmethod
    def _format_qsdata(qs_data):
        # Strip the 'CloudFront-' marker from every key of the signature dict.
        return dict(
            (key.replace('CloudFront-', ''), value)
            for key, value in qs_data.items())

    @staticmethod
    def _policy_to_qs(policy, key):
        # Build the signed query string for one resource class ('m'/'k'/'t').
        signature = SproutVideoIE._format_qsdata(policy['signatures'][key])
        signature['sessionID'] = policy['sessionID']
        return compat_urllib_parse_urlencode(signature, doseq=True)

View File

@ -97,7 +97,7 @@ class VzaarIE(InfoExtractor):
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
if hls_aes: if hls_aes:
for f in m3u8_formats: for f in m3u8_formats:
f['_decryption_key_url'] = url_templ % ('goose', '') + qs f['extra_param_to_key_url'] = url_templ % ('goose', '') + qs
formats.extend(m3u8_formats) formats.extend(m3u8_formats)
self._sort_formats(formats) self._sort_formats(formats)