[nexx] Download using correct protection tokens (fixes #24309)

This commit is contained in:
FliegendeWurst 2020-04-13 11:33:03 +02:00
parent 00eb865b3c
commit dec0b636b9
2 changed files with 50 additions and 27 deletions

View File

@ -25,6 +25,24 @@ class FunkIE(InfoExtractor):
'upload_date': '20171229',
},
}, {
# protection tokens (available until 2020-07-29)
'url': 'https://www.funk.net/channel/the-job-lot-1199/unter-druck-185103/the-job-lot-staffel-1-1444',
'md5': '91330fc2708e3ef3fc7e259a341bf4d5',
'info_dict': {
'id': '185103',
'ext': 'mp4',
'title': 'Unter Druck',
'duration': 1459,
'upload_date': '20170419',
'timestamp': 1492618074,
'description': 'md5:0e752a4151c9a340db26f1455b735fa4',
},
'params': {
'format': 'azure-dash-1_V_video_7',
},
# really just want to test that it does not return a 400 Bad Request
'file_minsize': None,
}, {
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
'only_matching': True,

View File

@ -37,7 +37,7 @@ class NexxIE(InfoExtractor):
'alt_title': 'Wie ein Test abläuft',
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:^https?://.*\.(?:jpg|webp)$',
'duration': 2509,
'timestamp': 1384264416,
'upload_date': '20131112',
@ -51,7 +51,7 @@ class NexxIE(InfoExtractor):
'title': 'Return of the Golden Child (OV)',
'description': 'md5:5d969537509a92b733de21bae249dc63',
'release_year': 2017,
'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:^https?://.*\.(?:jpg|webp)$',
'duration': 1397,
'timestamp': 1495033267,
'upload_date': '20170517',
@ -69,28 +69,14 @@ class NexxIE(InfoExtractor):
'info_dict': {
'id': '1269984',
'ext': 'mp4',
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
'thumbnail': r're:^https?://.*\.jpg$',
'title': '1 TAG ohne KLO... wortwörtlich! ?',
'alt_title': '1 TAG ohne KLO... wortwörtlich! ?',
'description': 'md5:1cf562de6653f76fbc589ecbdf985727',
'thumbnail': r're:^https?://.*\.(?:jpg|webp)$',
'duration': 607,
'timestamp': 1518614955,
'upload_date': '20180214',
},
}, {
# free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
'url': 'nexx:747:1533779',
'md5': '6bf6883912b82b7069fb86c2297e9893',
'info_dict': {
'id': '1533779',
'ext': 'mp4',
'title': 'Aufregung um ausgebrochene Raubtiere',
'alt_title': 'Eifel-Zoo',
'description': 'md5:f21375c91c74ad741dcb164c427999d2',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 111,
'timestamp': 1527874460,
'upload_date': '20180601',
},
}, {
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
'only_matching': True,
@ -220,6 +206,9 @@ class NexxIE(InfoExtractor):
return formats
# Look up video['protectiondata'] through try_get, passing dict as the
# expected type. Callers in this file treat a falsy result as "no protection
# tokens present" (see the `elif protection_data:` and
# `if not video or self._protection_tokens(video)` checks).
# NOTE(review): try_get is defined elsewhere; presumably it yields None when
# the key is missing or the value is not a dict -- confirm against utils.
def _protection_tokens(self, video):
return try_get(video, lambda x: x['protectiondata'], dict)
def _extract_azure_formats(self, video, video_id):
stream_data = video['streamdata']
cdn = stream_data['cdnType']
@ -247,24 +236,39 @@ class NexxIE(InfoExtractor):
azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
# Append `token` to `url` as a query string of the form '?<key>=<token>'.
# `key` is the query parameter name; it falls back to 'hdnts' when it is
# None or otherwise falsy.
# The separator is always '?', so `url` is expected to carry no query string
# yet.
# NOTE(review): the manifest URL passed in here still ends with the '%s'
# placeholder and is %-formatted afterwards, so a token containing a literal
# '%' would break that later formatting step -- confirm tokens never do.
def add_token(url, token, key=None):
return url + '?%s=%s' % (key or 'hdnts', token)
azure_manifest_urls = {}
protection_data = self._protection_tokens(video)
# TODO: is this kind of token still in use? (perhaps add a test)
protection_token = try_get(
video, lambda x: x['protectiondata']['token'], compat_str)
protection_data, lambda x: x['token'], compat_str)
if protection_token:
azure_manifest_url += '?hdnts=%s' % protection_token
azure_manifest_url = add_token(azure_manifest_url, protection_token)
elif protection_data:
token_key = try_get(protection_data, lambda x: x['tokenReference'], compat_str)
for key in protection_data:
value = protection_data[key]
if isinstance(value, compat_str) and key.startswith('token') and len(key) > 5:
azure_manifest_urls[key[5:]] = add_token(azure_manifest_url, value, key=token_key)
formats = self._extract_m3u8_formats(
azure_manifest_url % '(format=m3u8-aapl)',
(try_get(azure_manifest_urls, lambda x: x['HLS'], compat_str) or azure_manifest_url) % '(format=m3u8-aapl)',
video_id, 'mp4', 'm3u8_native',
m3u8_id='%s-hls' % cdn, fatal=False)
formats.extend(self._extract_mpd_formats(
azure_manifest_url % '(format=mpd-time-csf)',
(try_get(azure_manifest_urls, lambda x: x['DASH'], compat_str) or azure_manifest_url) % '(format=mpd-time-csf)',
video_id, mpd_id='%s-dash' % cdn, fatal=False))
formats.extend(self._extract_ism_formats(
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
(try_get(azure_manifest_urls, lambda x: x['DASH'], compat_str) or azure_manifest_url) % '',
video_id, ism_id='%s-mss' % cdn, fatal=False))
azure_progressive_base = get_cdn_shield_base('Prog', True)
azure_file_distribution = stream_data.get('azureFileDistribution')
if azure_file_distribution:
# TODO: this download method does not work when tokenHLS/tokenDASH is present
# (simply adding one of the tokens to the URL does not fix the 400 Bad Request).
if azure_file_distribution and len(azure_manifest_urls) == 0:
fds = azure_file_distribution.split(',')
if fds:
for fd in fds:
@ -314,7 +318,8 @@ class NexxIE(InfoExtractor):
video = find_video(result)
# not all videos work via arc, e.g. nexx:741:1269984
if not video:
# some videos require extra protection tokens
if not video or self._protection_tokens(video):
# Reverse engineered from JS code (see getDeviceID function)
device_id = '%d:%d:%d%d' % (
random.randint(1, 4), int(time.time()),