[nexx] Download using correct protection tokens (fixes #24309)

2024-11-22 16:44:32 +01:00 · 2020-04-13 11:33:03 +02:00 · 2020-04-13 11:33:03 +02:00 · dec0b636b9
commit dec0b636b9
parent 00eb865b3c
2 changed files with 50 additions and 27 deletions
--- a/youtube_dl/extractor/funk.py
+++ b/youtube_dl/extractor/funk.py
@ -25,6 +25,24 @@ class FunkIE(InfoExtractor):
            'upload_date': '20171229',
        },
    }, {
        # protection tokens (available until 2020-07-29)
        'url': 'https://www.funk.net/channel/the-job-lot-1199/unter-druck-185103/the-job-lot-staffel-1-1444',
        'md5': '91330fc2708e3ef3fc7e259a341bf4d5',
        'info_dict': {
            'id': '185103',
            'ext': 'mp4',
            'title': 'Unter Druck',
            'duration': 1459,
            'upload_date': '20170419',
            'timestamp': 1492618074,
            'description': 'md5:0e752a4151c9a340db26f1455b735fa4',
        },
        'params': {
            'format': 'azure-dash-1_V_video_7',
        },
        # really just want to test that it does not return a 400 Bad Request
        'file_minsize': None,
    }, {
        'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
        'only_matching': True,
--- a/youtube_dl/extractor/nexx.py
+++ b/youtube_dl/extractor/nexx.py
@ -37,7 +37,7 @@ class NexxIE(InfoExtractor):
            'alt_title': 'Wie ein Test abläuft',
            'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
            'creator': 'SPIEGEL TV',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.(?:jpg|webp)$',
            'duration': 2509,
            'timestamp': 1384264416,
            'upload_date': '20131112',
@ -51,7 +51,7 @@ class NexxIE(InfoExtractor):
            'title': 'Return of the Golden Child (OV)',
            'description': 'md5:5d969537509a92b733de21bae249dc63',
            'release_year': 2017,
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.(?:jpg|webp)$',
            'duration': 1397,
            'timestamp': 1495033267,
            'upload_date': '20170517',
@ -69,28 +69,14 @@ class NexxIE(InfoExtractor):
        'info_dict': {
            'id': '1269984',
            'ext': 'mp4',
-            'title': '1 TAG ohne KLO... wortwörtlich! 😑',
+            'title': '1 TAG ohne KLO... wortwörtlich! ?',
-            'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
+            'alt_title': '1 TAG ohne KLO... wortwörtlich! ?',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:1cf562de6653f76fbc589ecbdf985727',
            'thumbnail': r're:^https?://.*\.(?:jpg|webp)$',
            'duration': 607,
            'timestamp': 1518614955,
            'upload_date': '20180214',
        },
    }, {
        # free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
        'url': 'nexx:747:1533779',
        'md5': '6bf6883912b82b7069fb86c2297e9893',
        'info_dict': {
            'id': '1533779',
            'ext': 'mp4',
            'title': 'Aufregung um ausgebrochene Raubtiere',
            'alt_title': 'Eifel-Zoo',
            'description': 'md5:f21375c91c74ad741dcb164c427999d2',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 111,
            'timestamp': 1527874460,
            'upload_date': '20180601',
        },
    }, {
        'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
        'only_matching': True,
@ -220,6 +206,9 @@ class NexxIE(InfoExtractor):
        return formats
    def _protection_tokens(self, video):
        """Look up the 'protectiondata' entry of a video result.

        The lookup goes through try_get with dict as the expected type.
        The callers visible in this change either test the result for
        truthiness or read the 'token', 'tokenReference' and 'token*'
        keys from it.
        """
        # NOTE(review): try_get is defined elsewhere; presumably it yields
        # None when the key is missing or the value is not a dict -- confirm.
        return try_get(video, lambda x: x['protectiondata'], dict)
    def _extract_azure_formats(self, video, video_id):
        stream_data = video['streamdata']
        cdn = stream_data['cdnType']
@ -247,24 +236,39 @@ class NexxIE(InfoExtractor):
        azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
            azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
        def add_token(url, token, key=None):
            return url + '?%s=%s' % (key or 'hdnts', token)
        azure_manifest_urls = {}
        protection_data = self._protection_tokens(video)
        # TODO: is this kind of token still in use? (perhaps add a test)
        protection_token = try_get(
-            video, lambda x: x['protectiondata']['token'], compat_str)
+            protection_data, lambda x: x['token'], compat_str)
        if protection_token:
-            azure_manifest_url += '?hdnts=%s' % protection_token
+            azure_manifest_url = add_token(azure_manifest_url, protection_token)
        elif protection_data:
            token_key = try_get(protection_data, lambda x: x['tokenReference'], compat_str)
            for key in protection_data:
                value = protection_data[key]
                if isinstance(value, compat_str) and key.startswith('token') and len(key) > 5:
                    azure_manifest_urls[key[5:]] = add_token(azure_manifest_url, value, key=token_key)
        formats = self._extract_m3u8_formats(
-            azure_manifest_url % '(format=m3u8-aapl)',
+            (try_get(azure_manifest_urls, lambda x: x['HLS'], compat_str) or azure_manifest_url) % '(format=m3u8-aapl)',
            video_id, 'mp4', 'm3u8_native',
            m3u8_id='%s-hls' % cdn, fatal=False)
        formats.extend(self._extract_mpd_formats(
-            azure_manifest_url % '(format=mpd-time-csf)',
+            (try_get(azure_manifest_urls, lambda x: x['DASH'], compat_str) or azure_manifest_url) % '(format=mpd-time-csf)',
            video_id, mpd_id='%s-dash' % cdn, fatal=False))
        formats.extend(self._extract_ism_formats(
-            azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
+            (try_get(azure_manifest_urls, lambda x: x['DASH'], compat_str) or azure_manifest_url) % '',
            video_id, ism_id='%s-mss' % cdn, fatal=False))
        azure_progressive_base = get_cdn_shield_base('Prog', True)
        azure_file_distribution = stream_data.get('azureFileDistribution')
-        if azure_file_distribution:
+        # TODO: this download method does not work when tokenHLS/tokenDASH is present
        # (simply adding one of the tokens to the url does not fix the 400 Bad Request)
        if azure_file_distribution and len(azure_manifest_urls) == 0:
            fds = azure_file_distribution.split(',')
            if fds:
                for fd in fds:
@ -314,7 +318,8 @@ class NexxIE(InfoExtractor):
                video = find_video(result)
        # not all videos work via arc, e.g. nexx:741:1269984
-        if not video:
+        # some videos require extra protection tokens
        if not video or self._protection_tokens(video):
            # Reverse engineered from JS code (see getDeviceID function)
            device_id = '%d:%d:%d%d' % (
                random.randint(1, 4), int(time.time()),