[nexx] Download using correct protection tokens (fixes #24309)

2024-11-22 08:34:32 +01:00 · 2020-04-13 11:33:03 +02:00 · 2020-04-13 11:33:03 +02:00 · dec0b636b9
commit dec0b636b9
parent 00eb865b3c
2 changed files with 50 additions and 27 deletions
--- a/youtube_dl/extractor/funk.py
+++ b/youtube_dl/extractor/funk.py
@@ -25,6 +25,24 @@ class FunkIE(InfoExtractor):
            'upload_date': '20171229',
        },

+    }, {
+        # protection tokens (available until 2020-07-29)
+        'url': 'https://www.funk.net/channel/the-job-lot-1199/unter-druck-185103/the-job-lot-staffel-1-1444',
+        'md5': '91330fc2708e3ef3fc7e259a341bf4d5',
+        'info_dict': {
+            'id': '185103',
+            'ext': 'mp4',
+            'title': 'Unter Druck',
+            'duration': 1459,
+            'upload_date': '20170419',
+            'timestamp': 1492618074,
+            'description': 'md5:0e752a4151c9a340db26f1455b735fa4',
+        },
+        'params': {
+            'format': 'azure-dash-1_V_video_7',
+        },
+        # really just want to test that it does not return a 400 Bad Request
+        'file_minsize': None,
    }, {
        'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
        'only_matching': True,
--- a/youtube_dl/extractor/nexx.py
+++ b/youtube_dl/extractor/nexx.py
@@ -37,7 +37,7 @@ class NexxIE(InfoExtractor):
            'alt_title': 'Wie ein Test abläuft',
            'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
            'creator': 'SPIEGEL TV',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.(?:jpg|webp)$',
            'duration': 2509,
            'timestamp': 1384264416,
            'upload_date': '20131112',
@@ -51,7 +51,7 @@ class NexxIE(InfoExtractor):
            'title': 'Return of the Golden Child (OV)',
            'description': 'md5:5d969537509a92b733de21bae249dc63',
            'release_year': 2017,
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.(?:jpg|webp)$',
            'duration': 1397,
            'timestamp': 1495033267,
            'upload_date': '20170517',
@@ -69,28 +69,14 @@ class NexxIE(InfoExtractor):
        'info_dict': {
            'id': '1269984',
            'ext': 'mp4',
-            'title': '1 TAG ohne KLO... wortwörtlich! 😑',
-            'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': '1 TAG ohne KLO... wortwörtlich! ?',
+            'alt_title': '1 TAG ohne KLO... wortwörtlich! ?',
+            'description': 'md5:1cf562de6653f76fbc589ecbdf985727',
+            'thumbnail': r're:^https?://.*\.(?:jpg|webp)$',
            'duration': 607,
            'timestamp': 1518614955,
            'upload_date': '20180214',
        },
-    }, {
-        # free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
-        'url': 'nexx:747:1533779',
-        'md5': '6bf6883912b82b7069fb86c2297e9893',
-        'info_dict': {
-            'id': '1533779',
-            'ext': 'mp4',
-            'title': 'Aufregung um ausgebrochene Raubtiere',
-            'alt_title': 'Eifel-Zoo',
-            'description': 'md5:f21375c91c74ad741dcb164c427999d2',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 111,
-            'timestamp': 1527874460,
-            'upload_date': '20180601',
-        },
    }, {
        'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
        'only_matching': True,
@@ -220,6 +206,9 @@ class NexxIE(InfoExtractor):

        return formats

+    def _protection_tokens(self, video):
+        return try_get(video, lambda x: x['protectiondata'], dict)
+
    def _extract_azure_formats(self, video, video_id):
        stream_data = video['streamdata']
        cdn = stream_data['cdnType']
@@ -247,24 +236,39 @@ class NexxIE(InfoExtractor):
        azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
            azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'

+        def add_token(url, token, key=None):
+            return url + '?%s=%s' % (key or 'hdnts', token)
+        azure_manifest_urls = {}
+
+        protection_data = self._protection_tokens(video)
+        # TODO: is this kind of token still in use? (perhaps add a test)
        protection_token = try_get(
-            video, lambda x: x['protectiondata']['token'], compat_str)
+            protection_data, lambda x: x['token'], compat_str)
        if protection_token:
-            azure_manifest_url += '?hdnts=%s' % protection_token
+            azure_manifest_url = add_token(azure_manifest_url, protection_token)
+        elif protection_data:
+            token_key = try_get(protection_data, lambda x: x['tokenReference'], compat_str)
+            for key in protection_data:
+                value = protection_data[key]
+                if isinstance(value, compat_str) and key.startswith('token') and len(key) > 5:
+                    azure_manifest_urls[key[5:]] = add_token(azure_manifest_url, value, key=token_key)

        formats = self._extract_m3u8_formats(
-            azure_manifest_url % '(format=m3u8-aapl)',
+            (try_get(azure_manifest_urls, lambda x: x['HLS'], compat_str) or azure_manifest_url) % '(format=m3u8-aapl)',
            video_id, 'mp4', 'm3u8_native',
            m3u8_id='%s-hls' % cdn, fatal=False)
        formats.extend(self._extract_mpd_formats(
-            azure_manifest_url % '(format=mpd-time-csf)',
+            (try_get(azure_manifest_urls, lambda x: x['DASH'], compat_str) or azure_manifest_url) % '(format=mpd-time-csf)',
            video_id, mpd_id='%s-dash' % cdn, fatal=False))
        formats.extend(self._extract_ism_formats(
-            azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
+            (try_get(azure_manifest_urls, lambda x: x['DASH'], compat_str) or azure_manifest_url) % '',
+            video_id, ism_id='%s-mss' % cdn, fatal=False))

        azure_progressive_base = get_cdn_shield_base('Prog', True)
        azure_file_distribution = stream_data.get('azureFileDistribution')
-        if azure_file_distribution:
+        # TODO: this download method does not work when tokenHLS/tokenDASH is present,
+        # (simply adding one of the tokens to the url does not fix the 400 Bad Request)
+        if azure_file_distribution and len(azure_manifest_urls) == 0:
            fds = azure_file_distribution.split(',')
            if fds:
                for fd in fds:
@@ -314,7 +318,8 @@ class NexxIE(InfoExtractor):
                video = find_video(result)

        # not all videos work via arc, e.g. nexx:741:1269984
-        if not video:
+        # some videos require extra protection tokens
+        if not video or self._protection_tokens(video):
            # Reverse engineered from JS code (see getDeviceID function)
            device_id = '%d:%d:%d%d' % (
                random.randint(1, 4), int(time.time()),