[yandex video][yandex efir] fixed extractor

Fixed extractor for Yandex Video and Yandex Efir(single program and program on channel). Latest version of Yandex Video and Yandex Efir not support request for external json. Now json data with video information used as a parameter in methods of Yandex stream player at webpage. Current extractor find call of Yandex stream player, parse parameters and find information about program.
2024-11-22 16:44:32 +01:00 · 2020-10-08 13:01:24 +03:00 · 2020-10-08 13:01:24 +03:00 · 01dcf9ddea
commit 01dcf9ddea
parent d65d89183f
1 changed files with 40 additions and 8 deletions
--- a/youtube_dl/extractor/yandexvideo.py
+++ b/youtube_dl/extractor/yandexvideo.py
@ -7,17 +7,20 @@ from ..utils import (
    int_or_none,
    url_or_none,
 )
+import re
+import json


 class YandexVideoIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
-                            yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
+                            yandex\.ru(?:/portal/(?:video|efir)|/efir|/video)?/?\?.*?(stream_id=|stream_channel=)|
                            frontend\.vh\.yandex\.ru/player/
                        )
-                        (?P<id>[\da-f]+)
+                        (?P<id>[\d\w]+)
                    '''
+
    _TESTS = [{
        'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
        'md5': '33955d7ae052f15853dc41f35f17581c',
@ -52,17 +55,46 @@ class YandexVideoIE(InfoExtractor):
        # DASH with DRM
        'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
        'only_matching': True,
+    }, {
+        # Efir for program
+        'url': 'https://yandex.ru/efir?stream_id=vMmiz2NPPDFE&from_block=player_context_menu_yavideo',
+        'only_matching': True,
+    }, {
+        # Efir for channel
+        'url': 'https://yandex.ru/efir?stream_channel=1335',
+        'only_matching': True,
    }]

+    def split_params(self, txt: str) -> []:
+        braket = 0
+        line = ''
+        items = []
+        for c in txt:
+            if c == '{':
+                braket += 1
+            elif c == '}':
+                braket -= 1
+            if c == ',' and braket == 0:
+                items.append(line)
+                line = ''
+            else:
+                line += c
+        if line != '':
+            items.append(line)
+        return items
+
    def _real_extract(self, url):
        video_id = self._match_id(url)

-        content = self._download_json(
-            'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id,
-            video_id, query={
-                'stream_options': 'hires',
-                'disable_trackings': 1,
-            })['content']
+        html = self._download_webpage(url, video_id)
+
+        # search call of Yandex stream player
+        func_params = self._search_regex(r'\s*Ya\.streamModules\.player\.play\(\s*([\w\W\\R\"]*)\,\s*true\);\s*',
+                                         html, 'Yandex player', flags=re.DOTALL | re.MULTILINE)
+        params = self.split_params(func_params)
+        json_text = params[1]
+        content = json.loads(json_text)
+        content = content.get('program')

        content_url = url_or_none(content.get('content_url')) or url_or_none(
            content['streams'][0]['url'])