From 01dcf9ddea32ed854eb91f110dc5d639fc1664d8 Mon Sep 17 00:00:00 2001
From: devbreath
Date: Thu, 8 Oct 2020 13:01:24 +0300
Subject: [PATCH] [yandexvideo] Fix extraction for Yandex Video and Yandex Efir

Fix the extractor for Yandex Video and Yandex Efir (both a single
program and a program on a channel).

The latest versions of Yandex Video and Yandex Efir no longer support
requesting the video information as an external JSON document. The JSON
data is now passed as a parameter to the Yandex stream player call on
the webpage. The extractor now finds that player call, splits its
parameters and reads the program information from them.
---
 youtube_dl/extractor/yandexvideo.py | 62 +++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/yandexvideo.py b/youtube_dl/extractor/yandexvideo.py
index 46529be05..44d844adc 100644
--- a/youtube_dl/extractor/yandexvideo.py
+++ b/youtube_dl/extractor/yandexvideo.py
@@ -7,17 +7,17 @@ from ..utils import (
     int_or_none,
     url_or_none,
 )
 
 
 class YandexVideoIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
+                            yandex\.ru(?:/portal/(?:video|efir)|/efir|/video)?/?\?.*?(?:stream_id|stream_channel)=|
                             frontend\.vh\.yandex\.ru/player/
                         )
-                        (?P<id>[\da-f]+)
+                        (?P<id>\w+)
                     '''
     _TESTS = [{
         'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
         'md5': '33955d7ae052f15853dc41f35f17581c',
@@ -52,17 +52,63 @@ class YandexVideoIE(InfoExtractor):
         # DASH with DRM
         'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
         'only_matching': True,
+    }, {
+        # Efir for program
+        'url': 'https://yandex.ru/efir?stream_id=vMmiz2NPPDFE&from_block=player_context_menu_yavideo',
+        'only_matching': True,
+    }, {
+        # Efir for channel
+        'url': 'https://yandex.ru/efir?stream_channel=1335',
+        'only_matching': True,
     }]
 
+    def split_params(self, txt):
+        """Split a JavaScript argument list on its top-level commas.
+
+        Commas nested in {...} or [...] or inside a quoted string do not
+        separate arguments. Returns the list of raw argument strings.
+        """
+        items = []
+        depth = 0
+        quote = None
+        escaped = False
+        start = 0
+        for pos, c in enumerate(txt):
+            if quote:
+                # Inside a string literal: only look for its end
+                if escaped:
+                    escaped = False
+                elif c == '\\':
+                    escaped = True
+                elif c == quote:
+                    quote = None
+            elif c in '"\'':
+                quote = c
+            elif c in '{[':
+                depth += 1
+            elif c in '}]':
+                depth -= 1
+            elif c == ',' and depth == 0:
+                items.append(txt[start:pos])
+                start = pos + 1
+        if start < len(txt):
+            items.append(txt[start:])
+        return items
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        content = self._download_json(
-            'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id,
-            video_id, query={
-                'stream_options': 'hires',
-                'disable_trackings': 1,
-            })['content']
+        webpage = self._download_webpage(url, video_id)
+
+        # The metadata is no longer served as a separate JSON document;
+        # it is passed as the second argument of the player call
+        func_params = self._search_regex(
+            r'Ya\.streamModules\.player\.play\(\s*([\s\S]*),\s*true\);',
+            webpage, 'Yandex player')
+        params = self.split_params(func_params)
+        # An absent argument is reported by _parse_json as invalid JSON
+        content = self._parse_json(
+            params[1] if len(params) > 1 else '', video_id)['program']
 
         content_url = url_or_none(content.get('content_url')) or url_or_none(
             content['streams'][0]['url'])