[yandex video][yandex efir] fixed extractor

Fixed extractor for Yandex Video and Yandex Efir(single program and program on channel).
Latest version of Yandex Video and Yandex Efir not support request for external json. Now json data with video information used as a parameter in methods of Yandex stream player at webpage.
Current extractor find call of Yandex stream player, parse parameters and find information about program.
This commit is contained in:
devbreath 2020-10-08 13:01:24 +03:00 committed by GitHub
parent d65d89183f
commit 01dcf9ddea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 40 additions and 8 deletions

View File

@ -7,17 +7,20 @@ from ..utils import (
int_or_none,
url_or_none,
)
import re
import json
class YandexVideoIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
yandex\.ru(?:/portal/(?:video|efir)|/efir|/video)?/?\?.*?(stream_id=|stream_channel=)|
frontend\.vh\.yandex\.ru/player/
)
(?P<id>[\da-f]+)
(?P<id>[\d\w]+)
'''
_TESTS = [{
'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
'md5': '33955d7ae052f15853dc41f35f17581c',
@ -52,17 +55,46 @@ class YandexVideoIE(InfoExtractor):
# DASH with DRM
'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
'only_matching': True,
}, {
# Efir for program
'url': 'https://yandex.ru/efir?stream_id=vMmiz2NPPDFE&from_block=player_context_menu_yavideo',
'only_matching': True,
}, {
# Efir for channel
'url': 'https://yandex.ru/efir?stream_channel=1335',
'only_matching': True,
}]
def split_params(self, txt: str) -> []:
braket = 0
line = ''
items = []
for c in txt:
if c == '{':
braket += 1
elif c == '}':
braket -= 1
if c == ',' and braket == 0:
items.append(line)
line = ''
else:
line += c
if line != '':
items.append(line)
return items
def _real_extract(self, url):
video_id = self._match_id(url)
content = self._download_json(
'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id,
video_id, query={
'stream_options': 'hires',
'disable_trackings': 1,
})['content']
html = self._download_webpage(url, video_id)
# search call of Yandex stream player
func_params = self._search_regex(r'\s*Ya\.streamModules\.player\.play\(\s*([\w\W\\R\"]*)\,\s*true\);\s*',
html, 'Yandex player', flags=re.DOTALL | re.MULTILINE)
params = self.split_params(func_params)
json_text = params[1]
content = json.loads(json_text)
content = content.get('program')
content_url = url_or_none(content.get('content_url')) or url_or_none(
content['streams'][0]['url'])