From a7effddc0c741f6035d56b6b256ad1817ba36577 Mon Sep 17 00:00:00 2001 From: Itay Perl Date: Fri, 6 Mar 2020 13:30:30 +0200 Subject: [PATCH] reshet: new CDN --- youtube_dl/extractor/reshet.py | 63 +++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/reshet.py b/youtube_dl/extractor/reshet.py index f5cd21183..1d30048d9 100644 --- a/youtube_dl/extractor/reshet.py +++ b/youtube_dl/extractor/reshet.py @@ -4,17 +4,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +import os from ..utils import ( + js_to_json, urljoin, ) class ReshetIE(InfoExtractor): - _VALID_URL = r'https?://13tv\.co\.il/item/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/?' + _VALID_URL = r'https?://13tv\.co\.il/(?:(?:item/)?[^/]+/[^/]+/[^/]+/[^/]+(?:/(?P[^/]+))?/?|live/?)' _TEST = { 'url': 'https://13tv.co.il/item/entertainment/gav-hauma/season-10/episodes/jz1a1-1028855', - 'note': 'Test brightcove URL extraction', + 'note': 'Test URL extraction', 'info_dict': { 'id': '6015811232001', 'ext': 'mp4', @@ -26,17 +28,60 @@ class ReshetIE(InfoExtractor): } def _real_extract(self, url): - reshet_id = self._match_id(url) + if re.search('live/?$', url): + reshet_id = 'live' + else: + reshet_id = self._match_id(url) page = self._download_webpage(url, reshet_id) - data = self._parse_json(re.search(r'var initial_data = (.*?);\n', page).group(1), reshet_id) - item = data['items'][str(data['curItem'])] - brightcove_id = item['video']['videoID'] + + data = self._parse_json(re.search(r'window.data_query = (.*?).data_query;\n', page).group(1), reshet_id) + + liveId = data.get('header', {}).get('Live', {}).get('videoId') + curItem = data.get('curItem') + + if liveId is None and curItem is None: + # create a playlist result + entries = [] + + for item in data['items'].values(): + if item.get('video') is None: + continue + + entries.append(self.url_result(item['link'], + video_id=item['post_ID'], + video_title=item['title'])) + + return self.playlist_result(entries) main_js_url = urljoin(url, re.search(r'