From 5583153b36f95416d6bb01aca6745a25bed8f99a Mon Sep 17 00:00:00 2001
From: 0x9fff00 <0x9fff00+git@protonmail.ch>
Date: Wed, 1 May 2019 17:19:34 +0200
Subject: [PATCH] [europaconsilium] Add extractor (closes #20679)

---
Review notes: the regex named groups (lang/id) were restored, the host dots in
_VALID_URL were escaped and a failed webcast page download no longer reaches
the og:* lookups. The blob ids on the "index" lines predate these edits.

 youtube_dl/extractor/europaconsilium.py | 52 +++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py      |  1 +
 youtube_dl/extractor/kaltura.py         | 24 ++++++++++--
 3 files changed, 74 insertions(+), 3 deletions(-)
 create mode 100644 youtube_dl/extractor/europaconsilium.py

diff --git a/youtube_dl/extractor/europaconsilium.py b/youtube_dl/extractor/europaconsilium.py
new file mode 100644
index 000000000..27c1b9f17
--- /dev/null
+++ b/youtube_dl/extractor/europaconsilium.py
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    NO_DEFAULT,
+    smuggle_url,
+    str_or_none,
+)
+
+
+class EuropaConsiliumIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.consilium\.europa\.eu/(?P<lang>[a-z]{2})/(?:webcast|embed)/(?P<id>[a-f0-9-]+)'
+    _TEST = {
+        'url': 'https://video.consilium.europa.eu/en/webcast/6c841728-4a85-40ac-8536-5b91f1a65fa9',
+        'md5': 'befcce5d4de2ba9b045680135ccfe3bc',
+        'info_dict': {
+            'id': '0_2sj82qqy',
+            'ext': 'mp4',
+            'title': 'Agriculture and Fisheries Council - Public session',
+            'timestamp': 1551889485,
+            'upload_date': '20190318',
+            'uploader_id': 'cms',
+        }
+    }
+
+    def _real_extract(self, url):
+        lang, video_id = re.match(self._VALID_URL, url).groups()
+        info = self._download_json(
+            'https://councilconnect.streamamg.com/api/%s/webcasts/%s' % (
+                lang, video_id), video_id)
+        entry_id = info['EntryId']
+        # The page is optional (fatal=False); fall back to an empty string so
+        # the og:* lookups below never run against a non-string result
+        webpage = self._download_webpage(url, video_id, fatal=False) or ''
+        # Partner id used when the page does not expose data-partnerid
+        partner_id = '3000261'
+        if webpage:
+            partner_id = self._search_regex(
+                r'data-partnerid\s*=\s*(["\'])(?P<id>\d+)\1', webpage,
+                'partner id', default=partner_id, fatal=False, group='id')
+
+        return {
+            '_type': 'url_transparent',
+            'url': smuggle_url('kaltura:%s:%s' % (partner_id, entry_id), {'service_url': 'https://open.http.mp.streamamg.com'}),
+            'ie_key': 'Kaltura',
+            'title': self._og_search_title(webpage, default=info.get('Title') or NO_DEFAULT),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'upload_date': str_or_none(info.get('ScheduleDay')),
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c10bcbcc1..bd556bc1d 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -332,6 +332,7 @@ from .espn import (
 )
 from .esri import EsriVideoIE
 from .europa import EuropaIE
+from .europaconsilium import EuropaConsiliumIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .expotv import ExpoTVIE
 from .expressen import ExpressenIE
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 2d38b758b..f7cdae13d 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -6,8 +6,9 @@ import base64
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urlparse,
     compat_parse_qs,
+    compat_str,
+    compat_urlparse,
 )
 from ..utils import (
     clean_html,
@@ -317,10 +318,24 @@ class KalturaIE(InfoExtractor):
                 if f.get('isOriginal') is True and not self._is_valid_url(
                         video_url, entry_id, format_id):
                     continue
-                # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
+                # audio-only has no or empty videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
                 # -f mp4-56)
-                vcodec = 'none' if 'videoCodecId' not in f and f.get(
+                vcodec = 'none' if not f.get('videoCodecId') and f.get(
                     'frameRate') == 0 else f.get('videoCodecId')
+
+                # tags is matched below as a comma-separated list of items
+                tags = f.get('tags')
+                language = None
+                language_preference = -1
+                format_note = None
+                if tags and isinstance(tags, compat_str):
+                    language = self._search_regex(r'(?:^|,)lang([a-z]{2})(?:$|,)',
+                                                  tags, 'language', default=None)
+                    if 'defaultlang' in tags:
+                        language_preference = 10
+                    format_note = self._search_regex(r'(?:^|,)label([^,]+)(?:$|,)',
+                                                     tags, 'label', default=None)
+
                 formats.append({
                     'format_id': format_id,
                     'ext': f.get('fileExt'),
@@ -331,6 +346,9 @@ class KalturaIE(InfoExtractor):
                     'vcodec': vcodec,
                     'height': int_or_none(f.get('height')),
                     'width': int_or_none(f.get('width')),
+                    'language': language,
+                    'language_preference': language_preference,
+                    'format_note': format_note,
                     'url': video_url,
                 })
             if '/playManifest/' in data_url: