[europaconsilium] Add extractor (closes #20679)

2024-11-22 16:44:32 +01:00 · 2019-05-01 17:19:34 +02:00 · 2019-05-01 17:19:34 +02:00 · 5583153b36
commit 5583153b36
parent c4bd9cb7bb
3 changed files with 70 additions and 3 deletions
--- a/youtube_dl/extractor/europaconsilium.py
+++ b/youtube_dl/extractor/europaconsilium.py
@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    NO_DEFAULT,
+    smuggle_url,
+    str_or_none,
+)
+
+
+class EuropaConsiliumIE(InfoExtractor):
+    """Resolve video.consilium.europa.eu webcast/embed pages to a Kaltura entry."""
+    _VALID_URL = r'https?://video\.consilium\.europa\.eu/(?P<lang>[a-z]{2})/(?:webcast|embed)/(?P<id>[a-f0-9-]+)'
+    _TEST = {
+        'url': 'https://video.consilium.europa.eu/en/webcast/6c841728-4a85-40ac-8536-5b91f1a65fa9',
+        'md5': 'befcce5d4de2ba9b045680135ccfe3bc',
+        'info_dict': {
+            'id': '0_2sj82qqy',
+            'ext': 'mp4',
+            'title': 'Agriculture and Fisheries Council  - Public session',
+            'timestamp': 1551889485,
+            'upload_date': '20190318',
+            'uploader_id': 'cms',
+        }
+    }
+
+    def _real_extract(self, url):
+        lang, video_id = re.match(self._VALID_URL, url).groups()
+        info = self._download_json(
+            'https://councilconnect.streamamg.com/api/%s/webcasts/%s' % (lang, video_id), video_id)
+        entry_id = info['EntryId']
+        # The page download is non-fatal and may come back falsy; normalise it to ''
+        # so each regex lookup below falls back to its default instead of crashing.
+        webpage = self._download_webpage(url, video_id, fatal=False) or ''
+        partner_id = self._search_regex(
+            r'data-partnerid\s*=\s*(["\'])(?P<id>\d+)\1', webpage,
+            'partner id', default='3000261', group='id')
+
+        return {
+            '_type': 'url_transparent',
+            'url': smuggle_url('kaltura:%s:%s' % (partner_id, entry_id), {'service_url': 'https://open.http.mp.streamamg.com'}),
+            'ie_key': 'Kaltura',
+            'title': self._og_search_title(webpage, default=info.get('Title') or NO_DEFAULT),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'upload_date': str_or_none(info.get('ScheduleDay')),
+        }
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -332,6 +332,7 @@ from .espn import (
 )
 from .esri import EsriVideoIE
 from .europa import EuropaIE
+from .europaconsilium import EuropaConsiliumIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .expotv import ExpoTVIE
 from .expressen import ExpressenIE
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@ -6,8 +6,9 @@ import base64

 from .common import InfoExtractor
 from ..compat import (
-    compat_urlparse,
    compat_parse_qs,
+    compat_str,
+    compat_urlparse,
 )
 from ..utils import (
    clean_html,
@ -317,10 +318,23 @@ class KalturaIE(InfoExtractor):
            if f.get('isOriginal') is True and not self._is_valid_url(
                    video_url, entry_id, format_id):
                continue
-            # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
+            # audio-only has no or empty videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
            # -f mp4-56)
-            vcodec = 'none' if 'videoCodecId' not in f and f.get(
+            vcodec = 'none' if not f.get('videoCodecId') and f.get(
                'frameRate') == 0 else f.get('videoCodecId')
+
+            tags = f.get('tags')
+            language = None
+            language_preference = -1
+            format_note = None
+            if tags and isinstance(tags, compat_str):
+                language = self._search_regex(r'(?:^|,)lang([a-z]{2})(?:$|,)',
+                                              tags, 'language', default=None)
+                if 'defaultlang' in tags:
+                    language_preference = 10
+                format_note = self._search_regex(r'(?:^|,)label([^,]+)(?:$|,)',
+                                                 tags, 'label', default=None)
+
            formats.append({
                'format_id': format_id,
                'ext': f.get('fileExt'),
@ -331,6 +345,9 @@ class KalturaIE(InfoExtractor):
                'vcodec': vcodec,
                'height': int_or_none(f.get('height')),
                'width': int_or_none(f.get('width')),
+                'language': language,
+                'language_preference': language_preference,
+                'format_note': format_note,
                'url': video_url,
            })
        if '/playManifest/' in data_url: