[opencast] Add support for opencast playlists

2020-10-19 15:59:10 +02:00 · 2020-10-19 15:59:10 +02:00 · 36a34bc4ba
parent baa16c6037
commit 36a34bc4ba
2 changed files with 132 additions and 54 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -798,7 +798,10 @@ from .ooyala import (
    OoyalaIE,
    OoyalaExternalIE,
 )
-from .opencast import OpencastIE
+from .opencast import (
+    OpencastIE,
+    OpencastPlaylistIE,
+)
 from .ora import OraTVIE
 from .orf import (
    ORFTVthekIE,
--- a/youtube_dl/extractor/opencast.py
+++ b/youtube_dl/extractor/opencast.py
@ -4,11 +4,15 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import parse_iso8601, parse_resolution, int_or_none
+from ..utils import (
+    parse_iso8601,
+    parse_resolution,
+    int_or_none,
+    ExtractorError,
+)


-class OpencastIE(InfoExtractor):
-    IE_NAME = 'Opencast'
+class OpencastBaseIE(InfoExtractor):
    _INSTANCES_RE = r'''(?:
                            opencast\.informatik\.kit\.edu|
                            electures\.uni-muenster\.de|
@ -31,52 +35,12 @@ class OpencastIE(InfoExtractor):
                            mcmedia\.missioncollege\.org|
                            clases\.odon\.edu\.uy
                        )'''
-
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'

-    _VALID_URL = r'''(?x)
-                    https?://(?P<host>%s)/paella/ui/watch.html\?.*?
-                    id=(?P<id>%s)
-                    ''' % (
-        _INSTANCES_RE,
-        _UUID_RE,
-    )
-
-    _API_BASE = 'https://%s/search/episode.json?id=%s'
-
-    _TEST = {
-        'url': 'https://oc-video1.ruhr-uni-bochum.de/paella/ui/watch.html?id=ed063cd5-72c8-46b5-a60a-569243edcea8',
-        'md5': '554c8e99a90f7be7e874619fcf2a3bc9',
-        'info_dict': {
-            'id': 'ed063cd5-72c8-46b5-a60a-569243edcea8',
-            'ext': 'mp4',
-            'title': '11 - Kryptographie - 24.11.2015',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'timestamp': 1606208400,
-            'upload_date': '20201124'
-        },
-    }
-
    def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
+        # Download and decode the JSON document at self._API_BASE % (host, video_id).
+        # _API_BASE is not defined on this base class; each concrete extractor
+        # supplies its own two-placeholder template.
+        # `path` is accepted but not used in this body; note, errnote and fatal
+        # are forwarded unchanged to self._download_json.
        return self._download_json(self._API_BASE % (host, video_id), video_id, note=note, errnote=errnote, fatal=fatal)

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        host = mobj.group('host')
-        video_id = mobj.group('id')
-
-        api_json = self._call_api(host, video_id, '', note='Downloading video JSON')
-
-        video = api_json.get('search-results', {}).get('result', {}).get('mediapackage', {})
-
-        # webpage = self._download_webpage(url, video_id)
-
-        title = video.get('title', '')
-        series = video.get('seriestitle', '')
-        season_id = video.get('series', '')
-        creator = video.get('creators', {}).get('creator', '')
-        timestamp = parse_iso8601(video.get('start', ''))
-
+    def _parse_mediapackage(self, video):
        tracks = video.get('media', {}).get('track', [])
        formats = []

@ -111,15 +75,31 @@ class OpencastIE(InfoExtractor):

        self._sort_formats(formats)

-        result_obj = {
-            'id': video_id,
-            'title': title,
-            'creator': creator,
-            'series': series,
-            'season_id': season_id,
-            'formats': formats,
-            'timestamp': timestamp,
-        }
+        result_obj = {'formats': formats}
+
+        video_id = video.get('id')
+        if video_id is not None:
+            result_obj.update({'id': video_id})
+
+        title = video.get('title')
+        if title is not None:
+            result_obj.update({'title': title})
+
+        series = video.get('seriestitle')
+        if series is not None:
+            result_obj.update({'series': series})
+
+        season_id = video.get('series')
+        if season_id is not None:
+            result_obj.update({'season_id': season_id})
+
+        creator = video.get('creators', {}).get('creator')
+        if creator is not None:
+            result_obj.update({'creator': creator})
+
+        timestamp = parse_iso8601(video.get('start'))
+        if timestamp is not None:
+            result_obj.update({'timestamp': timestamp})

        attachments = video.get('attachments', {}).get('attachment', [])
        if len(attachments) > 0:
@ -127,3 +107,98 @@ class OpencastIE(InfoExtractor):
            result_obj.update({'thumbnail': thumbnail})

        return result_obj
+
+
+class OpencastIE(OpencastBaseIE):
+    """Extractor for a single Opencast episode opened in the Paella player.
+
+    Matches ``/paella/ui/watch.html?...id=<uuid>`` on the hosts listed in
+    ``OpencastBaseIE._INSTANCES_RE`` and looks the episode up through the
+    ``search/episode.json?id=`` endpoint.
+    """
+
+    # The dot in "watch.html" is escaped so it only matches a literal dot.
+    _VALID_URL = r'''(?x)
+                    https?://(?P<host>%s)/paella/ui/watch\.html\?.*?
+                    id=(?P<id>%s)
+                    ''' % (
+        OpencastBaseIE._INSTANCES_RE,
+        OpencastBaseIE._UUID_RE,
+    )
+
+    _API_BASE = 'https://%s/search/episode.json?id=%s'
+
+    _TEST = {
+        'url': 'https://oc-video1.ruhr-uni-bochum.de/paella/ui/watch.html?id=ed063cd5-72c8-46b5-a60a-569243edcea8',
+        'md5': '554c8e99a90f7be7e874619fcf2a3bc9',
+        'info_dict': {
+            'id': 'ed063cd5-72c8-46b5-a60a-569243edcea8',
+            'ext': 'mp4',
+            'title': '11 - Kryptographie - 24.11.2015',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1606208400,
+            'upload_date': '20201124',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the episode referenced by *url*.
+
+        Raises ExtractorError when the API response has no ``result`` entry
+        or when ``result`` is not a single object.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        video_id = mobj.group('id')
+
+        api_json = self._call_api(host, video_id, '', note='Downloading video JSON')
+
+        search_results = api_json.get('search-results', {})
+        if 'result' not in search_results:
+            # A missing episode is a user-facing condition, not an extractor bug.
+            raise ExtractorError('Video was not found', expected=True)
+
+        result_dict = search_results['result']
+        if not isinstance(result_dict, dict):
+            raise ExtractorError('More than one video was unexpectedly returned.')
+
+        info = self._parse_mediapackage(result_dict.get('mediapackage', {}))
+        # _parse_mediapackage only sets 'id' when the mediapackage carries one;
+        # fall back to the id from the URL so the result always has an id.
+        info.setdefault('id', video_id)
+        return info
+
+
+class OpencastPlaylistIE(OpencastBaseIE):
+    """Extractor for an Opencast series opened in the Engage UI.
+
+    Matches ``/engage/ui/index.html?...epFrom=<uuid>`` on the hosts listed in
+    ``OpencastBaseIE._INSTANCES_RE`` and lists the episodes through the
+    ``search/episode.json?sid=`` endpoint.
+    """
+
+    # The dot in "index.html" is escaped so it only matches a literal dot.
+    _VALID_URL = r'''(?x)
+                    https?://(?P<host>%s)/engage/ui/index\.html\?.*?
+                    epFrom=(?P<id>%s)
+                    ''' % (
+        OpencastBaseIE._INSTANCES_RE,
+        OpencastBaseIE._UUID_RE,
+    )
+
+    _API_BASE = 'https://%s/search/episode.json?sid=%s'
+
+    # No 'md5' here: a playlist result has no single downloaded file to hash.
+    _TEST = {
+        'url': 'https://oc-video1.ruhr-uni-bochum.de/engage/ui/index.html?epFrom=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
+        'info_dict': {
+            'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
+            'title': 'Kryptographie - WiSe 15/16',
+        },
+        'playlist_mincount': 28,
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist result holding every episode of the series in *url*.
+
+        The playlist title is taken from the 'series' field of the first
+        parsed episode. Raises ExtractorError when the API response has no
+        ``result`` entry or when no episodes could be collected.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        playlist_id = mobj.group('id')
+
+        api_json = self._call_api(host, playlist_id, '', note='Downloading playlist JSON')
+
+        search_results = api_json.get('search-results', {})
+        if 'result' not in search_results:
+            # A missing series is a user-facing condition, not an extractor bug.
+            raise ExtractorError('Playlist was not found', expected=True)
+
+        result_list = search_results['result']
+        # NOTE(review): presumably the API returns a bare object instead of a
+        # list when only one episode matches; normalise to a list either way.
+        if isinstance(result_list, dict):
+            result_list = [result_list]
+
+        entries = [
+            self._parse_mediapackage(episode.get('mediapackage', {}))
+            for episode in result_list
+        ]
+
+        if not entries:
+            raise ExtractorError('Playlist has no entries')
+
+        playlist_title = entries[0].get('series')
+
+        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=playlist_title)