Merge pull request #5 from ytdl-org/master

02.03.2020 Master merge
2024-11-24 01:14:32 +01:00 · 2020-03-02 21:44:25 +01:00 · 2020-03-02 21:44:25 +01:00 · 45b2cd360f
commit 45b2cd360f
parent 5770e2911c 1e1c1960aa
4 changed files with 107 additions and 37 deletions
--- a/youtube_dl/extractor/peertube.py
+++ b/youtube_dl/extractor/peertube.py
@ -8,6 +8,7 @@ from ..compat import compat_str
 from ..utils import (
    int_or_none,
    parse_resolution,
+    str_or_none,
    try_get,
    unified_timestamp,
    url_or_none,
@ -415,6 +416,7 @@ class PeerTubeIE(InfoExtractor):
                            peertube\.cpy\.re
                        )'''
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+    _API_BASE = 'https://%s/api/v1/videos/%s/%s'
    _VALID_URL = r'''(?x)
                    (?:
                        peertube:(?P<host>[^:]+):|
@ -423,26 +425,30 @@ class PeerTubeIE(InfoExtractor):
                    (?P<id>%s)
                    ''' % (_INSTANCES_RE, _UUID_RE)
    _TESTS = [{
-        'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
-        'md5': '80f24ff364cc9d333529506a263e7feb',
+        'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
+        'md5': '9bed8c0137913e17b86334e5885aacff',
        'info_dict': {
-            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
+            'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
            'ext': 'mp4',
-            'title': 'wow',
-            'description': 'wow such video, so gif',
+            'title': 'What is PeerTube?',
+            'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
-            'timestamp': 1519297480,
-            'upload_date': '20180222',
-            'uploader': 'Luclu7',
-            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
-            'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7',
-            'license': 'Unknown',
-            'duration': 3,
+            'timestamp': 1538391166,
+            'upload_date': '20181001',
+            'uploader': 'Framasoft',
+            'uploader_id': '3',
+            'uploader_url': 'https://framatube.org/accounts/framasoft',
+            'channel': 'Les vidéos de Framasoft',
+            'channel_id': '2',
+            'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
+            'language': 'en',
+            'license': 'Attribution - Share Alike',
+            'duration': 113,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
-            'tags': list,
-            'categories': list,
+            'tags': ['framasoft', 'peertube'],
+            'categories': ['Science & Technology'],
        }
    }, {
        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
@ -484,13 +490,38 @@ class PeerTubeIE(InfoExtractor):
                entries = [peertube_url]
        return entries

+    def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
+        return self._download_json(
+            self._API_BASE % (host, video_id, path), video_id,
+            note=note, errnote=errnote, fatal=fatal)
+
+    def _get_subtitles(self, host, video_id):
+        captions = self._call_api(
+            host, video_id, 'captions', note='Downloading captions JSON',
+            fatal=False)
+        if not isinstance(captions, dict):
+            return
+        data = captions.get('data')
+        if not isinstance(data, list):
+            return
+        subtitles = {}
+        for e in data:
+            language_id = try_get(e, lambda x: x['language']['id'], compat_str)
+            caption_url = urljoin('https://%s' % host, e.get('captionPath'))
+            if not caption_url:
+                continue
+            subtitles.setdefault(language_id or 'en', []).append({
+                'url': caption_url,
+            })
+        return subtitles
+
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host') or mobj.group('host_2')
        video_id = mobj.group('id')

-        video = self._download_json(
-            'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
+        video = self._call_api(
+            host, video_id, '', note='Downloading video JSON')

        title = video['name']

@ -513,10 +544,28 @@ class PeerTubeIE(InfoExtractor):
            formats.append(f)
        self._sort_formats(formats)

-        def account_data(field):
-            return try_get(video, lambda x: x['account'][field], compat_str)
+        full_description = self._call_api(
+            host, video_id, 'description', note='Downloading description JSON',
+            fatal=False)

-        category = try_get(video, lambda x: x['category']['label'], compat_str)
+        description = None
+        if isinstance(full_description, dict):
+            description = str_or_none(full_description.get('description'))
+        if not description:
+            description = video.get('description')
+
+        subtitles = self.extract_subtitles(host, video_id)
+
+        def data(section, field, type_):
+            return try_get(video, lambda x: x[section][field], type_)
+
+        def account_data(field, type_):
+            return data('account', field, type_)
+
+        def channel_data(field, type_):
+            return data('channel', field, type_)
+
+        category = data('category', 'label', compat_str)
        categories = [category] if category else None

        nsfw = video.get('nsfw')
@ -528,14 +577,17 @@ class PeerTubeIE(InfoExtractor):
        return {
            'id': video_id,
            'title': title,
-            'description': video.get('description'),
+            'description': description,
            'thumbnail': urljoin(url, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
-            'uploader': account_data('displayName'),
-            'uploader_id': account_data('uuid'),
-            'uploder_url': account_data('url'),
-            'license': try_get(
-                video, lambda x: x['licence']['label'], compat_str),
+            'uploader': account_data('displayName', compat_str),
+            'uploader_id': str_or_none(account_data('id', int)),
+            'uploader_url': url_or_none(account_data('url', compat_str)),
+            'channel': channel_data('displayName', compat_str),
+            'channel_id': str_or_none(channel_data('id', int)),
+            'channel_url': url_or_none(channel_data('url', compat_str)),
+            'language': data('language', 'id', compat_str),
+            'license': data('licence', 'label', compat_str),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likes')),
@ -544,4 +596,5 @@ class PeerTubeIE(InfoExtractor):
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
            'formats': formats,
+            'subtitles': subtitles
        }
--- a/youtube_dl/extractor/servus.py
+++ b/youtube_dl/extractor/servus.py
@ -7,9 +7,18 @@ from .common import InfoExtractor


 class ServusIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?:
+                            servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
+                            servustv\.com/videos
+                        )
+                        /(?P<id>[aA]{2}-\w+|\d+-\d+)
+                    '''
    _TESTS = [{
-        'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+        # new URL schema
+        'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
        'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
        'info_dict': {
            'id': 'AA-1T6VBU5PW1W12',
@ -18,6 +27,10 @@ class ServusIE(InfoExtractor):
            'description': 'md5:1247204d85783afe3682644398ff2ec4',
            'thumbnail': r're:^https?://.*\.jpg',
        }
+    }, {
+        # old URL schema
+        'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+        'only_matching': True,
    }, {
        'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
        'only_matching': True,
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -33,6 +33,7 @@ from ..utils import (
    unified_timestamp,
    unsmuggle_url,
    urlencode_postdata,
+    urljoin,
    unescapeHTML,
 )

@ -191,7 +192,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
            for tt in text_tracks:
                subtitles[tt['lang']] = [{
                    'ext': 'vtt',
-                    'url': 'https://vimeo.com' + tt['url'],
+                    'url': urljoin('https://vimeo.com', tt['url']),
                }]

        thumbnails = []
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -2495,20 +2495,23 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
-        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
-            'title': 'ytdl test PL',
-            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+            'uploader': 'Sergey M.',
+            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+            'title': 'youtube-dl public playlist',
        },
-        'playlist_count': 3,
+        'playlist_count': 1,
    }, {
-        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
+        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
-            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
-            'title': 'YDL_Empty_List',
+            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+            'uploader': 'Sergey M.',
+            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
-        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
@ -2518,7 +2521,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
-        'playlist_count': 95,
+        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',