[wasdtv] Add new extractor

2024-11-26 02:14:32 +01:00 · 2020-09-14 16:04:04 +03:00 · 2020-09-14 16:04:04 +03:00 · cdf98a34ea
commit cdf98a34ea
parent e8c5d40bc8
2 changed files with 230 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -1383,6 +1383,11 @@ from .vyborymos import VyboryMosIE
 from .vzaar import VzaarIE
 from .wakanim import WakanimIE
 from .walla import WallaIE
+from .wasdtv import (
+    WASDTVStreamIE,
+    WASDTVRecordIE,
+    WASDTVClipIE,
+)
 from .washingtonpost import (
    WashingtonPostIE,
    WashingtonPostArticleIE,
--- a/youtube_dl/extractor/wasdtv.py
+++ b/youtube_dl/extractor/wasdtv.py
@ -0,0 +1,225 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_kwargs,
+    compat_str,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    parse_iso8601,
+    urljoin,
+)
+
+
+class WASDTVBaseIE(InfoExtractor):
+    _API_BASE = 'https://wasd.tv/api/'
+    _THUMBNAIL_SIZES = ('small', 'medium', 'large')
+
+    def _fetch(self, *path, **kwargs):
+        """
+        Fetch the resource using WASD.TV API.
+
+        The positional arguments are the parts of the resource path relative
+        to the _API_BASE.
+
+        The following keyword arguments are required by this method:
+            * item_id -- item identifier (for logging purposes).
+            * description -- human-readable resource description (for logging
+            purposes).
+
+        Any additional keyword arguments are passed directly to
+        the _download_json method.
+        """
+        description = kwargs.pop('description')
+        response = self._download_json(
+            urljoin(self._API_BASE, '/'.join(map(compat_str, path))),
+            kwargs.pop('item_id'),
+            note='Downloading {} metadata'.format(description),
+            errnote='Unable to download {} metadata'.format(description),
+            **compat_kwargs(kwargs))
+        if not isinstance(response, dict):
+            raise ExtractorError(
+                'JSON object expected, got: {!r}'.format(response))
+        error = response.get('error')
+        if error:
+            raise ExtractorError(
+                '{} returned error: {}'.format(self.IE_NAME, error['code']),
+                expected=True)
+        return response['result']
+
+    def _extract_thumbnails(self, thumbnails_dict):
+        if not thumbnails_dict:
+            return None
+        thumbnails = []
+        for index, thumbnail_size in enumerate(self._THUMBNAIL_SIZES):
+            thumbnail_url = thumbnails_dict.get(thumbnail_size)
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+                'preference': index,
+            })
+        return thumbnails
+
+    def _extract_og_title(self, url, item_id):
+        return self._og_search_title(self._download_webpage(url, item_id))
+
+
+class WASDTVBaseVideoIE(WASDTVBaseIE):
+
+    def _get_container(self, url):
+        """
+        Download and extract the media container dict for the given URL.
+        Return the container dict.
+        """
+        raise NotImplementedError
+
+    def _get_media_url(self, media_meta):
+        """
+        Extract the m3u8 URL from the media_meta dict.
+        Return a tuple (url: str, is_live: bool).
+        """
+        raise NotImplementedError
+
+    def _real_extract(self, url):
+        container = self._get_container(url)
+        stream = container['media_container_streams'][0]
+        media = stream['stream_media'][0]
+        media_meta = media['media_meta']
+        media_url, is_live = self._get_media_url(media_meta)
+        video_id = media.get('media_id') or container.get('media_container_id')
+        return {
+            'id': compat_str(video_id),
+            'title': (
+                container.get('media_container_name')
+                or self._extract_og_title(url, video_id)),
+            'description': container.get('media_container_description'),
+            'thumbnails': self._extract_thumbnails(
+                media_meta.get('media_preview_images')),
+            'timestamp': parse_iso8601(container.get('created_at')),
+            'view_count': int_or_none(stream.get(
+                'stream_current_viewers' if is_live
+                else 'stream_total_viewers')),
+            'is_live': is_live,
+            'formats': self._extract_m3u8_formats(media_url, video_id, 'mp4'),
+        }
+
+
+class WASDTVStreamIE(WASDTVBaseVideoIE):
+    IE_NAME = 'wasdtv:stream'
+    _VALID_URL = r'https?://wasd\.tv/(?P<id>[^/#?]+)$'
+    _TEST = {
+        'url': 'https://wasd.tv/24_7',
+        'info_dict': {
+            'id': '559738',
+            'ext': 'mp4',
+            'title': 'Live 24/7 Music',
+            'description': '24&#x2F;7 Music',
+            'timestamp': int,
+            'upload_date': r're:^\d{8}$',
+            'is_live': True,
+            'view_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _get_container(self, url):
+        nickname = self._match_id(url)
+        channel = self._fetch(
+            'channels', 'nicknames', nickname,
+            item_id=nickname,
+            description='channel')
+        channel_id = channel['channel_id']
+        containers = self._fetch(
+            'v2', 'media-containers',
+            query={
+                'channel_id': channel_id,
+                'media_container_type': 'SINGLE',
+                'media_container_status': 'RUNNING',
+            },
+            item_id=channel_id,
+            description='running media containers')
+        if not containers:
+            raise ExtractorError(
+                '{} is offline'.format(nickname), expected=True)
+        return containers[0]
+
+    def _get_media_url(self, media_meta):
+        return media_meta['media_url'], True
+
+
+class WASDTVRecordIE(WASDTVBaseVideoIE):
+    IE_NAME = 'wasdtv:record'
+    _VALID_URL = r'https?://wasd\.tv/[^/#?]+/videos\?record=(?P<id>\d+)$'
+    _TEST = {
+        'url': 'https://wasd.tv/mightypoot/videos?record=551500',
+        'info_dict': {
+            'id': '551593',
+            'ext': 'mp4',
+            'title': 'Похвали Стримера Финал: Fall Guys, Darkest Dungeon',
+            'description': 'Здрасте.\nна этом всё',
+            'timestamp': 1598885270,
+            'upload_date': '20200831',
+            'is_live': False,
+            'view_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _get_container(self, url):
+        container_id = self._match_id(url)
+        return self._fetch(
+            'v2', 'media-containers', container_id,
+            item_id=container_id,
+            description='media container')
+
+    def _get_media_url(self, media_meta):
+        media_archive_url = media_meta.get('media_archive_url')
+        if media_archive_url:
+            return media_archive_url, False
+        return media_meta['media_url'], True
+
+
+class WASDTVClipIE(WASDTVBaseIE):
+    IE_NAME = 'wasdtv:clip'
+    _VALID_URL = r'https?://wasd\.tv/[^/#?]+/clips\?clip=(?P<id>\d+)$'
+    _TEST = {
+        'url': 'https://wasd.tv/dawgos/clips?clip=5539',
+        'info_dict': {
+            'id': '5539',
+            'ext': 'mp4',
+            'title': 'это про вас',
+            'timestamp': 1598912283,
+            'upload_date': '20200831',
+            'view_count': None,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        clip_id = self._match_id(url)
+        clip = self._fetch(
+            'v2', 'clips', clip_id,
+            item_id=clip_id,
+            description='clip')
+        clip_data = clip['clip_data']
+        return {
+            'id': compat_str(clip_id),
+            'title': (
+                clip.get('clip_title')
+                or self._extract_og_title(url, clip_id)),
+            'thumbnails': self._extract_thumbnails(clip_data.get('preview')),
+            'timestamp': parse_iso8601(clip.get('created_at')),
+            'view_count': int_or_none(clip.get('clip_views_count')),
+            'formats': self._extract_m3u8_formats(
+                clip_data['url'], clip_id, 'mp4'),
+        }