[wasdtv] Add new extractor

2024-11-26 10:24:33 +01:00 · 2020-09-14 16:04:04 +03:00 · 2020-09-14 16:04:04 +03:00 · cdf98a34ea
commit cdf98a34ea
parent e8c5d40bc8
2 changed files with 230 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -1383,6 +1383,11 @@ from .vyborymos import VyboryMosIE
 from .vzaar import VzaarIE
 from .wakanim import WakanimIE
 from .walla import WallaIE
 from .wasdtv import (
    WASDTVStreamIE,
    WASDTVRecordIE,
    WASDTVClipIE,
 )
 from .washingtonpost import (
    WashingtonPostIE,
    WashingtonPostArticleIE,
--- a/youtube_dl/extractor/wasdtv.py
+++ b/youtube_dl/extractor/wasdtv.py
@ -0,0 +1,225 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import (
    compat_kwargs,
    compat_str,
 )
 from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
    urljoin,
 )
 class WASDTVBaseIE(InfoExtractor):
    """Helpers shared by the WASD.TV extractors: API access, thumbnails, title fallback."""
    # Root URL that the API resource paths passed to _fetch are joined onto.
    _API_BASE = 'https://wasd.tv/api/'
    # Keys looked up in a thumbnails dict; the position in this tuple is used
    # as the thumbnail preference, so later entries are preferred.
    _THUMBNAIL_SIZES = ('small', 'medium', 'large')
    def _fetch(self, *path, **kwargs):
        """
        Fetch the resource using WASD.TV API.
        The positional arguments are the parts of the resource path relative
        to the _API_BASE.
        The following keyword arguments are required by this method:
            * item_id -- item identifier (for logging purposes).
            * description -- human-readable resource description (for logging
            purposes).
        Any additional keyword arguments are passed directly to
        the _download_json method.
        Return the value stored under the 'result' key of the response.
        Raise ExtractorError if the response is not a JSON object or if it
        carries a non-empty 'error' entry.
        """
        description = kwargs.pop('description')
        item_id = kwargs.pop('item_id')
        # %-formatting is used throughout instead of auto-numbered '{}'
        # fields, which are not accepted by str.format before Python 2.7.
        response = self._download_json(
            urljoin(self._API_BASE, '/'.join(map(compat_str, path))),
            item_id,
            note='Downloading %s metadata' % description,
            errnote='Unable to download %s metadata' % description,
            **compat_kwargs(kwargs))
        if not isinstance(response, dict):
            raise ExtractorError(
                'JSON object expected, got: %r' % (response,))
        error = response.get('error')
        if error:
            # Report the error code when there is one; otherwise fall back to
            # the raw error value so that the API error is not masked by
            # a KeyError/TypeError raised while formatting the message.
            error_code = error.get('code') if isinstance(error, dict) else None
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_code or error),
                expected=True)
        return response['result']
    def _extract_thumbnails(self, thumbnails_dict):
        """
        Build the list of thumbnail dicts from a size -> URL mapping.
        Only the sizes listed in _THUMBNAIL_SIZES are considered; sizes with
        no URL are skipped.
        Return None if thumbnails_dict is empty or is not a dict.
        """
        if not thumbnails_dict or not isinstance(thumbnails_dict, dict):
            return None
        return [
            {
                'url': thumbnails_dict[size],
                'preference': preference,
            }
            for preference, size in enumerate(self._THUMBNAIL_SIZES)
            if thumbnails_dict.get(size)
        ]
    def _extract_og_title(self, url, item_id):
        """Download the webpage at url and return its Open Graph title."""
        return self._og_search_title(self._download_webpage(url, item_id))
 class WASDTVBaseVideoIE(WASDTVBaseIE):
    """
    Base class for the extractors that read a media container.
    Subclasses provide _get_container and _get_media_url; _real_extract
    builds the info dict from the first stream and the first media entry
    of the container.
    """
    def _get_container(self, url):
        """
        Download and extract the media container dict for the given URL.
        Return the container dict.
        """
        raise NotImplementedError
    def _get_media_url(self, media_meta):
        """
        Extract the m3u8 URL from the media_meta dict.
        Return a tuple (url: str, is_live: bool).
        """
        raise NotImplementedError
    def _real_extract(self, url):
        """
        Build the info dict for the media container behind url.
        Raise ExtractorError if the container has no stream/media entry or
        if no identifier can be found for the video.
        """
        container = self._get_container(url)
        try:
            stream = container['media_container_streams'][0]
            media = stream['stream_media'][0]
            media_meta = media['media_meta']
        except (KeyError, IndexError, TypeError):
            # An empty or missing list would otherwise surface as a bare
            # IndexError/TypeError traceback.
            raise ExtractorError('Unable to find media in the media container')
        media_url, is_live = self._get_media_url(media_meta)
        video_id = media.get('media_id') or container.get('media_container_id')
        if not video_id:
            # compat_str(None) would silently produce the bogus id 'None'.
            raise ExtractorError('Unable to extract video id')
        formats = self._extract_m3u8_formats(media_url, video_id, 'mp4')
        # The 'formats' list is expected to be ordered from worst to best.
        self._sort_formats(formats)
        # Live streams report the current audience, records the total one.
        viewers_key = (
            'stream_current_viewers' if is_live else 'stream_total_viewers')
        return {
            'id': compat_str(video_id),
            'title': (
                container.get('media_container_name')
                or self._extract_og_title(url, video_id)),
            'description': container.get('media_container_description'),
            'thumbnails': self._extract_thumbnails(
                media_meta.get('media_preview_images')),
            'timestamp': parse_iso8601(container.get('created_at')),
            'view_count': int_or_none(stream.get(viewers_key)),
            'is_live': is_live,
            'formats': formats,
        }
 class WASDTVStreamIE(WASDTVBaseVideoIE):
    """Extractor for the live stream of a channel addressed by its nickname."""
    IE_NAME = 'wasdtv:stream'
    _VALID_URL = r'https?://wasd\.tv/(?P<id>[^/#?]+)$'
    _TEST = {
        'url': 'https://wasd.tv/24_7',
        'info_dict': {
            'id': '559738',
            'ext': 'mp4',
            'title': 'Live 24/7 Music',
            'description': '24&#x2F;7 Music',
            'timestamp': int,
            'upload_date': r're:^\d{8}$',
            'is_live': True,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }
    def _get_container(self, url):
        """
        Resolve the channel nickname from url and return the first running
        media container of that channel.
        Raise ExtractorError (expected) if the channel has no running
        container, i.e. it is offline.
        """
        nickname = self._match_id(url)
        channel = self._fetch(
            'channels', 'nicknames', nickname,
            item_id=nickname,
            description='channel')
        channel_id = channel['channel_id']
        containers = self._fetch(
            'v2', 'media-containers',
            query={
                'channel_id': channel_id,
                'media_container_type': 'SINGLE',
                'media_container_status': 'RUNNING',
            },
            item_id=channel_id,
            description='running media containers')
        if not containers:
            # %-formatting instead of an auto-numbered '{}' field, which
            # str.format does not accept before Python 2.7.
            raise ExtractorError(
                '%s is offline' % nickname, expected=True)
        return containers[0]
    def _get_media_url(self, media_meta):
        """Return the 'media_url' entry of media_meta, always flagged as live."""
        return media_meta['media_url'], True
 class WASDTVRecordIE(WASDTVBaseVideoIE):
    """Extractor for stream records addressed by the 'record' query parameter."""
    IE_NAME = 'wasdtv:record'
    _VALID_URL = r'https?://wasd\.tv/[^/#?]+/videos\?record=(?P<id>\d+)$'
    _TEST = {
        'url': 'https://wasd.tv/mightypoot/videos?record=551500',
        'info_dict': {
            'id': '551593',
            'ext': 'mp4',
            'title': 'Похвали Стримера Финал: Fall Guys, Darkest Dungeon',
            'description': 'Здрасте.\nна этом всё',
            'timestamp': 1598885270,
            'upload_date': '20200831',
            'is_live': False,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }
    def _get_container(self, url):
        """Fetch the media container whose id is given in the record URL."""
        record_id = self._match_id(url)
        api_path = ('v2', 'media-containers', record_id)
        return self._fetch(
            *api_path,
            item_id=record_id,
            description='media container')
    def _get_media_url(self, media_meta):
        """
        Return (url, is_live) for the record.
        The archive URL is used when present (not live); otherwise
        the 'media_url' entry is returned and flagged as live.
        """
        archive_url = media_meta.get('media_archive_url')
        if not archive_url:
            return media_meta['media_url'], True
        return archive_url, False
 class WASDTVClipIE(WASDTVBaseIE):
    """Extractor for clips addressed by the 'clip' query parameter."""
    IE_NAME = 'wasdtv:clip'
    _VALID_URL = r'https?://wasd\.tv/[^/#?]+/clips\?clip=(?P<id>\d+)$'
    _TEST = {
        'url': 'https://wasd.tv/dawgos/clips?clip=5539',
        'info_dict': {
            'id': '5539',
            'ext': 'mp4',
            'title': 'это про вас',
            'timestamp': 1598912283,
            'upload_date': '20200831',
            'view_count': None,
        },
        'params': {
            'skip_download': True,
        },
    }
    def _real_extract(self, url):
        """
        Build the info dict for the clip behind url.
        The clip metadata comes from the 'v2/clips/<id>' API resource; the
        page's Open Graph title is used when the API gives no clip title.
        """
        clip_id = self._match_id(url)
        clip = self._fetch(
            'v2', 'clips', clip_id,
            item_id=clip_id,
            description='clip')
        clip_data = clip['clip_data']
        formats = self._extract_m3u8_formats(
            clip_data['url'], clip_id, 'mp4')
        # The 'formats' list is expected to be ordered from worst to best.
        self._sort_formats(formats)
        return {
            'id': compat_str(clip_id),
            'title': (
                clip.get('clip_title')
                or self._extract_og_title(url, clip_id)),
            'thumbnails': self._extract_thumbnails(clip_data.get('preview')),
            'timestamp': parse_iso8601(clip.get('created_at')),
            'view_count': int_or_none(clip.get('clip_views_count')),
            'formats': formats,
        }