1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-26 10:24:33 +01:00

[wasdtv] Add new extractor

This commit is contained in:
un.def 2020-09-14 16:04:04 +03:00
parent e8c5d40bc8
commit cdf98a34ea
2 changed files with 230 additions and 0 deletions

View File

@ -1383,6 +1383,11 @@ from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE from .vzaar import VzaarIE
from .wakanim import WakanimIE from .wakanim import WakanimIE
from .walla import WallaIE from .walla import WallaIE
from .wasdtv import (
WASDTVStreamIE,
WASDTVRecordIE,
WASDTVClipIE,
)
from .washingtonpost import ( from .washingtonpost import (
WashingtonPostIE, WashingtonPostIE,
WashingtonPostArticleIE, WashingtonPostArticleIE,

View File

@ -0,0 +1,225 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_kwargs,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
urljoin,
)
class WASDTVBaseIE(InfoExtractor):
_API_BASE = 'https://wasd.tv/api/'
_THUMBNAIL_SIZES = ('small', 'medium', 'large')
def _fetch(self, *path, **kwargs):
"""
Fetch the resource using WASD.TV API.
The positional arguments are the parts of the resource path relative
to the _API_BASE.
The following keyword arguments are required by this method:
* item_id -- item identifier (for logging purposes).
* description -- human-readable resource description (for logging
purposes).
Any additional keyword arguments are passed directly to
the _download_json method.
"""
description = kwargs.pop('description')
response = self._download_json(
urljoin(self._API_BASE, '/'.join(map(compat_str, path))),
kwargs.pop('item_id'),
note='Downloading {} metadata'.format(description),
errnote='Unable to download {} metadata'.format(description),
**compat_kwargs(kwargs))
if not isinstance(response, dict):
raise ExtractorError(
'JSON object expected, got: {!r}'.format(response))
error = response.get('error')
if error:
raise ExtractorError(
'{} returned error: {}'.format(self.IE_NAME, error['code']),
expected=True)
return response['result']
def _extract_thumbnails(self, thumbnails_dict):
if not thumbnails_dict:
return None
thumbnails = []
for index, thumbnail_size in enumerate(self._THUMBNAIL_SIZES):
thumbnail_url = thumbnails_dict.get(thumbnail_size)
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'preference': index,
})
return thumbnails
def _extract_og_title(self, url, item_id):
return self._og_search_title(self._download_webpage(url, item_id))
class WASDTVBaseVideoIE(WASDTVBaseIE):
def _get_container(self, url):
"""
Download and extract the media container dict for the given URL.
Return the container dict.
"""
raise NotImplementedError
def _get_media_url(self, media_meta):
"""
Extract the m3u8 URL from the media_meta dict.
Return a tuple (url: str, is_live: bool).
"""
raise NotImplementedError
def _real_extract(self, url):
container = self._get_container(url)
stream = container['media_container_streams'][0]
media = stream['stream_media'][0]
media_meta = media['media_meta']
media_url, is_live = self._get_media_url(media_meta)
video_id = media.get('media_id') or container.get('media_container_id')
return {
'id': compat_str(video_id),
'title': (
container.get('media_container_name')
or self._extract_og_title(url, video_id)),
'description': container.get('media_container_description'),
'thumbnails': self._extract_thumbnails(
media_meta.get('media_preview_images')),
'timestamp': parse_iso8601(container.get('created_at')),
'view_count': int_or_none(stream.get(
'stream_current_viewers' if is_live
else 'stream_total_viewers')),
'is_live': is_live,
'formats': self._extract_m3u8_formats(media_url, video_id, 'mp4'),
}
class WASDTVStreamIE(WASDTVBaseVideoIE):
IE_NAME = 'wasdtv:stream'
_VALID_URL = r'https?://wasd\.tv/(?P<id>[^/#?]+)$'
_TEST = {
'url': 'https://wasd.tv/24_7',
'info_dict': {
'id': '559738',
'ext': 'mp4',
'title': 'Live 24/7 Music',
'description': '24&#x2F;7 Music',
'timestamp': int,
'upload_date': r're:^\d{8}$',
'is_live': True,
'view_count': int,
},
'params': {
'skip_download': True,
},
}
def _get_container(self, url):
nickname = self._match_id(url)
channel = self._fetch(
'channels', 'nicknames', nickname,
item_id=nickname,
description='channel')
channel_id = channel['channel_id']
containers = self._fetch(
'v2', 'media-containers',
query={
'channel_id': channel_id,
'media_container_type': 'SINGLE',
'media_container_status': 'RUNNING',
},
item_id=channel_id,
description='running media containers')
if not containers:
raise ExtractorError(
'{} is offline'.format(nickname), expected=True)
return containers[0]
def _get_media_url(self, media_meta):
return media_meta['media_url'], True
class WASDTVRecordIE(WASDTVBaseVideoIE):
IE_NAME = 'wasdtv:record'
_VALID_URL = r'https?://wasd\.tv/[^/#?]+/videos\?record=(?P<id>\d+)$'
_TEST = {
'url': 'https://wasd.tv/mightypoot/videos?record=551500',
'info_dict': {
'id': '551593',
'ext': 'mp4',
'title': 'Похвали Стримера Финал: Fall Guys, Darkest Dungeon',
'description': 'Здрасте.\nна этом всё',
'timestamp': 1598885270,
'upload_date': '20200831',
'is_live': False,
'view_count': int,
},
'params': {
'skip_download': True,
},
}
def _get_container(self, url):
container_id = self._match_id(url)
return self._fetch(
'v2', 'media-containers', container_id,
item_id=container_id,
description='media container')
def _get_media_url(self, media_meta):
media_archive_url = media_meta.get('media_archive_url')
if media_archive_url:
return media_archive_url, False
return media_meta['media_url'], True
class WASDTVClipIE(WASDTVBaseIE):
IE_NAME = 'wasdtv:clip'
_VALID_URL = r'https?://wasd\.tv/[^/#?]+/clips\?clip=(?P<id>\d+)$'
_TEST = {
'url': 'https://wasd.tv/dawgos/clips?clip=5539',
'info_dict': {
'id': '5539',
'ext': 'mp4',
'title': 'это про вас',
'timestamp': 1598912283,
'upload_date': '20200831',
'view_count': None,
},
'params': {
'skip_download': True,
},
}
def _real_extract(self, url):
clip_id = self._match_id(url)
clip = self._fetch(
'v2', 'clips', clip_id,
item_id=clip_id,
description='clip')
clip_data = clip['clip_data']
return {
'id': compat_str(clip_id),
'title': (
clip.get('clip_title')
or self._extract_og_title(url, clip_id)),
'thumbnails': self._extract_thumbnails(clip_data.get('preview')),
'timestamp': parse_iso8601(clip.get('created_at')),
'view_count': int_or_none(clip.get('clip_views_count')),
'formats': self._extract_m3u8_formats(
clip_data['url'], clip_id, 'mp4'),
}