1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-23 00:54:31 +01:00

Fail if mandatory fields absent. Add test.

This commit is contained in:
FA 2019-05-07 17:17:43 -07:00
parent e7e8e4b445
commit c06ca8dcc9

View File

@@ -3,16 +3,18 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
urljoin, urljoin,
int_or_none, int_or_none,
url_or_none, url_or_none,
try_get, try_get,
js_to_json,
) )
class EarthCamIE(InfoExtractor): class EarthCamIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?earthcam\.com/.*\?.*cam=(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?earthcam\.com/.*\?.*cam=(?P<id>\w+)'
_TEST = { _TESTS = [{
'url': 'https://www.earthcam.com/usa/newyork/timessquare/?cam=tsrobo1', 'url': 'https://www.earthcam.com/usa/newyork/timessquare/?cam=tsrobo1',
'info_dict': { 'info_dict': {
'id': 'tsrobo1', 'id': 'tsrobo1',
@@ -23,28 +25,42 @@ class EarthCamIE(InfoExtractor):
'is_live': True, 'is_live': True,
'thumbnail': r're:^https?://.*\.(jpg|png)$', 'thumbnail': r're:^https?://.*\.(jpg|png)$',
}, },
} }, {
'url': 'https://www.earthcam.com/usa/louisiana/neworleans/bourbonstreet/?cam=catsmeowkaraoke',
'info_dict': {
'id': 'catsmeowkaraoke',
'ext': 'mp4',
'title': 'New Orleans, LA',
'description': 'Get a front row seat to all the wild and crazy stage performances happening at the Cat\'s Meow Karaoke Bar! Over the years, thousands of guests have enjoyed their moment singing in the spotlight at this popular local spot!',
'view_count': int,
'is_live': True,
'thumbnail': r're:^https?://.*\.(jpg|png)$',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
json_str = self._html_search_regex(r'var\s+json_base\s*=\s*(?P<jstr>{\s*"cam"\s*:\s*{.*}.*});', webpage, 'json', group='jstr') json_str = self._html_search_regex(r'var\s+json_base\s*=\s*(?P<json_str>{\s*"cam"\s*:\s*{.*}.*});', webpage, 'json', group='json_str', default='{}')
json_base = self._parse_json(json_str, video_id) json_base = self._parse_json(js_to_json(json_str), video_id)
video_info = try_get(json_base, lambda x: x['cam'][video_id], dict) or {}
title = video_info.get("long_title") video_info = json_base['cam'][video_id]
description = video_info.get("description") domain = video_info['html5_streamingdomain']
thumbnail = video_info.get("thumbimage") path = video_info['html5_streampath']
view_count = int_or_none(video_info.get("streamviews"))
domain = video_info.get("html5_streamingdomain")
path = video_info.get("html5_streampath")
m3u8_url = urljoin(domain, path) m3u8_url = urljoin(domain, path)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native')
title = video_info.get('long_title') or self._og_search_title(webpage)
description = video_info.get('description') or self._og_search_description(webpage)
thumbnail = url_or_none(video_info.get('thumbimage')) or self._og_search_thumbnail(webpage)
view_count = int_or_none(video_info.get("streamviews"))
return { return {
'id': video_id, 'id': video_id,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native'), 'formats': formats,
'title': title or self._og_search_title(webpage), 'title': title,
'description': description or self._og_search_description(webpage), 'description': description,
'view_count': view_count, 'view_count': view_count,
'is_live': True, 'is_live': True,
'thumbnail': url_or_none(thumbnail), 'thumbnail': thumbnail,
} }