diff --git a/youtube_dl/extractor/earthcam.py b/youtube_dl/extractor/earthcam.py index 0c82c7ee0..2544e8c59 100644 --- a/youtube_dl/extractor/earthcam.py +++ b/youtube_dl/extractor/earthcam.py @@ -3,16 +3,18 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, urljoin, int_or_none, url_or_none, try_get, + js_to_json, ) class EarthCamIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?earthcam\.com/.*\?.*cam=(?P\w+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.earthcam.com/usa/newyork/timessquare/?cam=tsrobo1', 'info_dict': { 'id': 'tsrobo1', @@ -22,29 +24,43 @@ class EarthCamIE(InfoExtractor): 'view_count': int, 'is_live': True, 'thumbnail': r're:^https?://.*\.(jpg|png)$', - }, - } + }, + }, { + 'url': 'https://www.earthcam.com/usa/louisiana/neworleans/bourbonstreet/?cam=catsmeowkaraoke', + 'info_dict': { + 'id': 'catsmeowkaraoke', + 'ext': 'mp4', + 'title': 'New Orleans, LA', + 'description': 'Get a front row seat to all the wild and crazy stage performances happening at the Cat\'s Meow Karaoke Bar! Over the years, thousands of guests have enjoyed their moment singing in the spotlight at this popular local spot!', + 'view_count': int, + 'is_live': True, + 'thumbnail': r're:^https?://.*\.(jpg|png)$', + } + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - json_str = self._html_search_regex(r'var\s+json_base\s*=\s*(?P{\s*"cam"\s*:\s*{.*}.*});', webpage, 'json', group='jstr') - json_base = self._parse_json(json_str, video_id) - video_info = try_get(json_base, lambda x: x['cam'][video_id], dict) or {} - title = video_info.get("long_title") - description = video_info.get("description") - thumbnail = video_info.get("thumbimage") - view_count = int_or_none(video_info.get("streamviews")) - domain = video_info.get("html5_streamingdomain") - path = video_info.get("html5_streampath") + json_str = self._html_search_regex(r'var\s+json_base\s*=\s*(?P{\s*"cam"\s*:\s*{.*}.*});', webpage, 'json', group='json_str', default='{}') + json_base = self._parse_json(js_to_json(json_str), video_id) + + video_info = jsonn_base['cam'][video_id] + domain = video_info['html5_streamingdomain'] + path = video_info['html5_streampath'] m3u8_url = urljoin(domain, path) + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native') + title = video_info.get('long_title') or self._og_search_title(webpage) + description = video_info.get('description') or self._og_search_description(webpage) + thumbnail = url_or_none(video_info.get('thumbimage')) or self._og_search_thumbnail(webpage) + view_count = int_or_none(video_info.get("streamviews")) + return { 'id': video_id, - 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native'), - 'title': title or self._og_search_title(webpage), - 'description': description or self._og_search_description(webpage), + 'formats': formats, + 'title': title, + 'description': description, 'view_count': view_count, 'is_live': True, - 'thumbnail': url_or_none(thumbnail), + 'thumbnail': thumbnail, }