diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 09ee08aad..7ca2cfd19 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,7 +958,11 @@ from .sport5 import Sport5IE from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE from .sportschau import SportschauIE -from .spreaker import SpreakerIE +from .spreaker import ( + SpreakerIE, + SpreakerAPIEpisodeIE, + SpreakerPlaylistIE +) from .sprout import SproutIE from .srgssr import ( SRGSSRIE, diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index 3982267a8..d2fb6c304 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -8,39 +8,49 @@ from ..compat import compat_str from ..utils import int_or_none, ExtractorError -class SpreakerIE(InfoExtractor): +class SpreakerPlaylistIE(InfoExtractor): IE_NAME = 'spreaker' - _VALID_URL = r"""(?x)^ - https?:// - (?:www.|api.)? - spreaker.com/ - (?: - show/[a-z0-9_-]+| - user/[a-z0-9_-]+/[a-z0-9_-]| - episode/(?P[0-9]+) - ) - """ - _TESTS = [ - { + _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+' + _TEST = { 'url': 'https://www.spreaker.com/show/success-with-music', 'info_dict': { 'title': 'Success With Music', 'id': 2317431, }, 'playlist_mincount': 14, - }, - { - 'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-' - 'market-your-music-part-2'), - 'info_dict': { - 'id': '12534508', - 'ext': 'mp3', - 'title': 'Marketing Your Music - Part 2', - 'upload_date': '20170809', - 'uploader': 'SWM', - 'uploader_id': 9780658, - }, - }, + } + + def _real_extract(self, url): + html = self._download_webpage(url, None) + playlist_url = self._html_search_regex( + r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') + items = self._download_json(playlist_url, None) + items = items['response']['playlist']['items'] + + if not items: + raise ExtractorError('Empty playlist') + + urls = [x['api_url'] for x in items] + ret = [] + for index, url in enumerate(urls): + data = self._download_json(url, None)['response']['episode'] + dict_ = SpreakerIE._spreaker_episode_data_to_info(data) + dict_.update({ + 'playlist_id': compat_str(data['show_id']), + 'playlist_title': data['show']['title'], + 'playlist_index': index, + }) + ret.append(dict_) + + return self.playlist_result(ret, + data['show_id'], + data['show']['title']) + + +class SpreakerAPIEpisodeIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P[0-9]+)(?:/[^\.]+\.mp3$)?' + _TESTS = [ { 'url': 'https://api.spreaker.com/episode/12534508', 'info_dict': { @@ -51,23 +61,59 @@ class SpreakerIE(InfoExtractor): 'uploader': 'SWM', 'uploader_id': 9780658, }, - } + }, + { + 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'Marketing Your Music - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + }, ] - def _spreaker_episode_data_to_info(self, data): - upload_date = data['published_at'][0:10].replace('-', '') - author = data.get('author') - if not author: - author = {} - stats = data.get('stats') - view_count = like_count = comment_count = 0 - show = data.get('show') - if not show: - show = {} - else: - show_image = show.get('image') - if not show_image: - show_image = {} + def _real_extract(self, url): + episode_id = self._match_id(url) + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Invalid ID') + + url = 'https://api.spreaker.com/episode/%s' % (episode_id,) + data = self._download_json(url, episode_id)['response']['episode'] + if not data['download_enabled']: + raise ExtractorError('Not supported yet') + + return SpreakerIE._spreaker_episode_data_to_info(data) + + +class SpreakerIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/user/[a-z0-9_-]+/[a-z0-9_-]' + _TEST = { + 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'Marketing Your Music - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + } + + @staticmethod + def _spreaker_episode_data_to_info(data): + published_at = data.get('published_at') + upload_date = None + if published_at: + upload_date = published_at[0:10].replace('-', '') + author = data.get('author', {}) + stats = data.get('stats', {}) + view_count = like_count = comment_count = None + show = data.get('show', {}) + show_image = show.get('image', {}) if stats: view_count = (stats.get('plays', 0) + @@ -117,45 +163,11 @@ class SpreakerIE(InfoExtractor): } def _real_extract(self, url): - episode_id = self._match_id(url) + html = self._download_webpage(url, None) + episode_id = self._html_search_regex( + r'data-episode_id="(?P[0-9]+)"', html, 'id') + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Could not find episode ID') + data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) - if re.match(r'^[0-9]+$', episode_id): - data_url = url - elif '/show/' in url: - html = self._download_webpage(url, None) - playlist_url = self._html_search_regex( - r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') - items = self._download_json(playlist_url, None) - items = items['response']['playlist']['items'] - - if not items: - raise ExtractorError('Empty playlist') - - urls = [x['api_url'] for x in items] - ret = [] - for index, url in enumerate(urls): - data = self._download_json(url, None)['response']['episode'] - dict_ = self._spreaker_episode_data_to_info(data) - dict_.update({ - 'playlist_id': compat_str(data['show_id']), - 'playlist_title': data['show']['title'], - 'playlist_index': index, - }) - ret.append(dict_) - - return self.playlist_result(ret, - data['show_id'], - data['show']['title']) - else: - html = self._download_webpage(url, None) - episode_id = self._html_search_regex( - r'data-episode_id="(?P[0-9]+)"', html, 'id') - if not re.match(r'^[0-9]+$', episode_id): - raise ExtractorError('Could not find episode ID') - data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) - - data = self._download_json(data_url, episode_id)['response']['episode'] - if not data['download_enabled']: - raise ExtractorError('Not supported yet') - - return self._spreaker_episode_data_to_info(data) + return self.url_result(data_url)