1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2025-01-07 13:47:54 +01:00

[spreaker] Fixes requested

Escape the literal dot (`.`) in regexes
Make separate extractors for episode page, playlist (show), API
Support API's direct links to MP3 files
Set counts to None when they are not found
Handle the case where published_at is not present
Other fixes
This commit is contained in:
Andrew Udvare 2017-08-10 16:38:47 -04:00
parent 91e64c6941
commit 2eb228df1c
2 changed files with 99 additions and 83 deletions

View File

@ -958,7 +958,11 @@ from .sport5 import Sport5IE
from .sportbox import SportBoxEmbedIE from .sportbox import SportBoxEmbedIE
from .sportdeutschland import SportDeutschlandIE from .sportdeutschland import SportDeutschlandIE
from .sportschau import SportschauIE from .sportschau import SportschauIE
from .spreaker import SpreakerIE from .spreaker import (
SpreakerIE,
SpreakerAPIEpisodeIE,
SpreakerPlaylistIE
)
from .sprout import SproutIE from .sprout import SproutIE
from .srgssr import ( from .srgssr import (
SRGSSRIE, SRGSSRIE,

View File

@ -8,39 +8,49 @@ from ..compat import compat_str
from ..utils import int_or_none, ExtractorError from ..utils import int_or_none, ExtractorError
class SpreakerIE(InfoExtractor): class SpreakerPlaylistIE(InfoExtractor):
IE_NAME = 'spreaker' IE_NAME = 'spreaker'
_VALID_URL = r"""(?x)^ _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+'
https?:// _TEST = {
(?:www.|api.)?
spreaker.com/
(?:
show/[a-z0-9_-]+|
user/[a-z0-9_-]+/[a-z0-9_-]|
episode/(?P<id>[0-9]+)
)
"""
_TESTS = [
{
'url': 'https://www.spreaker.com/show/success-with-music', 'url': 'https://www.spreaker.com/show/success-with-music',
'info_dict': { 'info_dict': {
'title': 'Success With Music', 'title': 'Success With Music',
'id': 2317431, 'id': 2317431,
}, },
'playlist_mincount': 14, 'playlist_mincount': 14,
}, }
{
'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-' def _real_extract(self, url):
'market-your-music-part-2'), html = self._download_webpage(url, None)
'info_dict': { playlist_url = self._html_search_regex(
'id': '12534508', r'data-playlist_url="(?P<url>https\://[^"]+")', html, 'url')
'ext': 'mp3', items = self._download_json(playlist_url, None)
'title': 'Marketing Your Music - Part 2', items = items['response']['playlist']['items']
'upload_date': '20170809',
'uploader': 'SWM', if not items:
'uploader_id': 9780658, raise ExtractorError('Empty playlist')
},
}, urls = [x['api_url'] for x in items]
ret = []
for index, url in enumerate(urls):
data = self._download_json(url, None)['response']['episode']
dict_ = SpreakerIE._spreaker_episode_data_to_info(data)
dict_.update({
'playlist_id': compat_str(data['show_id']),
'playlist_title': data['show']['title'],
'playlist_index': index,
})
ret.append(dict_)
return self.playlist_result(ret,
data['show_id'],
data['show']['title'])
class SpreakerAPIEpisodeIE(InfoExtractor):
IE_NAME = 'spreaker'
_VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P<id>[0-9]+)(?:/[^\.]+\.mp3$)?'
_TESTS = [
{ {
'url': 'https://api.spreaker.com/episode/12534508', 'url': 'https://api.spreaker.com/episode/12534508',
'info_dict': { 'info_dict': {
@ -51,23 +61,59 @@ class SpreakerIE(InfoExtractor):
'uploader': 'SWM', 'uploader': 'SWM',
'uploader_id': 9780658, 'uploader_id': 9780658,
}, },
} },
{
'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
'info_dict': {
'id': '12534508',
'ext': 'mp3',
'title': 'Marketing Your Music - Part 2',
'upload_date': '20170809',
'uploader': 'SWM',
'uploader_id': 9780658,
},
},
] ]
def _spreaker_episode_data_to_info(self, data): def _real_extract(self, url):
upload_date = data['published_at'][0:10].replace('-', '') episode_id = self._match_id(url)
author = data.get('author') if not re.match(r'^[0-9]+$', episode_id):
if not author: raise ExtractorError('Invalid ID')
author = {}
stats = data.get('stats') url = 'https://api.spreaker.com/episode/%s' % (episode_id,)
view_count = like_count = comment_count = 0 data = self._download_json(url, episode_id)['response']['episode']
show = data.get('show') if not data['download_enabled']:
if not show: raise ExtractorError('Not supported yet')
show = {}
else: return SpreakerIE._spreaker_episode_data_to_info(data)
show_image = show.get('image')
if not show_image:
show_image = {} class SpreakerIE(InfoExtractor):
IE_NAME = 'spreaker'
_VALID_URL = r'^https?://(?:www\.)?spreaker\.com/user/[a-z0-9_-]+/[a-z0-9_-]'
_TEST = {
'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
'info_dict': {
'id': '12534508',
'ext': 'mp3',
'title': 'Marketing Your Music - Part 2',
'upload_date': '20170809',
'uploader': 'SWM',
'uploader_id': 9780658,
},
}
@staticmethod
def _spreaker_episode_data_to_info(data):
published_at = data.get('published_at')
upload_date = None
if published_at:
upload_date = published_at[0:10].replace('-', '')
author = data.get('author', {})
stats = data.get('stats', {})
view_count = like_count = comment_count = None
show = data.get('show', {})
show_image = show.get('image', {})
if stats: if stats:
view_count = (stats.get('plays', 0) + view_count = (stats.get('plays', 0) +
@ -117,45 +163,11 @@ class SpreakerIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
episode_id = self._match_id(url) html = self._download_webpage(url, None)
episode_id = self._html_search_regex(
r'data-episode_id="(?P<id>[0-9]+)"', html, 'id')
if not re.match(r'^[0-9]+$', episode_id):
raise ExtractorError('Could not find episode ID')
data_url = 'https://api.spreaker.com/episode/%s' % (episode_id)
if re.match(r'^[0-9]+$', episode_id): return self.url_result(data_url)
data_url = url
elif '/show/' in url:
html = self._download_webpage(url, None)
playlist_url = self._html_search_regex(
r'data-playlist_url="(?P<url>https\://[^"]+")', html, 'url')
items = self._download_json(playlist_url, None)
items = items['response']['playlist']['items']
if not items:
raise ExtractorError('Empty playlist')
urls = [x['api_url'] for x in items]
ret = []
for index, url in enumerate(urls):
data = self._download_json(url, None)['response']['episode']
dict_ = self._spreaker_episode_data_to_info(data)
dict_.update({
'playlist_id': compat_str(data['show_id']),
'playlist_title': data['show']['title'],
'playlist_index': index,
})
ret.append(dict_)
return self.playlist_result(ret,
data['show_id'],
data['show']['title'])
else:
html = self._download_webpage(url, None)
episode_id = self._html_search_regex(
r'data-episode_id="(?P<id>[0-9]+)"', html, 'id')
if not re.match(r'^[0-9]+$', episode_id):
raise ExtractorError('Could not find episode ID')
data_url = 'https://api.spreaker.com/episode/%s' % (episode_id)
data = self._download_json(data_url, episode_id)['response']['episode']
if not data['download_enabled']:
raise ExtractorError('Not supported yet')
return self._spreaker_episode_data_to_info(data)