mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2025-01-07 13:47:54 +01:00
[spreaker] Fixes requested
Escape "." in regexes. Make separate extractors for the episode page, the playlist (show), and the API. Support the API's direct links to MP3 files. Set counts to None when they are not found. Handle the case where published_at is not present. Other fixes.
This commit is contained in:
parent
91e64c6941
commit
2eb228df1c
@ -958,7 +958,11 @@ from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .sportschau import SportschauIE
|
||||
from .spreaker import SpreakerIE
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerAPIEpisodeIE,
|
||||
SpreakerPlaylistIE
|
||||
)
|
||||
from .sprout import SproutIE
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
|
@ -8,39 +8,49 @@ from ..compat import compat_str
|
||||
from ..utils import int_or_none, ExtractorError
|
||||
|
||||
|
||||
class SpreakerIE(InfoExtractor):
|
||||
class SpreakerPlaylistIE(InfoExtractor):
|
||||
IE_NAME = 'spreaker'
|
||||
_VALID_URL = r"""(?x)^
|
||||
https?://
|
||||
(?:www.|api.)?
|
||||
spreaker.com/
|
||||
(?:
|
||||
show/[a-z0-9_-]+|
|
||||
user/[a-z0-9_-]+/[a-z0-9_-]|
|
||||
episode/(?P<id>[0-9]+)
|
||||
)
|
||||
"""
|
||||
_TESTS = [
|
||||
{
|
||||
_VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+'
|
||||
_TEST = {
|
||||
'url': 'https://www.spreaker.com/show/success-with-music',
|
||||
'info_dict': {
|
||||
'title': 'Success With Music',
|
||||
'id': 2317431,
|
||||
},
|
||||
'playlist_mincount': 14,
|
||||
},
|
||||
{
|
||||
'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-'
|
||||
'market-your-music-part-2'),
|
||||
'info_dict': {
|
||||
'id': '12534508',
|
||||
'ext': 'mp3',
|
||||
'title': 'Marketing Your Music - Part 2',
|
||||
'upload_date': '20170809',
|
||||
'uploader': 'SWM',
|
||||
'uploader_id': 9780658,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
html = self._download_webpage(url, None)
|
||||
playlist_url = self._html_search_regex(
|
||||
r'data-playlist_url="(?P<url>https\://[^"]+")', html, 'url')
|
||||
items = self._download_json(playlist_url, None)
|
||||
items = items['response']['playlist']['items']
|
||||
|
||||
if not items:
|
||||
raise ExtractorError('Empty playlist')
|
||||
|
||||
urls = [x['api_url'] for x in items]
|
||||
ret = []
|
||||
for index, url in enumerate(urls):
|
||||
data = self._download_json(url, None)['response']['episode']
|
||||
dict_ = SpreakerIE._spreaker_episode_data_to_info(data)
|
||||
dict_.update({
|
||||
'playlist_id': compat_str(data['show_id']),
|
||||
'playlist_title': data['show']['title'],
|
||||
'playlist_index': index,
|
||||
})
|
||||
ret.append(dict_)
|
||||
|
||||
return self.playlist_result(ret,
|
||||
data['show_id'],
|
||||
data['show']['title'])
|
||||
|
||||
|
||||
class SpreakerAPIEpisodeIE(InfoExtractor):
|
||||
IE_NAME = 'spreaker'
|
||||
_VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P<id>[0-9]+)(?:/[^\.]+\.mp3$)?'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://api.spreaker.com/episode/12534508',
|
||||
'info_dict': {
|
||||
@ -51,23 +61,59 @@ class SpreakerIE(InfoExtractor):
|
||||
'uploader': 'SWM',
|
||||
'uploader_id': 9780658,
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
|
||||
'info_dict': {
|
||||
'id': '12534508',
|
||||
'ext': 'mp3',
|
||||
'title': 'Marketing Your Music - Part 2',
|
||||
'upload_date': '20170809',
|
||||
'uploader': 'SWM',
|
||||
'uploader_id': 9780658,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _spreaker_episode_data_to_info(self, data):
|
||||
upload_date = data['published_at'][0:10].replace('-', '')
|
||||
author = data.get('author')
|
||||
if not author:
|
||||
author = {}
|
||||
stats = data.get('stats')
|
||||
view_count = like_count = comment_count = 0
|
||||
show = data.get('show')
|
||||
if not show:
|
||||
show = {}
|
||||
else:
|
||||
show_image = show.get('image')
|
||||
if not show_image:
|
||||
show_image = {}
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
if not re.match(r'^[0-9]+$', episode_id):
|
||||
raise ExtractorError('Invalid ID')
|
||||
|
||||
url = 'https://api.spreaker.com/episode/%s' % (episode_id,)
|
||||
data = self._download_json(url, episode_id)['response']['episode']
|
||||
if not data['download_enabled']:
|
||||
raise ExtractorError('Not supported yet')
|
||||
|
||||
return SpreakerIE._spreaker_episode_data_to_info(data)
|
||||
|
||||
|
||||
class SpreakerIE(InfoExtractor):
|
||||
IE_NAME = 'spreaker'
|
||||
_VALID_URL = r'^https?://(?:www\.)?spreaker\.com/user/[a-z0-9_-]+/[a-z0-9_-]'
|
||||
_TEST = {
|
||||
'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
|
||||
'info_dict': {
|
||||
'id': '12534508',
|
||||
'ext': 'mp3',
|
||||
'title': 'Marketing Your Music - Part 2',
|
||||
'upload_date': '20170809',
|
||||
'uploader': 'SWM',
|
||||
'uploader_id': 9780658,
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _spreaker_episode_data_to_info(data):
|
||||
published_at = data.get('published_at')
|
||||
upload_date = None
|
||||
if published_at:
|
||||
upload_date = published_at[0:10].replace('-', '')
|
||||
author = data.get('author', {})
|
||||
stats = data.get('stats', {})
|
||||
view_count = like_count = comment_count = None
|
||||
show = data.get('show', {})
|
||||
show_image = show.get('image', {})
|
||||
|
||||
if stats:
|
||||
view_count = (stats.get('plays', 0) +
|
||||
@ -117,45 +163,11 @@ class SpreakerIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
html = self._download_webpage(url, None)
|
||||
episode_id = self._html_search_regex(
|
||||
r'data-episode_id="(?P<id>[0-9]+)"', html, 'id')
|
||||
if not re.match(r'^[0-9]+$', episode_id):
|
||||
raise ExtractorError('Could not find episode ID')
|
||||
data_url = 'https://api.spreaker.com/episode/%s' % (episode_id)
|
||||
|
||||
if re.match(r'^[0-9]+$', episode_id):
|
||||
data_url = url
|
||||
elif '/show/' in url:
|
||||
html = self._download_webpage(url, None)
|
||||
playlist_url = self._html_search_regex(
|
||||
r'data-playlist_url="(?P<url>https\://[^"]+")', html, 'url')
|
||||
items = self._download_json(playlist_url, None)
|
||||
items = items['response']['playlist']['items']
|
||||
|
||||
if not items:
|
||||
raise ExtractorError('Empty playlist')
|
||||
|
||||
urls = [x['api_url'] for x in items]
|
||||
ret = []
|
||||
for index, url in enumerate(urls):
|
||||
data = self._download_json(url, None)['response']['episode']
|
||||
dict_ = self._spreaker_episode_data_to_info(data)
|
||||
dict_.update({
|
||||
'playlist_id': compat_str(data['show_id']),
|
||||
'playlist_title': data['show']['title'],
|
||||
'playlist_index': index,
|
||||
})
|
||||
ret.append(dict_)
|
||||
|
||||
return self.playlist_result(ret,
|
||||
data['show_id'],
|
||||
data['show']['title'])
|
||||
else:
|
||||
html = self._download_webpage(url, None)
|
||||
episode_id = self._html_search_regex(
|
||||
r'data-episode_id="(?P<id>[0-9]+)"', html, 'id')
|
||||
if not re.match(r'^[0-9]+$', episode_id):
|
||||
raise ExtractorError('Could not find episode ID')
|
||||
data_url = 'https://api.spreaker.com/episode/%s' % (episode_id)
|
||||
|
||||
data = self._download_json(data_url, episode_id)['response']['episode']
|
||||
if not data['download_enabled']:
|
||||
raise ExtractorError('Not supported yet')
|
||||
|
||||
return self._spreaker_episode_data_to_info(data)
|
||||
return self.url_result(data_url)
|
||||
|
Loading…
Reference in New Issue
Block a user