From 2eb228df1cb2ad0c91ef37fe25be47add412d313 Mon Sep 17 00:00:00 2001
From: Andrew Udvare <audvare@gmail.com>
Date: Thu, 10 Aug 2017 16:38:47 -0400
Subject: [PATCH] [spreaker] Fixes requested

Escape literal dots (.) in the URL regexes
Split into separate extractors for episode pages, playlists (shows), and API URLs
Support the API's direct download links to MP3 files
Set counts to None when they are not found
Handle a missing published_at field
Other fixes
---
 youtube_dl/extractor/extractors.py |   6 +-
 youtube_dl/extractor/spreaker.py   | 176 +++++++++++++++--------------
 2 files changed, 99 insertions(+), 83 deletions(-)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 09ee08aad..7ca2cfd19 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -958,7 +958,11 @@ from .sport5 import Sport5IE
 from .sportbox import SportBoxEmbedIE
 from .sportdeutschland import SportDeutschlandIE
 from .sportschau import SportschauIE
-from .spreaker import SpreakerIE
+from .spreaker import (
+    SpreakerIE,
+    SpreakerAPIEpisodeIE,
+    SpreakerPlaylistIE
+)
 from .sprout import SproutIE
 from .srgssr import (
     SRGSSRIE,
diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py
index 3982267a8..d2fb6c304 100644
--- a/youtube_dl/extractor/spreaker.py
+++ b/youtube_dl/extractor/spreaker.py
@@ -8,39 +8,49 @@ from ..compat import compat_str
 from ..utils import int_or_none, ExtractorError
 
 
-class SpreakerIE(InfoExtractor):
+class SpreakerPlaylistIE(InfoExtractor):
     IE_NAME = 'spreaker'
-    _VALID_URL = r"""(?x)^
-        https?://
-        (?:www.|api.)?
-        spreaker.com/
-        (?:
-            show/[a-z0-9_-]+|
-            user/[a-z0-9_-]+/[a-z0-9_-]|
-            episode/(?P<id>[0-9]+)
-        )
-    """
-    _TESTS = [
-        {
+    _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+'
+    _TEST = {
             'url': 'https://www.spreaker.com/show/success-with-music',
             'info_dict': {
                 'title': 'Success With Music',
                 'id': 2317431,
             },
             'playlist_mincount': 14,
-        },
-        {
-            'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-'
-                    'market-your-music-part-2'),
-            'info_dict': {
-                'id': '12534508',
-                'ext': 'mp3',
-                'title': 'Marketing Your Music - Part 2',
-                'upload_date': '20170809',
-                'uploader': 'SWM',
-                'uploader_id': 9780658,
-            },
-        },
+    }
+
+    def _real_extract(self, url):
+        html = self._download_webpage(url, None)
+        playlist_url = self._html_search_regex(
+            r'data-playlist_url="(?P<url>https\://[^"]+")', html, 'url')
+        items = self._download_json(playlist_url, None)
+        items = items['response']['playlist']['items']
+
+        if not items:
+            raise ExtractorError('Empty playlist')
+
+        urls = [x['api_url'] for x in items]
+        ret = []
+        for index, url in enumerate(urls):
+            data = self._download_json(url, None)['response']['episode']
+            dict_ = SpreakerIE._spreaker_episode_data_to_info(data)
+            dict_.update({
+                'playlist_id': compat_str(data['show_id']),
+                'playlist_title': data['show']['title'],
+                'playlist_index': index,
+            })
+            ret.append(dict_)
+
+        return self.playlist_result(ret,
+                                    data['show_id'],
+                                    data['show']['title'])
+
+
+class SpreakerAPIEpisodeIE(InfoExtractor):
+    IE_NAME = 'spreaker'
+    _VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P<id>[0-9]+)(?:/[^\.]+\.mp3$)?'
+    _TESTS = [
         {
             'url': 'https://api.spreaker.com/episode/12534508',
             'info_dict': {
@@ -51,23 +61,59 @@ class SpreakerIE(InfoExtractor):
                 'uploader': 'SWM',
                 'uploader_id': 9780658,
             },
-        }
+        },
+        {
+            'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
+            'info_dict': {
+                'id': '12534508',
+                'ext': 'mp3',
+                'title': 'Marketing Your Music - Part 2',
+                'upload_date': '20170809',
+                'uploader': 'SWM',
+                'uploader_id': 9780658,
+            },
+        },
     ]
 
-    def _spreaker_episode_data_to_info(self, data):
-        upload_date = data['published_at'][0:10].replace('-', '')
-        author = data.get('author')
-        if not author:
-            author = {}
-        stats = data.get('stats')
-        view_count = like_count = comment_count = 0
-        show = data.get('show')
-        if not show:
-            show = {}
-        else:
-            show_image = show.get('image')
-        if not show_image:
-            show_image = {}
+    def _real_extract(self, url):
+        episode_id = self._match_id(url)
+        if not re.match(r'^[0-9]+$', episode_id):
+            raise ExtractorError('Invalid ID')
+
+        url = 'https://api.spreaker.com/episode/%s' % (episode_id,)
+        data = self._download_json(url, episode_id)['response']['episode']
+        if not data['download_enabled']:
+            raise ExtractorError('Not supported yet')
+
+        return SpreakerIE._spreaker_episode_data_to_info(data)
+
+
+class SpreakerIE(InfoExtractor):
+    IE_NAME = 'spreaker'
+    _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/user/[a-z0-9_-]+/[a-z0-9_-]'
+    _TEST = {
+        'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
+        'info_dict': {
+            'id': '12534508',
+            'ext': 'mp3',
+            'title': 'Marketing Your Music - Part 2',
+            'upload_date': '20170809',
+            'uploader': 'SWM',
+            'uploader_id': 9780658,
+        },
+    }
+
+    @staticmethod
+    def _spreaker_episode_data_to_info(data):
+        published_at = data.get('published_at')
+        upload_date = None
+        if published_at:
+            upload_date = published_at[0:10].replace('-', '')
+        author = data.get('author', {})
+        stats = data.get('stats', {})
+        view_count = like_count = comment_count = None
+        show = data.get('show', {})
+        show_image = show.get('image', {})
 
         if stats:
             view_count = (stats.get('plays', 0) +
@@ -117,45 +163,11 @@ class SpreakerIE(InfoExtractor):
         }
 
     def _real_extract(self, url):
-        episode_id = self._match_id(url)
+        html = self._download_webpage(url, None)
+        episode_id = self._html_search_regex(
+            r'data-episode_id="(?P<id>[0-9]+)"', html, 'id')
+        if not re.match(r'^[0-9]+$', episode_id):
+            raise ExtractorError('Could not find episode ID')
+        data_url = 'https://api.spreaker.com/episode/%s' % (episode_id)
 
-        if re.match(r'^[0-9]+$', episode_id):
-            data_url = url
-        elif '/show/' in url:
-            html = self._download_webpage(url, None)
-            playlist_url = self._html_search_regex(
-                r'data-playlist_url="(?P<url>https\://[^"]+")', html, 'url')
-            items = self._download_json(playlist_url, None)
-            items = items['response']['playlist']['items']
-
-            if not items:
-                raise ExtractorError('Empty playlist')
-
-            urls = [x['api_url'] for x in items]
-            ret = []
-            for index, url in enumerate(urls):
-                data = self._download_json(url, None)['response']['episode']
-                dict_ = self._spreaker_episode_data_to_info(data)
-                dict_.update({
-                    'playlist_id': compat_str(data['show_id']),
-                    'playlist_title': data['show']['title'],
-                    'playlist_index': index,
-                })
-                ret.append(dict_)
-
-            return self.playlist_result(ret,
-                                        data['show_id'],
-                                        data['show']['title'])
-        else:
-            html = self._download_webpage(url, None)
-            episode_id = self._html_search_regex(
-                r'data-episode_id="(?P<id>[0-9]+)"', html, 'id')
-            if not re.match(r'^[0-9]+$', episode_id):
-                raise ExtractorError('Could not find episode ID')
-            data_url = 'https://api.spreaker.com/episode/%s' % (episode_id)
-
-        data = self._download_json(data_url, episode_id)['response']['episode']
-        if not data['download_enabled']:
-            raise ExtractorError('Not supported yet')
-
-        return self._spreaker_episode_data_to_info(data)
+        return self.url_result(data_url)