From b653c19f8fa1d05b2b7479c0bf60e4c83e431717 Mon Sep 17 00:00:00 2001
From: Andrew Udvare
Date: Fri, 11 Aug 2017 00:31:57 -0400
Subject: [PATCH] [spreaker] Handle when playlist JSON has multiple pages

---
 youtube_dl/extractor/spreaker.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py
index 40ccdd973..e27078b76 100644
--- a/youtube_dl/extractor/spreaker.py
+++ b/youtube_dl/extractor/spreaker.py
@@ -24,12 +24,30 @@ class SpreakerPlaylistIE(InfoExtractor):
         html = self._download_webpage(url, None)
         playlist_url = self._html_search_regex(
             r'data-playlist_url="(?P<url>https\://[^"]+")', html, 'url')
-        items = self._download_json(playlist_url, None)
-        items = items['response']['playlist']['items']
+        items = self._download_json(playlist_url,
+                                    None,
+                                    'Downloading playlist JSON')
+        playlist = items['response']['playlist']
+        next_url = playlist.get('next_url')
+        items = playlist.get('items', [])
 
         if not items:
             raise ExtractorError('Empty playlist')
 
+        page_no = 2
+        download_str = 'Downloading playlist JSON page #%d'
+        while next_url:
+            items_ = self._download_json(next_url,
+                                         None,
+                                         download_str % (page_no,))
+            playlist_ = items_['response']['playlist']
+            new_items = playlist_.get('items', [])
+            if not new_items:
+                break
+            items += new_items
+            next_url = playlist_.get('next_url')
+            page_no += 1
+
         urls = [x['api_url'] for x in items]
         ret = []
         for index, url in enumerate(urls):