1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2025-01-23 21:17:55 +01:00

Fix playlist download from BBC iPlayer.

This commit is contained in:
Daniel Cassidy 2019-12-17 07:02:41 +00:00
parent 2dbc0967f2
commit 999c87a618

View File

@ -1247,31 +1247,13 @@ class BBCCoUkArticleIE(InfoExtractor):
class BBCCoUkPlaylistBaseIE(InfoExtractor): class BBCCoUkPlaylistBaseIE(InfoExtractor):
def _entries(self, webpage, url, playlist_id):
    """Yield one URL result per video id found on the playlist page(s).

    Video ids are collected from ``webpage`` with ``_VIDEO_ID_TEMPLATE``
    filled in with ``BBCCoUkIE._ID_REGEX``; each id is turned into an
    episode URL via ``_URL_TEMPLATE``.  When ``url`` already carries a
    ``page`` query parameter only that page is processed; otherwise the
    "next" pagination link is followed until no such link is found.
    """
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    # An explicit ?page=N means the caller asked for exactly one page.
    only_requested_page = 'page' in query
    id_pattern = self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX

    # The page passed in is page 1, so the first follow-up download is page 2.
    page_num = 2
    while True:
        for video_id in re.findall(id_pattern, webpage):
            yield self.url_result(
                self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())

        if only_requested_page:
            return

        next_href = self._search_regex(
            r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
            webpage, 'next page url', default=None, group='url')
        if not next_href:
            # No "next" link: the last page has been reached.
            return

        webpage = self._download_webpage(
            compat_urlparse.urljoin(url, next_href), playlist_id,
            'Downloading page %d' % page_num, page_num)
        page_num += 1
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
title, description = self._extract_title_and_description(webpage) title = self._og_search_title(webpage, fatal=False)
description = self._og_search_description(webpage)
return self.playlist_result( return self.playlist_result(
self._entries(webpage, url, playlist_id), self._entries(webpage, url, playlist_id),
@ -1282,7 +1264,6 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
IE_NAME = 'bbc.co.uk:iplayer:playlist' IE_NAME = 'bbc.co.uk:iplayer:playlist'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s' _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v', 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
'info_dict': { 'info_dict': {
@ -1303,12 +1284,51 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
'playlist_mincount': 10, 'playlist_mincount': 10,
}] }]
def _extract_title_and_description(self, webpage): def _entries(self, webpage, url, playlist_id):
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False) query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
description = self._search_regex( single_season = 'seriesId' in query
r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>', single_page = 'page' in query
webpage, 'description', fatal=False, group='value')
return title, description redux_state = self._redux_state(webpage, playlist_id)
slices = redux_state.get('header', {}).get('availableSlices', [])
season_ids = list(map(lambda s: s.get('id'), slices))
for season in itertools.count(1):
while True:
pagination = redux_state.get('pagination')
page_num = pagination.get('currentPage')
total_pages = pagination.get('totalPages')
for entity in redux_state.get('entities'):
video_id = entity.get('id')
yield self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
if single_page or page_num >= total_pages:
break
next_page_num = page_num + 1
next_page_href = pagination.get('pageUrl') % next_page_num
url = compat_urlparse.urljoin(url, next_page_href)
webpage = self._download_webpage(url, playlist_id,
'Downloading season %d page %d' % (season, next_page_num),
'season %d page %d' % (season, next_page_num))
redux_state = self._redux_state(webpage, playlist_id)
if single_season or season >= len(season_ids):
break
next_season_id = season_ids[season]
url = compat_urlparse.urljoin(url, '?seriesId=' + next_season_id)
webpage = self._download_webpage(url, playlist_id,
'Downloading season %d page 1' % (season + 1),
'season %d page 1' % (season + 1))
redux_state = self._redux_state(webpage, playlist_id)
def _redux_state(self, webpage, playlist_id):
    """Extract and parse the iPlayer Redux state embedded in ``webpage``.

    The state is the value assigned to ``window.__IPLAYER_REDUX_STATE__``
    inside a ``<script>`` element.  The captured text is handed to
    ``_parse_json`` with ``playlist_id`` as the id argument.

    Returns whatever ``_parse_json`` produces for the captured text.
    Behaviour when the script is absent is that of ``_search_regex`` with
    its default arguments.
    """
    # The dot in "window.__IPLAYER_REDUX_STATE__" is escaped so that it only
    # matches a literal '.', not an arbitrary character.
    redux_state_regex = r'<script[^>]*>\s*window\.__IPLAYER_REDUX_STATE__\s*=\s*(.*?);?\s*</script>'
    redux_state_json = self._search_regex(redux_state_regex, webpage, 'redux_state')
    # NOTE(review): the captured text is HTML-unescaped before JSON parsing;
    # confirm that the page really entity-encodes the script content.
    return self._parse_json(redux_state_json, playlist_id, transform_source=unescapeHTML)
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
@ -1353,7 +1373,21 @@ class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _extract_title_and_description(self, webpage): def _entries(self, webpage, url, playlist_id):
title = self._og_search_title(webpage, fatal=False) single_page = 'page' in compat_urlparse.parse_qs(
description = self._og_search_description(webpage) compat_urlparse.urlparse(url).query)
return title, description for page_num in itertools.count(2):
for video_id in re.findall(
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
yield self.url_result(
self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
if single_page:
return
next_page = self._search_regex(
r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
webpage, 'next page url', default=None, group='url')
if not next_page:
break
webpage = self._download_webpage(
compat_urlparse.urljoin(url, next_page), playlist_id,
'Downloading page %d' % page_num, page_num)