Merge 009cf65c74
into 48c5663c5f
This commit is contained in:
commit
ee9d525719
|
@ -608,9 +608,10 @@ class BBCIE(BBCCoUkIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||||
|
|
||||||
_MEDIASELECTOR_URLS = [
|
_MEDIASELECTOR_URLS = [
|
||||||
|
'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/iptv-all/vpid/%s/format/xml/',
|
||||||
# Provides HQ HLS streams but fails with geolocation in some cases when it's
|
# Provides HQ HLS streams but fails with geolocation in some cases when it's
|
||||||
# even not geo restricted at all
|
# even not geo restricted at all
|
||||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/legacy-iptv-all/vpid/%s',
|
||||||
# Provides more formats, namely direct mp4 links, but fails on some videos with
|
# Provides more formats, namely direct mp4 links, but fails on some videos with
|
||||||
# notukerror for non UK (?) users (e.g.
|
# notukerror for non UK (?) users (e.g.
|
||||||
# http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
# http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||||
|
@ -754,7 +755,7 @@ class BBCIE(BBCCoUkIE):
|
||||||
},
|
},
|
||||||
'skip': 'Georestricted to UK',
|
'skip': 'Georestricted to UK',
|
||||||
}, {
|
}, {
|
||||||
# single video with playlist.sxml URL in playlist param
|
# single video with "pid" parameter
|
||||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p02xycnp',
|
'id': 'p02xycnp',
|
||||||
|
@ -793,10 +794,11 @@ class BBCIE(BBCCoUkIE):
|
||||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with Morph "vpid" parameter
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# single video with "vpid" parameter
|
||||||
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p06556y7',
|
'id': 'p06556y7',
|
||||||
|
@ -809,6 +811,7 @@ class BBCIE(BBCCoUkIE):
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# window.__PRELOADED_STATE__
|
# window.__PRELOADED_STATE__
|
||||||
|
# 404
|
||||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b0b9z4vz',
|
'id': 'b0b9z4vz',
|
||||||
|
@ -819,6 +822,7 @@ class BBCIE(BBCCoUkIE):
|
||||||
'uploader_id': 'bbc_radio_three',
|
'uploader_id': 'bbc_radio_three',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# article with embedded video using data-pid parameter
|
||||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p06w9tws',
|
'id': 'p06w9tws',
|
||||||
|
@ -904,6 +908,37 @@ class BBCIE(BBCCoUkIE):
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
|
initial_data_re = self._search_regex(
|
||||||
|
r'<script[^>]*>window.__INITIAL_DATA__=(.*?);</script>', webpage,
|
||||||
|
'initial data', default=None)
|
||||||
|
if initial_data_re:
|
||||||
|
initial_data = self._parse_json(initial_data_re, playlist_id)
|
||||||
|
for key in initial_data['data']:
|
||||||
|
data = initial_data['data'][key].get('data')
|
||||||
|
if data and isinstance(data, dict):
|
||||||
|
mediaItems = []
|
||||||
|
initialItem = data.get('initialItem')
|
||||||
|
blocks = data.get('blocks')
|
||||||
|
if initialItem:
|
||||||
|
mediaItems.append(initialItem.get('mediaItem'))
|
||||||
|
if blocks:
|
||||||
|
for block in blocks:
|
||||||
|
if block.get('type') == 'media':
|
||||||
|
mediaItems.append(block.get('model'))
|
||||||
|
for mediaItem in mediaItems:
|
||||||
|
title = mediaItem['title']['content'] if mediaItem.get('title') else mediaItem.get('caption')
|
||||||
|
description = '\n'.join([block['model']['text'] for block in mediaItem['summary']['blocks']]) if mediaItem.get('summary') else None
|
||||||
|
programme_id = mediaItem['media']['items'][0]['id']
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
entries.append({
|
||||||
|
'id': programme_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
|
||||||
# article with multiple videos embedded with playlist.sxml (e.g.
|
# article with multiple videos embedded with playlist.sxml (e.g.
|
||||||
# http://www.bbc.com/sport/0/football/34475836)
|
# http://www.bbc.com/sport/0/football/34475836)
|
||||||
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
|
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
|
||||||
|
@ -977,22 +1012,25 @@ class BBCIE(BBCCoUkIE):
|
||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
|
|
||||||
group_id = self._search_regex(
|
|
||||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
|
||||||
webpage, 'group id', default=None)
|
|
||||||
if playlist_id:
|
|
||||||
return self.url_result(
|
|
||||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
|
||||||
ie=BBCCoUkIE.ie_key())
|
|
||||||
|
|
||||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||||
programme_id = self._search_regex(
|
programme_id = self._search_regex(
|
||||||
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
||||||
r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
|
r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
|
||||||
r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
|
r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX,
|
||||||
|
r'"vpid":"(%s)"' % self._ID_REGEX,
|
||||||
|
r'"pid":"(%s)"' % self._ID_REGEX],
|
||||||
webpage, 'vpid', default=None)
|
webpage, 'vpid', default=None)
|
||||||
|
|
||||||
|
# bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
|
||||||
|
initial_data = self._search_regex(
|
||||||
|
r'<script[^>]+id="initial-data"[^>]+data-json=\'(.+)\'>',
|
||||||
|
webpage, 'initial data', fatal=False, default=None)
|
||||||
|
if initial_data:
|
||||||
|
programme_id = self._search_regex(
|
||||||
|
r'"versionID":"(%s)"' % self._ID_REGEX,
|
||||||
|
unescapeHTML(initial_data),
|
||||||
|
'programme id', fatal=False, default=None)
|
||||||
|
|
||||||
if programme_id:
|
if programme_id:
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@ -1014,49 +1052,60 @@ class BBCIE(BBCCoUkIE):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
|
||||||
|
group_id = self._search_regex(
|
||||||
|
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||||
|
webpage, 'group id', default=None)
|
||||||
|
if group_id:
|
||||||
|
return self.url_result(
|
||||||
|
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||||
|
ie=BBCCoUkIE.ie_key())
|
||||||
|
|
||||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||||
# Several setPayload calls may be present but the video
|
morph_payloads = re.findall(
|
||||||
# seems to be always related to the first one
|
r'Morph\.setPayload\([^,]+,\s*({.+?})\);', webpage)
|
||||||
morph_payload = self._parse_json(
|
if morph_payloads:
|
||||||
self._search_regex(
|
for morph_payload_text in morph_payloads:
|
||||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
morph_payload = self._parse_json(
|
||||||
webpage, 'morph payload', default='{}'),
|
morph_payload_text, playlist_id, fatal=False)
|
||||||
playlist_id, fatal=False)
|
if morph_payload:
|
||||||
if morph_payload:
|
body_text = try_get(morph_payload, lambda x: x['body']['content']['article']['body']) or None
|
||||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
if not body_text:
|
||||||
for component in components:
|
continue
|
||||||
if not isinstance(component, dict):
|
body = self._parse_json(
|
||||||
continue
|
body_text, playlist_id, fatal=False)
|
||||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
if not isinstance(body, list):
|
||||||
if not lead_media:
|
continue
|
||||||
continue
|
for item in body:
|
||||||
identifiers = lead_media.get('identifiers')
|
if not isinstance(item, dict):
|
||||||
if not identifiers or not isinstance(identifiers, dict):
|
continue
|
||||||
continue
|
videoData = item.get('videoData')
|
||||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
if videoData:
|
||||||
if not programme_id:
|
programme_id = videoData.get('vpid') or videoData.get('playablePid')
|
||||||
continue
|
if not programme_id:
|
||||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
continue
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
title = videoData.get('title') or self._og_search_title(webpage)
|
||||||
self._sort_formats(formats)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
description = lead_media.get('summary')
|
self._sort_formats(formats)
|
||||||
uploader = lead_media.get('masterBrand')
|
description = videoData.get('caption') or videoData.get('summary')
|
||||||
uploader_id = lead_media.get('mid')
|
uploader = videoData.get('masterBrand')
|
||||||
duration = None
|
uploader_id = videoData.get('mid')
|
||||||
duration_d = lead_media.get('duration')
|
duration = None
|
||||||
if isinstance(duration_d, dict):
|
duration_d = videoData.get('duration')
|
||||||
duration = parse_duration(dict_get(
|
if isinstance(duration_d, dict):
|
||||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
duration = parse_duration(dict_get(
|
||||||
return {
|
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||||
'id': programme_id,
|
entries.append({
|
||||||
'title': title,
|
'id': programme_id,
|
||||||
'description': description,
|
'title': title,
|
||||||
'duration': duration,
|
'description': description,
|
||||||
'uploader': uploader,
|
'duration': duration,
|
||||||
'uploader_id': uploader_id,
|
'uploader': uploader,
|
||||||
'formats': formats,
|
'uploader_id': uploader_id,
|
||||||
'subtitles': subtitles,
|
'formats': formats,
|
||||||
}
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
preload_state = self._parse_json(self._search_regex(
|
preload_state = self._parse_json(self._search_regex(
|
||||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||||
|
|
Loading…
Reference in New Issue