1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-29 19:47:54 +01:00

Moved to parsing JSON content and improved the regex pattern

This commit is contained in:
Fran Hermoso 2020-05-14 01:19:58 +02:00
parent 266714aeaf
commit c8dba62857

View File

@ -278,25 +278,16 @@ class ITVIE(InfoExtractor):
class ITVBTCCIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(articles|races)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [
{
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
'info_dict': {
'id': 'btcc-2019-brands-hatch-gp-race-action',
'title': 'BTCC 2019: Brands Hatch GP race action',
},
'playlist_mincount': 12,
},
{
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
'info_dict': {
'id': 'btcc-2018-all-the-action-from-brands-hatch',
'title': 'BTCC 2018: All the action from Brands Hatch',
},
'playlist_mincount': 9,
}
]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
def _real_extract(self, url):
@ -304,18 +295,28 @@ class ITVBTCCIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
json_map = self._html_search_regex(
'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
webpage,
'json_map'
)
entries = [
self.url_result(
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % video_id['data'].get('id'), {
# ITV does not like some GB IP ranges, so here are some
# IP blocks it accepts
'geo_ip_blocks': [
'193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
],
'referrer': url,
}),
}
),
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
for video_id in re.findall(r'["\']data["\']:{["\']id["\']:(\d+),', webpage)]
for video_id in self._parse_json(
json_map, playlist_id
)['props']['pageProps']['article']['body']['content']]
title = self._og_search_title(webpage, fatal=False)