1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-12-01 20:57:54 +01:00

Moved to parsing JSON content and improved the regex pattern

This commit is contained in:
Fran Hermoso 2020-05-14 01:19:58 +02:00
parent 266714aeaf
commit c8dba62857

View File

@ -278,25 +278,16 @@ class ITVIE(InfoExtractor):
class ITVBTCCIE(InfoExtractor): class ITVBTCCIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(articles|races)/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [ _TEST = {
{ 'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action', 'info_dict': {
'info_dict': { 'id': 'btcc-2019-brands-hatch-gp-race-action',
'id': 'btcc-2019-brands-hatch-gp-race-action', 'title': 'BTCC 2019: Brands Hatch GP race action',
'title': 'BTCC 2019: Brands Hatch GP race action',
},
'playlist_mincount': 12,
}, },
{ 'playlist_mincount': 12,
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch', }
'info_dict': {
'id': 'btcc-2018-all-the-action-from-brands-hatch',
'title': 'BTCC 2018: All the action from Brands Hatch',
},
'playlist_mincount': 9,
}
]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
def _real_extract(self, url): def _real_extract(self, url):
@ -304,18 +295,28 @@ class ITVBTCCIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
json_map = self._html_search_regex(
'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
webpage,
'json_map'
)
entries = [ entries = [
self.url_result( self.url_result(
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, { smuggle_url(
# ITV does not like some GB IP ranges, so here are some self.BRIGHTCOVE_URL_TEMPLATE % video_id['data'].get('id'), {
# IP blocks it accepts # ITV does not like some GB IP ranges, so here are some
'geo_ip_blocks': [ # IP blocks it accepts
'193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21' 'geo_ip_blocks': [
], '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
'referrer': url, ],
}), 'referrer': url,
}
),
ie=BrightcoveNewIE.ie_key(), video_id=video_id) ie=BrightcoveNewIE.ie_key(), video_id=video_id)
for video_id in re.findall(r'["\']data["\']:{["\']id["\']:(\d+),', webpage)] for video_id in self._parse_json(
json_map, playlist_id
)['props']['pageProps']['article']['body']['content']]
title = self._og_search_title(webpage, fatal=False) title = self._og_search_title(webpage, fatal=False)