[elpais] Add support for alternative layout (Closes #8744)

This commit is contained in:
Sergey M․ 2016-03-05 16:43:29 +06:00
parent 845817aadf
commit d1b29d1342
1 changed files with 21 additions and 10 deletions

View File

@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])' _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
IE_DESC = 'El País' IE_DESC = 'El País'
_TEST = { _TESTS = [{
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html', 'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
'md5': '98406f301f19562170ec071b83433d55', 'md5': '98406f301f19562170ec071b83433d55',
'info_dict': { 'info_dict': {
@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.', 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
'upload_date': '20140206', 'upload_date': '20140206',
} }
}, {
'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
'md5': '3bd5b09509f3519d7d9e763179b013de',
'info_dict': {
'id': '1456340311_668921',
'ext': 'mp4',
'title': 'Cómo hacer el mejor café con cafetera italiana',
'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
'upload_date': '20160303',
} }
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
prefix = self._html_search_regex( prefix = self._html_search_regex(
r'var url_cache = "([^"]+)";', webpage, 'URL prefix') r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
video_suffix = self._search_regex( video_suffix = self._search_regex(
r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL') r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
video_url = prefix + video_suffix video_url = prefix + video_suffix
thumbnail_suffix = self._search_regex( thumbnail_suffix = self._search_regex(
r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL', r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
fatal=False) webpage, 'thumbnail URL', fatal=False)
thumbnail = ( thumbnail = (
None if thumbnail_suffix is None None if thumbnail_suffix is None
else prefix + thumbnail_suffix) else prefix + thumbnail_suffix)
title = self._html_search_regex( title = self._html_search_regex(
'<h2 class="entry-header entry-title.*?>(.*?)</h2>', (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
webpage, 'title') webpage, 'title')
date_str = self._search_regex( upload_date = unified_strdate(self._search_regex(
r'<p class="date-header date-int updated"\s+title="([^"]+)">', r'<p class="date-header date-int updated"\s+title="([^"]+)">',
webpage, 'upload date', fatal=False) webpage, 'upload date', default=None) or self._html_search_meta(
upload_date = (None if date_str is None else unified_strdate(date_str)) 'datePublished', webpage, 'timestamp'))
return { return {
'id': video_id, 'id': video_id,