From 1bedd948bd9a552b6e6e684d8be72c090dc7b0ff Mon Sep 17 00:00:00 2001 From: PC Date: Tue, 6 Oct 2015 22:14:03 +0100 Subject: [PATCH 01/38] TVI/IOL extractor supports live streams --- youtube_dl/extractor/__init__.py | 4 ++ youtube_dl/extractor/iol.py | 82 ++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 youtube_dl/extractor/iol.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3ace1cc2c..beed1af95 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -249,6 +249,10 @@ from .indavideo import ( from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE +from .iol import ( + IOLIE, + IOLStreamIE +) from .iprima import IPrimaIE from .iqiyi import IqiyiIE from .ir90tv import Ir90TvIE diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py new file mode 100644 index 000000000..5d6f458ac --- /dev/null +++ b/youtube_dl/extractor/iol.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re + + +class IOLIE(InfoExtractor): + _VALID_URL = r'http://(tviplayer|www\.tvi24)\.iol\.pt/.*/(?P[0-9a-f]{24})[/0-9]*$' + _TESTS = [{ + 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', + 'md5': 'a9b3e3630201401fc3b8099d9d689191', + 'info_dict': { + 'id': '55f878f90cf203f8b03cea6d', + 'ext': 'mp4', + 'title': u'Euromilhões - 15 de setembro de 2015', + # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f87a280cf2e6961770f01f', + 'description': u'Com Mónica Jardim' + } + }, { + 'url': 'http://tviplayer.iol.pt/programa/isso-e-tudo-muito-bonito-mas/55f30f2e0cf2a6b037fc1f2f/video/55f730c40cf23fa665481b18', + 'md5': 'fb7231bf3a12eee731c56eaa6ecb5474', + 'info_dict': { + 'id': '55f730c40cf23fa665481b18', + 'ext': 'mp4', + 'title': u'Isso é tudo muito bonito, mas: Concatena, filho, concatena', + # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f737330cf23fa665481b2d', + 'description': u'Quando os adversários unem perspectivas.' + } + }, { + 'url': 'http://www.tvi24.iol.pt/videos/passos-criacao-de-emprego-e-facto-muito-importante/55f816640cf2e6961770ef7a/2', + 'md5': '49d4da9901f00e72a127ff4b635f69ee', + 'info_dict': { + 'id': '55f816640cf2e6961770ef7a', + 'ext': 'mp4', + 'title': u'Passos: criação de emprego é facto muito importante', + # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f8180e0cf21413dfb1d96d/', + 'description': u'PM sublinha que é o nível mais elevado de há vários anos' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) + title = re.sub(r' \| TVI Player$', '', title, re.IGNORECASE) + + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', webpage, 'm3u8 playlist') + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats + } + + +class IOLStreamIE(IOLIE): + _VALID_URL = r'http://tviplayer\.iol\.pt/direto/(?P\S+)' + _TESTS = [{ + 'url': 'http://tviplayer.iol.pt/direto/TVI', + 'info_dict': { + 'id': 'TVI', + 'ext': 'mp4', + 'title': 're:^Direto TVI', + 'description': u'A TVI ao pé de si. Sempre.', + 'is_live': True, + } + }] + + def _real_extract(self, url): + ret = IOLIE._real_extract(self, url) + ret['is_live'] = True + ret['title'] = self._live_title(ret['title']) + + return ret From 859bd49e8e791ff667828881baa431929a0f1a1f Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 05:10:54 +0100 Subject: [PATCH 02/38] Supports http_mp4 and adds another m3u8 source --- youtube_dl/extractor/iol.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 5d6f458ac..634152a3e 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -7,7 +7,7 @@ import re class IOLIE(InfoExtractor): - _VALID_URL = r'http://(tviplayer|www\.tvi24)\.iol\.pt/.*/(?P[0-9a-f]{24})[/0-9]*$' + _VALID_URL = r'http://(tviplayer|(www\.tvi24))\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', 'md5': 'a9b3e3630201401fc3b8099d9d689191', @@ -52,6 +52,22 @@ class IOLIE(InfoExtractor): m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', webpage, 'm3u8 playlist') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) + if match: + multimedia_id = match.group(1) + m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' + formats_m3u8_default = self._extract_m3u8_formats(m3u8_url_default, video_id, ext='mp4') + formats.extend(formats_m3u8_default) + formats.append({ + 'url': 'http://www.iol.pt/videos-file/' + multimedia_id + '-L-500k.mp4', + 'format_id': 'http_500', + 'tbr': 500, + 'protocol': 'http', + 'protocol': 'http', + 'preference': -1, + 'no_resume': False + }) + return { 'id': video_id, 'title': title, From efa028e6581f5f11fc51064815ef6341323052d7 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 05:19:42 +0100 Subject: [PATCH 03/38] fixed test md5s http now permits tests on the first 100Kb --- youtube_dl/extractor/iol.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 634152a3e..3b268a859 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -10,7 +10,7 @@ class IOLIE(InfoExtractor): _VALID_URL = r'http://(tviplayer|(www\.tvi24))\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', - 'md5': 'a9b3e3630201401fc3b8099d9d689191', + 'md5': '983ca0edae370af67c458c6e5a71aae5', 'info_dict': { 'id': '55f878f90cf203f8b03cea6d', 'ext': 'mp4', @@ -20,7 +20,7 @@ class IOLIE(InfoExtractor): } }, { 'url': 'http://tviplayer.iol.pt/programa/isso-e-tudo-muito-bonito-mas/55f30f2e0cf2a6b037fc1f2f/video/55f730c40cf23fa665481b18', - 'md5': 'fb7231bf3a12eee731c56eaa6ecb5474', + 'md5': 'ef5171a5abf69197726e5d7c7633c27a', 'info_dict': { 'id': '55f730c40cf23fa665481b18', 'ext': 'mp4', @@ -30,7 +30,7 @@ class IOLIE(InfoExtractor): } }, { 'url': 'http://www.tvi24.iol.pt/videos/passos-criacao-de-emprego-e-facto-muito-importante/55f816640cf2e6961770ef7a/2', - 'md5': '49d4da9901f00e72a127ff4b635f69ee', + 'md5': 'd836f1225c289c7987beddebe11619b9', 'info_dict': { 'id': '55f816640cf2e6961770ef7a', 'ext': 'mp4', From 93c48924550a28d679a1f2bb5c5ffb37013e2bc7 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 14:50:14 +0100 Subject: [PATCH 04/38] removed duplicate line --- youtube_dl/extractor/iol.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 3b268a859..1605ca7c3 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -63,7 +63,6 @@ class IOLIE(InfoExtractor): 'format_id': 'http_500', 'tbr': 500, 'protocol': 'http', - 'protocol': 'http', 'preference': -1, 'no_resume': False }) From 736f21d1ecb543b48f6a098185dac419e7abf0d4 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 15:09:19 +0100 Subject: [PATCH 05/38] multimedia_id extracted from meta still uses old method as fallback --- youtube_dl/extractor/iol.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 1605ca7c3..7d22f6610 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -52,9 +52,14 @@ class IOLIE(InfoExtractor): m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', webpage, 'm3u8 playlist') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') - match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) - if match: - multimedia_id = match.group(1) + multimedia_id = self._html_search_meta('iol:id', webpage, 'multimedia_id', fatal=False, default=None) + if multimedia_id is None: + match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) + self.report_extraction('multimedia_id (fallback)') + if match: + multimedia_id = match.group(1) + + if multimedia_id is not None: m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' formats_m3u8_default = self._extract_m3u8_formats(m3u8_url_default, video_id, ext='mp4') formats.extend(formats_m3u8_default) From 23eee291d9b856735d346fee716a4ad312100b62 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 15:09:19 +0100 Subject: [PATCH 06/38] multimedia_id extracted from meta still uses old method as fallback --- youtube_dl/extractor/iol.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 1605ca7c3..49ebf2368 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -52,15 +52,20 @@ class IOLIE(InfoExtractor): m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', webpage, 'm3u8 playlist') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') - match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) - if match: - multimedia_id = match.group(1) + multimedia_id = self._html_search_meta('iol:id', webpage, 'multimedia_id', fatal=False, default=None) + if multimedia_id is None: + match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) + self.report_extraction('multimedia_id (fallback)') + if match: + multimedia_id = match.group(1) + + if multimedia_id is not None: m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' formats_m3u8_default = self._extract_m3u8_formats(m3u8_url_default, video_id, ext='mp4') formats.extend(formats_m3u8_default) formats.append({ 'url': 'http://www.iol.pt/videos-file/' + multimedia_id + '-L-500k.mp4', - 'format_id': 'http_500', + 'format_id': 'http-500', 'tbr': 500, 'protocol': 'http', 'preference': -1, From 633a37e06e3ed457b13e00c32423de8bec5ce0d8 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 16:19:57 +0100 Subject: [PATCH 07/38] rtmp not working --- youtube_dl/extractor/iol.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 7d22f6610..1d08eeba5 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -63,6 +63,17 @@ class IOLIE(InfoExtractor): m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' formats_m3u8_default = self._extract_m3u8_formats(m3u8_url_default, video_id, ext='mp4') formats.extend(formats_m3u8_default) + + server = 'video2.iol.pt' + formats.append({ + 'url': 'rtmp://'+server+'/vod', + 'play_path': 'mp4:' + multimedia_id + '-L-500k', + 'format_id': 'rtmp-500', + 'tbr': 500, + 'protocol': 'rtmp', + 'ext': 'mp4' + }) + formats.append({ 'url': 'http://www.iol.pt/videos-file/' + multimedia_id + '-L-500k.mp4', 'format_id': 'http_500', From 647f1fc679cf461556ab2d51e54416786e1eaff3 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 19:16:41 +0100 Subject: [PATCH 08/38] IOL RTMP support --- youtube_dl/extractor/iol.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 1d08eeba5..f0b000870 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -64,19 +64,31 @@ class IOLIE(InfoExtractor): formats_m3u8_default = self._extract_m3u8_formats(m3u8_url_default, video_id, ext='mp4') formats.extend(formats_m3u8_default) - server = 'video2.iol.pt' - formats.append({ - 'url': 'rtmp://'+server+'/vod', - 'play_path': 'mp4:' + multimedia_id + '-L-500k', - 'format_id': 'rtmp-500', - 'tbr': 500, - 'protocol': 'rtmp', - 'ext': 'mp4' - }) + # try rtmp format + if self._html_search_regex(r' Date: Sat, 10 Oct 2015 19:51:58 +0100 Subject: [PATCH 09/38] consider all iol.pt pages as download matches --- youtube_dl/extractor/iol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 49ebf2368..ddad3add5 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -7,7 +7,7 @@ import re class IOLIE(InfoExtractor): - _VALID_URL = r'http://(tviplayer|(www\.tvi24))\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' + _VALID_URL = r'http://[^/]*\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', 'md5': '983ca0edae370af67c458c6e5a71aae5', From 89e0d31d04da146e6d8f8d8d7040a24024b15ddb Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 21:23:56 +0100 Subject: [PATCH 10/38] fixed testing thumbnail typo --- youtube_dl/extractor/iol.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index ddad3add5..e270e7057 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -15,7 +15,7 @@ class IOLIE(InfoExtractor): 'id': '55f878f90cf203f8b03cea6d', 'ext': 'mp4', 'title': u'Euromilhões - 15 de setembro de 2015', - # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f87a280cf2e6961770f01f', + 'thumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f87a280cf2e6961770f01f', 'description': u'Com Mónica Jardim' } }, { @@ -25,7 +25,7 @@ class IOLIE(InfoExtractor): 'id': '55f730c40cf23fa665481b18', 'ext': 'mp4', 'title': u'Isso é tudo muito bonito, mas: Concatena, filho, concatena', - # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f737330cf23fa665481b2d', + 'thumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f737330cf23fa665481b2d', 'description': u'Quando os adversários unem perspectivas.' } }, { @@ -35,7 +35,7 @@ class IOLIE(InfoExtractor): 'id': '55f816640cf2e6961770ef7a', 'ext': 'mp4', 'title': u'Passos: criação de emprego é facto muito importante', - # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f8180e0cf21413dfb1d96d/', + 'thumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f8180e0cf21413dfb1d96d/', 'description': u'PM sublinha que é o nível mais elevado de há vários anos' } }] From f6d8fd8d6e152339c17854df9a61d26993ff703e Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 21:30:34 +0100 Subject: [PATCH 11/38] iol now tries to use javascript structures added tests for maisfutebol (complete with titles with no cruft) --- youtube_dl/extractor/iol.py | 44 +++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index e270e7057..8cd3993aa 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import js_to_json, ExtractorError import re @@ -38,6 +39,26 @@ class IOLIE(InfoExtractor): 'thumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f8180e0cf21413dfb1d96d/', 'description': u'PM sublinha que é o nível mais elevado de há vários anos' } + }, { + 'url': 'http://www.maisfutebol.iol.pt/videos/560b04f80cf25f02cc1d843f/fc-porto/lopetegui-nao-quer-faltar-ao-respeito-ao-maccabi', + 'md5': '738a970259469fbb54b2d391c4c69dab', + 'info_dict': { + 'id': '560b04f80cf25f02cc1d843f', + 'ext': 'mp4', + 'title': u'Lopetegui não quer «faltar ao respeito ao Maccabi»', + 'thumbnail': 'http://www.maisfutebol.iol.pt/multimedia/oratvi/multimedia/imagem/id/560b06c50cf2c14000fb838d/600', + 'description': u'Treinador do FC Porto e a possibilidade de disparar na tabela nos dois jogos com os israelitas.' + } + }, { + 'url': 'http://www.maisfutebol.iol.pt/videos/5611a7e30cf2d8d8759054eb/liga/perdi-uma-semana-com-ewerton', + 'md5': '9535c58831ecd4bbb95e600d34eaeef8', + 'info_dict': { + 'id': '5611a7e30cf2d8d8759054eb', + 'ext': 'mp4', + 'title': u'«Perdi uma semana com Ewerton»', + 'thumbnail': 'http://www.maisfutebol.iol.pt/multimedia/oratvi/multimedia/imagem/id/5611aa840cf20a9cbc0da934/600', + 'description': u'Treinador explica situação do central.' + } }] def _real_extract(self, url): @@ -45,19 +66,28 @@ class IOLIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage) - title = re.sub(r' \| TVI Player$', '', title, re.IGNORECASE) - description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) - m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', webpage, 'm3u8 playlist') + + iol_js = self._html_search_regex(r'\.iolplayer\(\s*(\{.+?\})\s*\)', webpage, 'iolplayer', flags=re.DOTALL) + + # in a perfect world this would work but in practice it breaks too many times. RegExps are more "robust" + try: + iol_json = self._parse_json(iol_js, video_id, transform_source=js_to_json) + m3u8_url = iol_json['videoUrl'] + title = iol_json.get('title', title) # this title information has less cruft (defaults to _og_search_title) + except ExtractorError: + # need to parse using regexps + m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', iol_js, 'm3u8 playlist (json fallback)') + title_js = self._html_search_regex(r'''title:\s*'(.+?)'\s*,''', iol_js, 'title (json fallback)', fatal=False, default=None) + if title_js is not None: + title = title_js + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') multimedia_id = self._html_search_meta('iol:id', webpage, 'multimedia_id', fatal=False, default=None) if multimedia_id is None: - match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) - self.report_extraction('multimedia_id (fallback)') - if match: - multimedia_id = match.group(1) + multimedia_id = self._search_regex(r'smil:([0-9a-f]{24})-L', m3u8_url, 'multimedia_id (fallback)', flags=re.IGNORECASE, default=None) if multimedia_id is not None: m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' From 19f838cf64cdc0e6261928986948df9b73972f9c Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 22:22:00 +0100 Subject: [PATCH 12/38] need to clean title in live version --- youtube_dl/extractor/iol.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 8cd3993aa..2974ca07f 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -40,13 +40,13 @@ class IOLIE(InfoExtractor): 'description': u'PM sublinha que é o nível mais elevado de há vários anos' } }, { - 'url': 'http://www.maisfutebol.iol.pt/videos/560b04f80cf25f02cc1d843f/fc-porto/lopetegui-nao-quer-faltar-ao-respeito-ao-maccabi', + 'url': 'http://www.maisfutebol.iol.pt/videos/560b04f80cf25f02cc1d843f/fc-porto/lopetegui-nao-quer-faltar-ao-respeito-ao-maccabi', 'md5': '738a970259469fbb54b2d391c4c69dab', 'info_dict': { 'id': '560b04f80cf25f02cc1d843f', 'ext': 'mp4', 'title': u'Lopetegui não quer «faltar ao respeito ao Maccabi»', - 'thumbnail': 'http://www.maisfutebol.iol.pt/multimedia/oratvi/multimedia/imagem/id/560b06c50cf2c14000fb838d/600', + 'thumbnail': 'http://www.maisfutebol.iol.pt/multimedia/oratvi/multimedia/imagem/id/560b06c50cf2c14000fb838d/600', 'description': u'Treinador do FC Porto e a possibilidade de disparar na tabela nos dois jogos com os israelitas.' } }, { @@ -75,7 +75,7 @@ class IOLIE(InfoExtractor): try: iol_json = self._parse_json(iol_js, video_id, transform_source=js_to_json) m3u8_url = iol_json['videoUrl'] - title = iol_json.get('title', title) # this title information has less cruft (defaults to _og_search_title) + title = iol_json.get('title', title) # this title information has less cruft (defaults to _og_search_title) except ExtractorError: # need to parse using regexps m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', iol_js, 'm3u8 playlist (json fallback)') @@ -126,7 +126,9 @@ class IOLStreamIE(IOLIE): def _real_extract(self, url): ret = IOLIE._real_extract(self, url) + # can't find uncluttered title information for live + title = re.sub(r'\s*\|\s*TVI Player\s*$', '', ret['title'], re.IGNORECASE) ret['is_live'] = True - ret['title'] = self._live_title(ret['title']) + ret['title'] = self._live_title(title) return ret From ec6d7b62e56cb87f4db51b246f136c524546d129 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 22:34:55 +0100 Subject: [PATCH 13/38] maybe there are https sites --- youtube_dl/extractor/iol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 89e5b0e7f..0d70f6695 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -8,7 +8,7 @@ import re class IOLIE(InfoExtractor): - _VALID_URL = r'http://[^/]*\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' + _VALID_URL = r'https?://[^/]*\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', 'md5': '983ca0edae370af67c458c6e5a71aae5', From 10bd91e14f5d733acc23677d97fdd83733ef1ea7 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 22:37:15 +0100 Subject: [PATCH 14/38] multimedia_id should not be fatal --- youtube_dl/extractor/iol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 0d70f6695..7578afe36 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -87,7 +87,7 @@ class IOLIE(InfoExtractor): multimedia_id = self._html_search_meta('iol:id', webpage, 'multimedia_id', fatal=False, default=None) if multimedia_id is None: - multimedia_id = self._search_regex(r'smil:([0-9a-f]{24})-L', m3u8_url, 'multimedia_id (fallback)', flags=re.IGNORECASE, default=None) + multimedia_id = self._search_regex(r'smil:([0-9a-f]{24})-L', m3u8_url, 'multimedia_id (fallback)', flags=re.IGNORECASE, default=None, fatal=False) if multimedia_id is not None: m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' From e6b85761aa82125a27157b2b52a1efb38412a886 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 22:45:11 +0100 Subject: [PATCH 15/38] sensible default --- youtube_dl/extractor/iol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 7578afe36..4763aba1a 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -95,7 +95,7 @@ class IOLIE(InfoExtractor): formats.extend(formats_m3u8_default) # try rtmp format - if self._html_search_regex(r' Date: Sat, 10 Oct 2015 22:57:07 +0100 Subject: [PATCH 16/38] no download for stream test --- youtube_dl/extractor/iol.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 4763aba1a..2bc7dbf7a 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -144,7 +144,10 @@ class IOLStreamIE(IOLIE): 'title': 're:^Direto TVI', 'description': u'A TVI ao pé de si. Sempre.', 'is_live': True, - } + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): From 243e2502f53b3245e03cdbb48671a7e06357108b Mon Sep 17 00:00:00 2001 From: PC Date: Sun, 11 Oct 2015 18:57:56 +0100 Subject: [PATCH 17/38] added iol.pt to supportedsites --- docs/supportedsites.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5beb2ecd4..569cc04d9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -226,6 +226,7 @@ - **Instagram** - **instagram:user**: Instagram user profile - **InternetVideoArchive** + - **IOL**: iol.pt sites (tvi archive/stream and maisfutebol) - **IPrima** - **iqiyi**: 爱奇艺 - **Ir90Tv** From 317c45a04db708271d32b42b34c71e9be12ab7a2 Mon Sep 17 00:00:00 2001 From: PC Date: Thu, 15 Oct 2015 19:00:22 +0100 Subject: [PATCH 18/38] extractor descriptions --- docs/supportedsites.md | 1 - youtube_dl/extractor/iol.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 569cc04d9..5beb2ecd4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -226,7 +226,6 @@ - **Instagram** - **instagram:user**: Instagram user profile - **InternetVideoArchive** - - **IOL**: iol.pt sites (tvi archive/stream and maisfutebol) - **IPrima** - **iqiyi**: 爱奇艺 - **Ir90Tv** diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 2bc7dbf7a..a3adc3924 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -8,6 +8,8 @@ import re class IOLIE(InfoExtractor): + IE_NAME = 'IOL' + IE_DESC = 'iol.pt sites (tvi archive and maisfutebol)' _VALID_URL = r'https?://[^/]*\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', @@ -135,6 +137,8 @@ class IOLIE(InfoExtractor): class IOLStreamIE(IOLIE): + IE_NAME = 'IOL:stream' + IE_DESC = 'iol.pt stream sites' _VALID_URL = r'http://tviplayer\.iol\.pt/direto/(?P\S+)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/direto/TVI', From ef53dab7887543302586aacabdedfbc92c87b683 Mon Sep 17 00:00:00 2001 From: PC Date: Tue, 6 Oct 2015 22:14:03 +0100 Subject: [PATCH 19/38] TVI/IOL extractor supports live streams --- youtube_dl/extractor/__init__.py | 4 ++ youtube_dl/extractor/iol.py | 82 ++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 youtube_dl/extractor/iol.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 18d8dbcd6..1ab96a8cf 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -7,6 +7,10 @@ try: except ImportError: _LAZY_LOADER = False from .extractors import * +from .iol import ( + IOLIE, + IOLStreamIE +) _ALL_CLASSES = [ klass diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py new file mode 100644 index 000000000..5d6f458ac --- /dev/null +++ b/youtube_dl/extractor/iol.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re + + +class IOLIE(InfoExtractor): + _VALID_URL = r'http://(tviplayer|www\.tvi24)\.iol\.pt/.*/(?P[0-9a-f]{24})[/0-9]*$' + _TESTS = [{ + 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', + 'md5': 'a9b3e3630201401fc3b8099d9d689191', + 'info_dict': { + 'id': '55f878f90cf203f8b03cea6d', + 'ext': 'mp4', + 'title': u'Euromilhões - 15 de setembro de 2015', + # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f87a280cf2e6961770f01f', + 'description': u'Com Mónica Jardim' + } + }, { + 'url': 'http://tviplayer.iol.pt/programa/isso-e-tudo-muito-bonito-mas/55f30f2e0cf2a6b037fc1f2f/video/55f730c40cf23fa665481b18', + 'md5': 'fb7231bf3a12eee731c56eaa6ecb5474', + 'info_dict': { + 'id': '55f730c40cf23fa665481b18', + 'ext': 'mp4', + 'title': u'Isso é tudo muito bonito, mas: Concatena, filho, concatena', + # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f737330cf23fa665481b2d', + 'description': u'Quando os adversários unem perspectivas.' + } + }, { + 'url': 'http://www.tvi24.iol.pt/videos/passos-criacao-de-emprego-e-facto-muito-importante/55f816640cf2e6961770ef7a/2', + 'md5': '49d4da9901f00e72a127ff4b635f69ee', + 'info_dict': { + 'id': '55f816640cf2e6961770ef7a', + 'ext': 'mp4', + 'title': u'Passos: criação de emprego é facto muito importante', + # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f8180e0cf21413dfb1d96d/', + 'description': u'PM sublinha que é o nível mais elevado de há vários anos' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) + title = re.sub(r' \| TVI Player$', '', title, re.IGNORECASE) + + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', webpage, 'm3u8 playlist') + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats + } + + +class IOLStreamIE(IOLIE): + _VALID_URL = r'http://tviplayer\.iol\.pt/direto/(?P\S+)' + _TESTS = [{ + 'url': 'http://tviplayer.iol.pt/direto/TVI', + 'info_dict': { + 'id': 'TVI', + 'ext': 'mp4', + 'title': 're:^Direto TVI', + 'description': u'A TVI ao pé de si. Sempre.', + 'is_live': True, + } + }] + + def _real_extract(self, url): + ret = IOLIE._real_extract(self, url) + ret['is_live'] = True + ret['title'] = self._live_title(ret['title']) + + return ret From 5e05a7e8575f5df931dce2e821c8f554aa39c598 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 05:10:54 +0100 Subject: [PATCH 20/38] Supports http_mp4 and adds another m3u8 source --- youtube_dl/extractor/iol.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 5d6f458ac..634152a3e 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -7,7 +7,7 @@ import re class IOLIE(InfoExtractor): - _VALID_URL = r'http://(tviplayer|www\.tvi24)\.iol\.pt/.*/(?P[0-9a-f]{24})[/0-9]*$' + _VALID_URL = r'http://(tviplayer|(www\.tvi24))\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', 'md5': 'a9b3e3630201401fc3b8099d9d689191', @@ -52,6 +52,22 @@ class IOLIE(InfoExtractor): m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', webpage, 'm3u8 playlist') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) + if match: + multimedia_id = match.group(1) + m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' + formats_m3u8_default = self._extract_m3u8_formats(m3u8_url_default, video_id, ext='mp4') + formats.extend(formats_m3u8_default) + formats.append({ + 'url': 'http://www.iol.pt/videos-file/' + multimedia_id + '-L-500k.mp4', + 'format_id': 'http_500', + 'tbr': 500, + 'protocol': 'http', + 'protocol': 'http', + 'preference': -1, + 'no_resume': False + }) + return { 'id': video_id, 'title': title, From 70885bb4600317ec54195037b9302060c7bbe29c Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 05:19:42 +0100 Subject: [PATCH 21/38] fixed test md5s http now permits tests on the first 100Kb --- youtube_dl/extractor/iol.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 634152a3e..3b268a859 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -10,7 +10,7 @@ class IOLIE(InfoExtractor): _VALID_URL = r'http://(tviplayer|(www\.tvi24))\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', - 'md5': 'a9b3e3630201401fc3b8099d9d689191', + 'md5': '983ca0edae370af67c458c6e5a71aae5', 'info_dict': { 'id': '55f878f90cf203f8b03cea6d', 'ext': 'mp4', @@ -20,7 +20,7 @@ class IOLIE(InfoExtractor): } }, { 'url': 'http://tviplayer.iol.pt/programa/isso-e-tudo-muito-bonito-mas/55f30f2e0cf2a6b037fc1f2f/video/55f730c40cf23fa665481b18', - 'md5': 'fb7231bf3a12eee731c56eaa6ecb5474', + 'md5': 'ef5171a5abf69197726e5d7c7633c27a', 'info_dict': { 'id': '55f730c40cf23fa665481b18', 'ext': 'mp4', @@ -30,7 +30,7 @@ class IOLIE(InfoExtractor): } }, { 'url': 'http://www.tvi24.iol.pt/videos/passos-criacao-de-emprego-e-facto-muito-importante/55f816640cf2e6961770ef7a/2', - 'md5': '49d4da9901f00e72a127ff4b635f69ee', + 'md5': 'd836f1225c289c7987beddebe11619b9', 'info_dict': { 'id': '55f816640cf2e6961770ef7a', 'ext': 'mp4', From 5eb4497afadfbd28d3b1a5939d8a285aa134f674 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 14:50:14 +0100 Subject: [PATCH 22/38] removed duplicate line --- youtube_dl/extractor/iol.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 3b268a859..1605ca7c3 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -63,7 +63,6 @@ class IOLIE(InfoExtractor): 'format_id': 'http_500', 'tbr': 500, 'protocol': 'http', - 'protocol': 'http', 'preference': -1, 'no_resume': False }) From 14c835962ccbe7c7269792e11517d5fada73cfbf Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 15:09:19 +0100 Subject: [PATCH 23/38] multimedia_id extracted from meta still uses old method as fallback --- youtube_dl/extractor/iol.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 1605ca7c3..7d22f6610 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -52,9 +52,14 @@ class IOLIE(InfoExtractor): m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', webpage, 'm3u8 playlist') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') - match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) - if match: - multimedia_id = match.group(1) + multimedia_id = self._html_search_meta('iol:id', webpage, 'multimedia_id', fatal=False, default=None) + if multimedia_id is None: + match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) + self.report_extraction('multimedia_id (fallback)') + if match: + multimedia_id = match.group(1) + + if multimedia_id is not None: m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' formats_m3u8_default = self._extract_m3u8_formats(m3u8_url_default, video_id, ext='mp4') formats.extend(formats_m3u8_default) From 13705095434a3ec44fc347c34c768ed7c48cd91d Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 15:09:19 +0100 Subject: [PATCH 24/38] multimedia_id extracted from meta still uses old method as fallback --- youtube_dl/extractor/iol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 7d22f6610..49ebf2368 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -65,7 +65,7 @@ class IOLIE(InfoExtractor): formats.extend(formats_m3u8_default) formats.append({ 'url': 'http://www.iol.pt/videos-file/' + multimedia_id + '-L-500k.mp4', - 'format_id': 'http_500', + 'format_id': 'http-500', 'tbr': 500, 'protocol': 'http', 'preference': -1, From 7eabb134fc9e77e096cd36b2eeac9d6f410947b2 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 16:19:57 +0100 Subject: [PATCH 25/38] rtmp not working --- youtube_dl/extractor/iol.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 49ebf2368..1f7f1e032 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -63,6 +63,17 @@ class IOLIE(InfoExtractor): m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' formats_m3u8_default = self._extract_m3u8_formats(m3u8_url_default, video_id, ext='mp4') formats.extend(formats_m3u8_default) + + server = 'video2.iol.pt' + formats.append({ + 'url': 'rtmp://'+server+'/vod', + 'play_path': 'mp4:' + multimedia_id + '-L-500k', + 'format_id': 'rtmp-500', + 'tbr': 500, + 'protocol': 'rtmp', + 'ext': 'mp4' + }) + formats.append({ 'url': 'http://www.iol.pt/videos-file/' + multimedia_id + '-L-500k.mp4', 'format_id': 'http-500', From c82b7ac8b0edf034058006291bd282c28195a97e Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 19:16:41 +0100 Subject: [PATCH 26/38] IOL RTMP support --- youtube_dl/extractor/iol.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 1f7f1e032..f0b000870 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -64,15 +64,27 @@ class IOLIE(InfoExtractor): formats_m3u8_default = self._extract_m3u8_formats(m3u8_url_default, video_id, ext='mp4') formats.extend(formats_m3u8_default) - server = 'video2.iol.pt' - formats.append({ - 'url': 'rtmp://'+server+'/vod', - 'play_path': 'mp4:' + multimedia_id + '-L-500k', - 'format_id': 'rtmp-500', - 'tbr': 500, - 'protocol': 'rtmp', - 'ext': 'mp4' - }) + # try rtmp format + if self._html_search_regex(r' Date: Sat, 10 Oct 2015 19:51:58 +0100 Subject: [PATCH 27/38] consider all iol.pt pages as download matches --- youtube_dl/extractor/iol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index f0b000870..0da8097bb 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -7,7 +7,7 @@ import re class IOLIE(InfoExtractor): - _VALID_URL = r'http://(tviplayer|(www\.tvi24))\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' + _VALID_URL = r'http://[^/]*\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', 'md5': '983ca0edae370af67c458c6e5a71aae5', From 3b4204aee32ec1c8b64547494bed409c5d09e031 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 21:23:56 +0100 Subject: [PATCH 28/38] fixed testing thumbnail typo --- youtube_dl/extractor/iol.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 0da8097bb..f0206dbac 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -15,7 +15,7 @@ class IOLIE(InfoExtractor): 'id': '55f878f90cf203f8b03cea6d', 'ext': 'mp4', 'title': u'Euromilhões - 15 de setembro de 2015', - # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f87a280cf2e6961770f01f', + 'thumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f87a280cf2e6961770f01f', 'description': u'Com Mónica Jardim' } }, { @@ -25,7 +25,7 @@ class IOLIE(InfoExtractor): 'id': '55f730c40cf23fa665481b18', 'ext': 'mp4', 'title': u'Isso é tudo muito bonito, mas: Concatena, filho, concatena', - # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f737330cf23fa665481b2d', + 'thumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f737330cf23fa665481b2d', 'description': u'Quando os adversários unem perspectivas.' } }, { @@ -35,7 +35,7 @@ class IOLIE(InfoExtractor): 'id': '55f816640cf2e6961770ef7a', 'ext': 'mp4', 'title': u'Passos: criação de emprego é facto muito importante', - # 'tumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f8180e0cf21413dfb1d96d/', + 'thumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f8180e0cf21413dfb1d96d/', 'description': u'PM sublinha que é o nível mais elevado de há vários anos' } }] From 6f2b17f2517cefe252b78ebd12d07702b1fd4f13 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 21:30:34 +0100 Subject: [PATCH 29/38] iol now tries to use javascript structures added tests for maisfutebol (complete with titles with no cruft) --- youtube_dl/extractor/iol.py | 44 +++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index f0206dbac..4e9a8c5f0 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import js_to_json, ExtractorError import re @@ -38,6 +39,26 @@ class IOLIE(InfoExtractor): 'thumbnail': 'http://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/55f8180e0cf21413dfb1d96d/', 'description': u'PM sublinha que é o nível mais elevado de há vários anos' } + }, { + 'url': 'http://www.maisfutebol.iol.pt/videos/560b04f80cf25f02cc1d843f/fc-porto/lopetegui-nao-quer-faltar-ao-respeito-ao-maccabi', + 'md5': '738a970259469fbb54b2d391c4c69dab', + 'info_dict': { + 'id': '560b04f80cf25f02cc1d843f', + 'ext': 'mp4', + 'title': u'Lopetegui não quer «faltar ao respeito ao Maccabi»', + 'thumbnail': 'http://www.maisfutebol.iol.pt/multimedia/oratvi/multimedia/imagem/id/560b06c50cf2c14000fb838d/600', + 'description': u'Treinador do FC Porto e a possibilidade de disparar na tabela nos dois jogos com os israelitas.' + } + }, { + 'url': 'http://www.maisfutebol.iol.pt/videos/5611a7e30cf2d8d8759054eb/liga/perdi-uma-semana-com-ewerton', + 'md5': '9535c58831ecd4bbb95e600d34eaeef8', + 'info_dict': { + 'id': '5611a7e30cf2d8d8759054eb', + 'ext': 'mp4', + 'title': u'«Perdi uma semana com Ewerton»', + 'thumbnail': 'http://www.maisfutebol.iol.pt/multimedia/oratvi/multimedia/imagem/id/5611aa840cf20a9cbc0da934/600', + 'description': u'Treinador explica situação do central.' + } }] def _real_extract(self, url): @@ -45,19 +66,28 @@ class IOLIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage) - title = re.sub(r' \| TVI Player$', '', title, re.IGNORECASE) - description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) - m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', webpage, 'm3u8 playlist') + + iol_js = self._html_search_regex(r'\.iolplayer\(\s*(\{.+?\})\s*\)', webpage, 'iolplayer', flags=re.DOTALL) + + # in a perfect world this would work but in practice it breaks too many times. RegExps are more "robust" + try: + iol_json = self._parse_json(iol_js, video_id, transform_source=js_to_json) + m3u8_url = iol_json['videoUrl'] + title = iol_json.get('title', title) # this title information has less cruft (defaults to _og_search_title) + except ExtractorError: + # need to parse using regexps + m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', iol_js, 'm3u8 playlist (json fallback)') + title_js = self._html_search_regex(r'''title:\s*'(.+?)'\s*,''', iol_js, 'title (json fallback)', fatal=False, default=None) + if title_js is not None: + title = title_js + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') multimedia_id = self._html_search_meta('iol:id', webpage, 'multimedia_id', fatal=False, default=None) if multimedia_id is None: - match = re.search(r'smil:([0-9a-f]{24})-L', m3u8_url, re.IGNORECASE) - self.report_extraction('multimedia_id (fallback)') - if match: - multimedia_id = match.group(1) + multimedia_id = self._search_regex(r'smil:([0-9a-f]{24})-L', m3u8_url, 'multimedia_id (fallback)', flags=re.IGNORECASE, default=None) if multimedia_id is not None: m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' From 81f60a06d850c4f27e22ba8a43ee9d15d9e58d85 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 22:22:00 +0100 Subject: [PATCH 30/38] need to clean title in live version --- youtube_dl/extractor/iol.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 4e9a8c5f0..89e5b0e7f 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -40,13 +40,13 @@ class IOLIE(InfoExtractor): 'description': u'PM sublinha que é o nível mais elevado de há vários anos' } }, { - 'url': 'http://www.maisfutebol.iol.pt/videos/560b04f80cf25f02cc1d843f/fc-porto/lopetegui-nao-quer-faltar-ao-respeito-ao-maccabi', + 'url': 'http://www.maisfutebol.iol.pt/videos/560b04f80cf25f02cc1d843f/fc-porto/lopetegui-nao-quer-faltar-ao-respeito-ao-maccabi', 'md5': '738a970259469fbb54b2d391c4c69dab', 'info_dict': { 'id': '560b04f80cf25f02cc1d843f', 'ext': 'mp4', 'title': u'Lopetegui não quer «faltar ao respeito ao Maccabi»', - 'thumbnail': 'http://www.maisfutebol.iol.pt/multimedia/oratvi/multimedia/imagem/id/560b06c50cf2c14000fb838d/600', + 'thumbnail': 'http://www.maisfutebol.iol.pt/multimedia/oratvi/multimedia/imagem/id/560b06c50cf2c14000fb838d/600', 'description': u'Treinador do FC Porto e a possibilidade de disparar na tabela nos dois jogos com os israelitas.' } }, { @@ -75,7 +75,7 @@ class IOLIE(InfoExtractor): try: iol_json = self._parse_json(iol_js, video_id, transform_source=js_to_json) m3u8_url = iol_json['videoUrl'] - title = iol_json.get('title', title) # this title information has less cruft (defaults to _og_search_title) + title = iol_json.get('title', title) # this title information has less cruft (defaults to _og_search_title) except ExtractorError: # need to parse using regexps m3u8_url = self._html_search_regex(r'''videoUrl:\s*'([^']+\.m3u8[^']*)'\s*,''', iol_js, 'm3u8 playlist (json fallback)') @@ -149,7 +149,9 @@ class IOLStreamIE(IOLIE): def _real_extract(self, url): ret = IOLIE._real_extract(self, url) + # can't find uncluttered title information for live + title = re.sub(r'\s*\|\s*TVI Player\s*$', '', ret['title'], re.IGNORECASE) ret['is_live'] = True - ret['title'] = self._live_title(ret['title']) + ret['title'] = self._live_title(title) return ret From e317041bda93c2e4ed609084c3b23db27485c0f7 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 22:34:55 +0100 Subject: [PATCH 31/38] maybe there are https sites --- youtube_dl/extractor/iol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 89e5b0e7f..0d70f6695 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -8,7 +8,7 @@ import re class IOLIE(InfoExtractor): - _VALID_URL = r'http://[^/]*\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' + _VALID_URL = r'https?://[^/]*\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', 'md5': '983ca0edae370af67c458c6e5a71aae5', From c08eeccfe1d235c0ea53b1da609f184d534ba109 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 22:37:15 +0100 Subject: [PATCH 32/38] multimedia_id should not be fatal --- youtube_dl/extractor/iol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 0d70f6695..7578afe36 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -87,7 +87,7 @@ class IOLIE(InfoExtractor): multimedia_id = self._html_search_meta('iol:id', webpage, 'multimedia_id', fatal=False, default=None) if multimedia_id is None: - multimedia_id = self._search_regex(r'smil:([0-9a-f]{24})-L', m3u8_url, 'multimedia_id (fallback)', flags=re.IGNORECASE, default=None) + multimedia_id = self._search_regex(r'smil:([0-9a-f]{24})-L', m3u8_url, 'multimedia_id (fallback)', flags=re.IGNORECASE, default=None, fatal=False) if multimedia_id is not None: m3u8_url_default = 'http://video-on-demand.iol.pt/vod_http/mp4:' + multimedia_id + '-L-500k.mp4/playlist.m3u8' From 9653ed1bc6be596457e9d9e9323cdb2f512a4e0b Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 10 Oct 2015 22:45:11 +0100 Subject: [PATCH 33/38] sensible default --- youtube_dl/extractor/iol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 7578afe36..4763aba1a 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -95,7 +95,7 @@ class IOLIE(InfoExtractor): formats.extend(formats_m3u8_default) # try rtmp format - if self._html_search_regex(r' Date: Sat, 10 Oct 2015 22:57:07 +0100 Subject: [PATCH 34/38] no download for stream test --- youtube_dl/extractor/iol.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 4763aba1a..2bc7dbf7a 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -144,7 +144,10 @@ class IOLStreamIE(IOLIE): 'title': 're:^Direto TVI', 'description': u'A TVI ao pé de si. Sempre.', 'is_live': True, - } + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): From 582c617a7ce0c5573366447d0aba133d0943c222 Mon Sep 17 00:00:00 2001 From: PC Date: Sun, 11 Oct 2015 18:57:56 +0100 Subject: [PATCH 35/38] added iol.pt to supportedsites --- docs/supportedsites.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 963b7bbb7..5d4475b07 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -324,6 +324,7 @@ - **Instagram** - **instagram:user**: Instagram user profile - **InternetVideoArchive** + - **IOL**: iol.pt sites (tvi archive/stream and maisfutebol) - **IPrima** - **iqiyi**: 爱奇艺 - **Ir90Tv** From 3553e1ec5c70f1f7388affd0cd46cd0970b1c523 Mon Sep 17 00:00:00 2001 From: PC Date: Thu, 15 Oct 2015 19:00:22 +0100 Subject: [PATCH 36/38] extractor descriptions --- docs/supportedsites.md | 1 - youtube_dl/extractor/iol.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5d4475b07..963b7bbb7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -324,7 +324,6 @@ - **Instagram** - **instagram:user**: Instagram user profile - **InternetVideoArchive** - - **IOL**: iol.pt sites (tvi archive/stream and maisfutebol) - **IPrima** - **iqiyi**: 爱奇艺 - **Ir90Tv** diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index 2bc7dbf7a..a3adc3924 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -8,6 +8,8 @@ import re class IOLIE(InfoExtractor): + IE_NAME = 'IOL' + IE_DESC = 'iol.pt sites (tvi archive and maisfutebol)' _VALID_URL = r'https?://[^/]*\.iol\.pt/.*/(?P[0-9a-f]{24})($|\/)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/programa/euromilhoes/53c6b3153004dc006243b07b/video/55f878f90cf203f8b03cea6d', @@ -135,6 +137,8 @@ class IOLIE(InfoExtractor): class IOLStreamIE(IOLIE): + IE_NAME = 'IOL:stream' + IE_DESC = 'iol.pt stream sites' _VALID_URL = r'http://tviplayer\.iol\.pt/direto/(?P\S+)' _TESTS = [{ 'url': 'http://tviplayer.iol.pt/direto/TVI', From accf39d6557af5aabb7dab3aad18416d63bf2547 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 29 Oct 2016 22:24:26 +0100 Subject: [PATCH 37/38] module import fix --- youtube_dl/extractor/__init__.py | 4 ---- youtube_dl/extractor/extractors.py | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1ab96a8cf..18d8dbcd6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -7,10 +7,6 @@ try: except ImportError: _LAZY_LOADER = False from .extractors import * -from .iol import ( - IOLIE, - IOLStreamIE -) _ALL_CLASSES = [ klass diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dea97920b..4dacb197b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -398,6 +398,10 @@ from .indavideo import ( from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE +from .iol import ( + IOLIE, + IOLStreamIE +) from .iprima import IPrimaIE from .iqiyi import IqiyiIE from .ir90tv import Ir90TvIE From acdc67d5033a02221200a49a0e50407ea111f8e7 Mon Sep 17 00:00:00 2001 From: PC Date: Sat, 29 Oct 2016 22:25:00 +0100 Subject: [PATCH 38/38] sort formats --- youtube_dl/extractor/iol.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/iol.py b/youtube_dl/extractor/iol.py index a3adc3924..76752e6d1 100644 --- a/youtube_dl/extractor/iol.py +++ b/youtube_dl/extractor/iol.py @@ -127,6 +127,8 @@ class IOLIE(InfoExtractor): 'no_resume': False }) + self._sort_formats(formats) + return { 'id': video_id, 'title': title,