diff --git a/youtube_dl/extractor/rtllu.py b/youtube_dl/extractor/rtllu.py index 454005388..24f95cd2a 100644 --- a/youtube_dl/extractor/rtllu.py +++ b/youtube_dl/extractor/rtllu.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re @@ -22,9 +23,7 @@ class RtlluIE(InfoExtractor): } def _real_extract(self, url): - match = self._VALID_URL_RE.match(url) - id = match.group('id') - + id = self._match_id(url) webpage = self._download_webpage(url, id) javascript_regex = r'' @@ -32,60 +31,91 @@ class RtlluIE(InfoExtractor): try: javascript_sources_regex = r'object.*\.sources = \'(?P<value>.*?)\';' - sources = json.loads(re.search(javascript_sources_regex, javascript).group('value')) + sources = self._search_regex(javascript_sources_regex, javascript, 'sources') + sources = json.loads(sources) - javascript_videoid_regex = r'object.*\.videoid = \'(?P<value>.*?)\';' - javascript_videoid = re.search(javascript_videoid_regex, javascript).group('value') + videoid_regex = r'object.*\.videoid = \'(?P<value>.*?)\';' + videoid = self._search_regex(videoid_regex, javascript, 'videoid', fatal=False, default=id) - javascript_publicdate_regex = r'object.*\.publicdate = \'(?P<value>.*?)\';' - javascript_publicdate = re.search(javascript_publicdate_regex, javascript).group('value') + publicdate_regex = r'object.*\.publicdate = \'(?P<value>.*?)\';' + publicdate = self._search_regex(publicdate_regex, javascript, 'publicdate', fatal=False) - javascript_thumbnail_regex = r'object.*\.thumbnail = \'(?P<value>.*?)\';' - javascript_thumbnail = re.search(javascript_thumbnail_regex, javascript).group('value') + thumbnail_regex = r'object.*\.thumbnail = \'(?P<value>.*?)\';' + thumbnail = self._search_regex(thumbnail_regex, javascript, 'thumbnail', fatal=False) - formats = [ - { - 'url': sources['rtmp']['src'], - 'format': 'RTMP Stream', - 'format_id': 'rtmp', - 'protocol': 'rtmp', - }, + formats = [] - { - 'url': sources['httplq']['src'], - 'format': 'Low Quality', - 'format_id': 'lq', - 'protocol': 'http', - }, - { 
- 'url': sources['http']['src'], - 'format': 'Standard Quality', - 'format_id': 'sd', - 'protocol': 'http', - }, - { - 'url': sources['httphq']['src'], - 'format': 'High Quality', - 'format_id': 'hq', - 'protocol': 'http', - }, - ] + rtmp_source = sources.get('rtmp') + if rtmp_source is not None: + rtmp_url = rtmp_source.get('src') + + if rtmp_url is not None: + formats.append( + { + 'url': rtmp_url, + 'format': 'RTMP Stream', + 'format_id': 'rtmp', + 'protocol': 'rtmp' + } + ) + + httplq_source = sources.get('httplq') + if httplq_source is not None: + httplq_url = httplq_source.get('src') + + if httplq_url is not None: + formats.append( + { + 'url': httplq_url, + 'format': 'Low Quality', + 'format_id': 'lq', + 'protocol': 'http', + } + ) + + http_source = sources.get('http') + if http_source is not None: + http_url = http_source.get('src') + + if http_url is not None: + formats.append( + { + 'url': http_url, + 'format': 'Standard Quality', + 'format_id': 'sd', + 'protocol': 'http', + } + ) + + httphq_source = sources.get('httphq') + if httphq_source is not None: + httphq_url = httphq_source.get('src') + + if httphq_url is not None: + formats.append( + { + 'url': httphq_url, + 'format': 'High Quality', + 'format_id': 'hq', + 'protocol': 'http', + } + ) return { - 'id': javascript_videoid or id, + 'id': videoid, 'title': self.get_video_title(webpage, javascript), 'formats': formats, - 'thumbnail': javascript_thumbnail, - 'upload_date': javascript_publicdate, + 'thumbnail': thumbnail, + 'upload_date': publicdate, } except AttributeError: - javascript_mp3_regex = r'play_mp3\("object[0-9]*", "(?P<value>.*?)",' - javascript_mp3 = re.search(javascript_mp3_regex, javascript).group('value') + mp3_regex = r'play_mp3\("object[0-9]*", "(?P<value>.*?)",' + mp3_url = self._search_regex(mp3_regex, javascript, 'mp3_url') return { 'id': id, 'title': self.get_audio_title(webpage), - 'url': javascript_mp3, + 'url': mp3_url, } def get_video_title(self, webpage, javascript): @@ -97,7 +127,7 @@ 
class RtlluIE(InfoExtractor): title = title[-1] javascript_title_regex = r'object.*\.title = \'(?P<value>.*?)\';' - javascript_title = re.search(javascript_title_regex, javascript).group('value') + javascript_title = self._search_regex(javascript_title_regex, javascript, 'javascript_title', fatal=False) return javascript_title or title or self._og_search_title(webpage) def get_audio_title(self, webpage):