diff --git a/youtube_dl/extractor/rtllu.py b/youtube_dl/extractor/rtllu.py
index 454005388..24f95cd2a 100644
--- a/youtube_dl/extractor/rtllu.py
+++ b/youtube_dl/extractor/rtllu.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -22,9 +23,7 @@ class RtlluIE(InfoExtractor):
}
def _real_extract(self, url):
- match = self._VALID_URL_RE.match(url)
- id = match.group('id')
-
+ id = self._match_id(url)
webpage = self._download_webpage(url, id)
javascript_regex = r''
@@ -32,60 +31,91 @@ class RtlluIE(InfoExtractor):
try:
javascript_sources_regex = r'object.*\.sources = \'(?P<value>.*?)\';'
- sources = json.loads(re.search(javascript_sources_regex, javascript).group('value'))
+ sources = self._search_regex(javascript_sources_regex, javascript, 'sources')
+ sources = json.loads(sources)
- javascript_videoid_regex = r'object.*\.videoid = \'(?P<value>.*?)\';'
- javascript_videoid = re.search(javascript_videoid_regex, javascript).group('value')
+ videoid_regex = r'object.*\.videoid = \'(?P<value>.*?)\';'
+ videoid = self._search_regex(videoid_regex, javascript, 'videoid', fatal=False, default=id)
- javascript_publicdate_regex = r'object.*\.publicdate = \'(?P<value>.*?)\';'
- javascript_publicdate = re.search(javascript_publicdate_regex, javascript).group('value')
+ publicdate_regex = r'object.*\.publicdate = \'(?P<value>.*?)\';'
+ publicdate = self._search_regex(publicdate_regex, javascript, 'publicdate', fatal=False)
- javascript_thumbnail_regex = r'object.*\.thumbnail = \'(?P<value>.*?)\';'
- javascript_thumbnail = re.search(javascript_thumbnail_regex, javascript).group('value')
+ thumbnail_regex = r'object.*\.thumbnail = \'(?P<value>.*?)\';'
+ thumbnail = self._search_regex(thumbnail_regex, javascript, 'thumbnail', fatal=False)
- formats = [
- {
- 'url': sources['rtmp']['src'],
- 'format': 'RTMP Stream',
- 'format_id': 'rtmp',
- 'protocol': 'rtmp',
- },
+ formats = []
- {
- 'url': sources['httplq']['src'],
- 'format': 'Low Quality',
- 'format_id': 'lq',
- 'protocol': 'http',
- },
- {
- 'url': sources['http']['src'],
- 'format': 'Standard Quality',
- 'format_id': 'sd',
- 'protocol': 'http',
- },
- {
- 'url': sources['httphq']['src'],
- 'format': 'High Quality',
- 'format_id': 'hq',
- 'protocol': 'http',
- },
- ]
+ rtmp_source = sources.get('rtmp')
+ if rtmp_source is not None:
+ rtmp_url = rtmp_source.get('src')
+
+ if rtmp_url is not None:
+ formats.append(
+ {
+ 'url': rtmp_url,
+ 'format': 'RTMP Stream',
+ 'format_id': 'rtmp',
+ 'protocol': 'rtmp'
+ }
+ )
+
+ httplq_source = sources.get('httplq')
+ if httplq_source is not None:
+ httplq_url = httplq_source.get('src')
+
+ if httplq_url is not None:
+ formats.append(
+ {
+ 'url': httplq_url,
+ 'format': 'Low Quality',
+ 'format_id': 'lq',
+ 'protocol': 'http',
+ }
+ )
+
+ http_source = sources.get('http')
+ if http_source is not None:
+ http_url = http_source.get('src')
+
+ if http_url is not None:
+ formats.append(
+ {
+ 'url': http_url,
+ 'format': 'Standard Quality',
+ 'format_id': 'sd',
+ 'protocol': 'http',
+ }
+ )
+
+ httphq_source = sources.get('httphq')
+ if httphq_source is not None:
+ httphq_url = httphq_source.get('src')
+
+ if httphq_url is not None:
+ formats.append(
+ {
+ 'url': httphq_url,
+ 'format': 'High Quality',
+ 'format_id': 'hq',
+ 'protocol': 'http',
+ }
+ )
return {
- 'id': javascript_videoid or id,
+ 'id': videoid,
'title': self.get_video_title(webpage, javascript),
'formats': formats,
- 'thumbnail': javascript_thumbnail,
- 'upload_date': javascript_publicdate,
+ 'thumbnail': thumbnail,
+ 'upload_date': publicdate,
}
except AttributeError:
- javascript_mp3_regex = r'play_mp3\("object[0-9]*", "(?P<value>.*?)",'
- javascript_mp3 = re.search(javascript_mp3_regex, javascript).group('value')
+ mp3_regex = r'play_mp3\("object[0-9]*", "(?P<value>.*?)",'
+ mp3_url = self._search_regex(mp3_regex, javascript, 'mp3_url')
return {
'id': id,
'title': self.get_audio_title(webpage),
- 'url': javascript_mp3,
+ 'url': mp3_url,
}
def get_video_title(self, webpage, javascript):
@@ -97,7 +127,7 @@ class RtlluIE(InfoExtractor):
title = title[-1]
javascript_title_regex = r'object.*\.title = \'(?P<value>.*?)\';'
- javascript_title = re.search(javascript_title_regex, javascript).group('value')
+ javascript_title = self._search_regex(javascript_title_regex, javascript, 'javascript_title', fatal=False)
return javascript_title or title or self._og_search_title(webpage)
def get_audio_title(self, webpage):