From a9bbd26f1d2bb45205f9fbd2626569522049e40e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Jan 2016 10:49:27 +0600 Subject: [PATCH] [bigflix] Improve formats extraction --- youtube_dl/extractor/bigflix.py | 41 ++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py index b7e498436..24720de86 100644 --- a/youtube_dl/extractor/bigflix.py +++ b/youtube_dl/extractor/bigflix.py @@ -20,7 +20,7 @@ class BigflixIE(InfoExtractor): 'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d', } }, { - # multiple formats + # 2 formats 'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070', 'info_dict': { 'id': '16070', @@ -32,6 +32,10 @@ class BigflixIE(InfoExtractor): 'params': { 'skip_download': True, } + }, { + # multiple formats + 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967', + 'only_matching': True, }] def _real_extract(self, url): @@ -45,20 +49,31 @@ class BigflixIE(InfoExtractor): def decode_url(quoted_b64_url): return base64.b64decode(compat_urllib_parse_unquote( - quoted_b64_url)).encode('ascii').decode('utf-8') + quoted_b64_url).encode('ascii')).decode('utf-8') - formats = [{ - 'url': decode_url(encoded_url), - 'format_id': '%sp' % height, - 'height': int(height), - } for height, encoded_url in re.findall( - r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage)] + formats = [] + for height, encoded_url in re.findall( + r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage): + video_url = decode_url(encoded_url) + f = { + 'url': video_url, + 'format_id': '%sp' % height, + 'height': int(height), + } + if video_url.startswith('rtmp'): + f['ext'] = 'flv' + formats.append(f) - if not formats: - formats.append({ - 'url': decode_url(self._search_regex( - r'file=([^&]+)', webpage, 'video url')), - }) + file_url = self._search_regex( + r'file=([^&]+)', webpage, 'video url', default=None) + if file_url: + video_url = decode_url(file_url) + if all(f['url'] != video_url for f in formats): + formats.append({ + 'url': decode_url(file_url), + }) + + self._sort_formats(formats) description = self._html_search_meta('description', webpage)