From ead467a9c1cab3f26279ba9c57cf18598507ba79 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 28 May 2019 04:58:12 +0100 Subject: [PATCH 01/24] [rtp] fix extraction(closes #15099) --- youtube_dl/extractor/rtp.py | 83 ++++++++++++++----------------------- 1 file changed, 30 insertions(+), 53 deletions(-) diff --git a/youtube_dl/extractor/rtp.py b/youtube_dl/extractor/rtp.py index 533ee27cb..02986f442 100644 --- a/youtube_dl/extractor/rtp.py +++ b/youtube_dl/extractor/rtp.py @@ -1,9 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import ( + determine_ext, + js_to_json, +) class RTPIE(InfoExtractor): @@ -18,10 +20,6 @@ class RTPIE(InfoExtractor): 'description': 'As paixões musicais de António Cartaxo e António Macedo', 'thumbnail': r're:^https?://.*\.jpg', }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, { 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', 'only_matching': True, @@ -33,57 +31,36 @@ class RTPIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_meta( 'twitter:title', webpage, display_name='title', fatal=True) - description = self._html_search_meta('description', webpage) - thumbnail = self._og_search_thumbnail(webpage) - player_config = self._search_regex( - r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config') - config = self._parse_json(player_config, video_id) - - path, ext = config.get('file').rsplit('.', 1) - formats = [{ - 'format_id': 'rtmp', - 'ext': ext, - 'vcodec': config.get('type') == 'audio' and 'none' or None, - 'preference': -2, - 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config), - 'app': config.get('application'), - 'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path), - 'page_url': url, - 'rtmp_live': config.get('live', False), - 'player_url': 'http://programas.rtp.pt/play/player.swf?v3', - 'rtmp_real_time': True, - }] - - # Construct regular HTTP download URLs - replacements = { - 'audio': { - 'format_id': 'mp3', - 'pattern': r'^nas2\.share/wavrss/', - 'repl': 'http://rsspod.rtp.pt/podcasts/', - 'vcodec': 'none', - }, - 'video': { - 'format_id': 'mp4_h264', - 'pattern': r'^nas2\.share/h264/', - 'repl': 'http://rsspod.rtp.pt/videocasts/', - 'vcodec': 'h264', - }, - } - r = replacements[config['type']] - if re.match(r['pattern'], config['file']) is not None: - formats.append({ - 'format_id': r['format_id'], - 'url': re.sub(r['pattern'], r['repl'], config['file']), - 'vcodec': r['vcodec'], - }) - - self._sort_formats(formats) + config = self._parse_json(self._search_regex( + r'(?s)RTPPlayer\(({.+?})\);', webpage, + 'player config'), video_id, js_to_json) + file_url = config['file'] + ext = determine_ext(file_url) + if ext == 'm3u8': + file_key = config.get('fileKey') + formats = self._extract_m3u8_formats( + file_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=file_key) + if file_key: + formats.append({ + 'url': 'https://cdn-ondemand.rtp.pt' + file_key, + 'preference': 1, + }) + self._sort_formats(formats) + else: + formats = [{ + 'url': file_url, + 'ext': ext, + }] + if config.get('mediaType') == 'audio': + for f in formats: + f['vcodec'] = 'none' return { 'id': video_id, 'title': title, 'formats': formats, - 'description': description, - 'thumbnail': thumbnail, + 'description': self._html_search_meta(['description', 'twitter:description'], webpage), + 'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage), } From 33b2218b2f67b4c06d45e28035968320e0a0bf05 Mon Sep 17 00:00:00 2001 From: bitraid Date: Tue, 28 May 2019 15:31:11 +0300 Subject: [PATCH 02/24] [LiveLeak] Check if the original videos exist (closes #21206) (#21208) --- youtube_dl/extractor/liveleak.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 5df14bb41..4ac437c8b 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -82,6 +82,10 @@ class LiveLeakIE(InfoExtractor): }, { 'url': 'https://www.liveleak.com/view?t=HvHi_1523016227', 'only_matching': True, + }, { + # No original video + 'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804', + 'only_matching': True, }] @staticmethod @@ -134,11 +138,13 @@ class LiveLeakIE(InfoExtractor): orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url']) if a_format['url'] != orig_url: format_id = a_format.get('format_id') - formats.append({ - 'format_id': 'original' + ('-' + format_id if format_id else ''), - 'url': orig_url, - 'preference': 1, - }) + format_id = 'original' + ('-' + format_id if format_id else '') + if self._is_valid_url(orig_url, video_id, format_id): + formats.append({ + 'format_id': format_id, + 'url': orig_url, + 'preference': 1, + }) self._sort_formats(formats) info_dict['formats'] = formats From 26a87972a9dcd4eac6112c06372745d6d0983f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 29 May 2019 04:42:19 +0700 Subject: [PATCH 03/24] [viki] Switch to HTTPS (closes #21001) --- youtube_dl/extractor/viki.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 546de95d8..b0dcdc0e6 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -21,7 +21,7 @@ from ..utils import ( class VikiBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/' _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com' - _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s' + _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s' _APP = '100005a' _APP_VERSION = '2.2.5.1428709186' @@ -377,7 +377,7 @@ class VikiChannelIE(VikiBaseIE): for video in page['response']: video_id = video['id'] entries.append(self.url_result( - 'http://www.viki.com/videos/%s' % video_id, 'Viki')) + 'https://www.viki.com/videos/%s' % video_id, 'Viki')) if not page['pagination']['next']: break From 0e2dd3fcbce3a01dc4f6ce3911f5f279c3827edf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 1 Jun 2019 11:16:44 +0100 Subject: [PATCH 04/24] [vrv] extract adaptive_hls formats(closes #21243) --- youtube_dl/extractor/vrv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index b698bf66c..c814a8a4a 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -130,7 +130,7 @@ class VRVIE(VRVBaseIE): self._TOKEN_SECRET = token_credentials['oauth_token_secret'] def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): - if not url or stream_format not in ('hls', 'dash'): + if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'): return [] stream_id_list = [] if audio_lang: @@ -140,7 +140,7 @@ class VRVIE(VRVBaseIE): format_id = stream_format if stream_id_list: format_id += '-' + '-'.join(stream_id_list) - if stream_format == 'hls': + if 'hls' in stream_format: adaptive_formats = self._extract_m3u8_formats( url, video_id, 'mp4', m3u8_id=format_id, note='Downloading %s information' % format_id, From c5eb75b35a709baa134fb96e774a4b712cc65e93 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 1 Jun 2019 15:12:30 +0100 Subject: [PATCH 05/24] [prosiebensat1] add support for new API(closes #21272) --- youtube_dl/extractor/prosiebensat1.py | 197 ++++++++++++++++---------- 1 file changed, 121 insertions(+), 76 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7d11c2b9b..e19a470a5 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -16,6 +16,11 @@ from ..utils import ( class ProSiebenSat1BaseIE(InfoExtractor): + _GEO_COUNTRIES = ['DE'] + _ACCESS_ID = None + _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear' + _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get' + def _extract_video_info(self, url, clip_id): client_location = url @@ -31,93 +36,128 @@ class ProSiebenSat1BaseIE(InfoExtractor): if video.get('is_protected') is True: raise ExtractorError('This video is DRM protected.', expected=True) - duration = float_or_none(video.get('duration')) - source_ids = [compat_str(source['id']) for source in video['sources']] - - client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() - - sources = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, - clip_id, 'Downloading sources JSON', query={ - 'access_token': self._TOKEN, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': self._CLIENT_NAME, - }) - server_id = sources['server_id'] - - def fix_bitrate(bitrate): - bitrate = int_or_none(bitrate) - if not bitrate: - return None - return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate - formats = [] - for source_id in source_ids: - client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() - urls = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, - clip_id, 'Downloading urls JSON', fatal=False, query={ + if self._ACCESS_ID: + raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID + server_token = (self._download_json( + self._V4_BASE_URL + 'protocols', clip_id, + 'Downloading protocols JSON', + headers=self.geo_verification_headers(), query={ + 'access_id': self._ACCESS_ID, + 'client_token': sha1((raw_ct).encode()).hexdigest(), + 'video_id': clip_id, + }, fatal=False) or {}).get('server_token') + if server_token: + urls = (self._download_json( + self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ + 'access_id': self._ACCESS_ID, + 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(), + 'protocols': self._SUPPORTED_PROTOCOLS, + 'server_token': server_token, + 'video_id': clip_id, + }, fatal=False) or {}).get('urls') or {} + for protocol, variant in urls.items(): + source_url = variant.get('clear', {}).get('url') + if not source_url: + continue + if protocol == 'dash': + formats.extend(self._extract_mpd_formats( + source_url, clip_id, mpd_id=protocol, fatal=False)) + elif protocol == 'hls': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id=protocol, fatal=False)) + else: + formats.append({ + 'url': source_url, + 'format_id': protocol, + }) + if not formats: + source_ids = [compat_str(source['id']) for source in video['sources']] + + client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + + sources = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, + clip_id, 'Downloading sources JSON', query={ 'access_token': self._TOKEN, 'client_id': client_id, 'client_location': client_location, 'client_name': self._CLIENT_NAME, - 'server_id': server_id, - 'source_ids': source_id, }) - if not urls: - continue - if urls.get('status_code') != 0: - raise ExtractorError('This video is unavailable', expected=True) - urls_sources = urls['sources'] - if isinstance(urls_sources, dict): - urls_sources = urls_sources.values() - for source in urls_sources: - source_url = source.get('url') - if not source_url: + server_id = sources['server_id'] + + def fix_bitrate(bitrate): + bitrate = int_or_none(bitrate) + if not bitrate: + return None + return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate + + for source_id in source_ids: + client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + urls = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, + clip_id, 'Downloading urls JSON', fatal=False, query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'server_id': server_id, + 'source_ids': source_id, + }) + if not urls: continue - protocol = source.get('protocol') - mimetype = source.get('mimetype') - if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': - formats.extend(self._extract_f4m_formats( - source_url, clip_id, f4m_id='hds', fatal=False)) - elif mimetype == 'application/x-mpegURL': - formats.extend(self._extract_m3u8_formats( - source_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif mimetype == 'application/dash+xml': - formats.extend(self._extract_mpd_formats( - source_url, clip_id, mpd_id='dash', fatal=False)) - else: - tbr = fix_bitrate(source['bitrate']) - if protocol in ('rtmp', 'rtmpe'): - mobj = re.search(r'^(?Prtmpe?://[^/]+)/(?P.+)$', source_url) - if not mobj: - continue - path = mobj.group('path') - mp4colon_index = path.rfind('mp4:') - app = path[:mp4colon_index] - play_path = path[mp4colon_index:] - formats.append({ - 'url': '%s/%s' % (mobj.group('url'), app), - 'app': app, - 'play_path': play_path, - 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', - 'page_url': 'http://www.prosieben.de', - 'tbr': tbr, - 'ext': 'flv', - 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), - }) + if urls.get('status_code') != 0: + raise ExtractorError('This video is unavailable', expected=True) + urls_sources = urls['sources'] + if isinstance(urls_sources, dict): + urls_sources = urls_sources.values() + for source in urls_sources: + source_url = source.get('url') + if not source_url: + continue + protocol = source.get('protocol') + mimetype = source.get('mimetype') + if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, clip_id, f4m_id='hds', fatal=False)) + elif mimetype == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif mimetype == 'application/dash+xml': + formats.extend(self._extract_mpd_formats( + source_url, clip_id, mpd_id='dash', fatal=False)) else: - formats.append({ - 'url': source_url, - 'tbr': tbr, - 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), - }) + tbr = fix_bitrate(source['bitrate']) + if protocol in ('rtmp', 'rtmpe'): + mobj = re.search(r'^(?Prtmpe?://[^/]+)/(?P.+)$', source_url) + if not mobj: + continue + path = mobj.group('path') + mp4colon_index = path.rfind('mp4:') + app = path[:mp4colon_index] + play_path = path[mp4colon_index:] + formats.append({ + 'url': '%s/%s' % (mobj.group('url'), app), + 'app': app, + 'play_path': play_path, + 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', + 'page_url': 'http://www.prosieben.de', + 'tbr': tbr, + 'ext': 'flv', + 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), + }) + else: + formats.append({ + 'url': source_url, + 'tbr': tbr, + 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), + }) self._sort_formats(formats) return { - 'duration': duration, + 'duration': float_or_none(video.get('duration')), 'formats': formats, } @@ -344,6 +384,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): _TOKEN = 'prosieben' _SALT = '01!8d8F_)r9]4s[qeuXfP%' _CLIENT_NAME = 'kolibri-2.0.19-splec4' + + _ACCESS_ID = 'x_prosiebenmaxx-de' + _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag' + _IV = 'Aeluchoc6aevechuipiexeeboowedaok' + _CLIPID_REGEXES = [ r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', From 0c84002650cf21f38685a7446cd4cdb0f963a412 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 3 Jun 2019 12:43:39 +0100 Subject: [PATCH 06/24] [southpark] add support for videos with english audio track(closes #21271) --- youtube_dl/extractor/southpark.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index da75a43a7..5374c8423 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -78,6 +78,10 @@ class SouthParkDeIE(SouthParkIE): 'only_matching': True, }] + def _real_initialize(self): + lang = self._get_cookies('http://www.southpark.de/alle-episoden/').get('SPS_video_language') + self._LANG = lang.value if lang else 'de' + class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl' From c94c121a99ec893962aecbaf1f445dafc9e22bf1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 4 Jun 2019 00:37:58 +0100 Subject: [PATCH 07/24] Revert "[southpark] add support for videos with english audio track" This reverts commit 0c84002650cf21f38685a7446cd4cdb0f963a412. --- youtube_dl/extractor/southpark.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 5374c8423..da75a43a7 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -78,10 +78,6 @@ class SouthParkDeIE(SouthParkIE): 'only_matching': True, }] - def _real_initialize(self): - lang = self._get_cookies('http://www.southpark.de/alle-episoden/').get('SPS_video_language') - self._LANG = lang.value if lang else 'de' - class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl' From 59ca17b1c8351506fc10e7346ebd3fe238ddbf24 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 4 Jun 2019 21:03:52 +0100 Subject: [PATCH 08/24] [vvvvid] relax _VALID_URL(closes #21299) --- youtube_dl/extractor/vvvvid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py index 3d0dc403b..6906cd2ab 100644 --- a/youtube_dl/extractor/vvvvid.py +++ b/youtube_dl/extractor/vvvvid.py @@ -12,7 +12,7 @@ from ..utils import ( class VVVVIDIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/#!(?:show|anime|film|series)/(?P\d+)/[^/]+/(?P\d+)/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/(?P\d+)/[^/]+/(?P\d+)/(?P[0-9]+)' _TESTS = [{ # video_type == 'video/vvvvid' 'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong', From 1a01639bf9514c20d54e7460ba9b493b3283ca9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 5 Jun 2019 03:06:35 +0700 Subject: [PATCH 09/24] [downloader/common] Improve rate limit (#21301) --- youtube_dl/downloader/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 646d7f779..1cdba89cd 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -176,7 +176,9 @@ class FileDownloader(object): return speed = float(byte_counter) / elapsed if speed > rate_limit: - time.sleep(max((byte_counter // rate_limit) - elapsed, 0)) + sleep_time = float(byte_counter) / rate_limit - elapsed + if sleep_time > 0: + time.sleep(sleep_time) def temp_name(self, filename): """Returns a temporary filename for the given filename.""" From 2e11e51c041bd95dd1305af5b958abacc27f39f7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 5 Jun 2019 01:22:57 +0100 Subject: [PATCH 10/24] [cbsnews] fix extraction(closes #9659)(closes #15397) --- youtube_dl/extractor/cbsnews.py | 78 ++++++++++++++++++------------ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 47 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 51df15fac..345debcf0 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -1,40 +1,62 @@ # coding: utf-8 from __future__ import unicode_literals +import re +import zlib + from .common import InfoExtractor from .cbs import CBSIE +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote, +) from ..utils import ( parse_duration, ) +class CBSNewsEmbedIE(CBSIE): + IE_NAME = 'cbsnews:embed' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P.+)' + _TESTS = [{ + 'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A', + 'only_matching': True, + }] + + def _real_extract(self, url): + item = self._parse_json(zlib.decompress(compat_b64decode( + compat_urllib_parse_unquote(self._match_id(url))), + -zlib.MAX_WBITS), None)['video']['items'][0] + return self._extract_video_info(item['mpxRefId'], 'cbsnews') + + class CBSNewsIE(CBSIE): IE_NAME = 'cbsnews' IE_DESC = 'CBS News' - _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P[\da-z_-]+)' _TESTS = [ { # 60 minutes 'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/', 'info_dict': { - 'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_', - 'ext': 'mp4', - 'title': 'Artificial Intelligence', - 'description': 'md5:8818145f9974431e0fb58a1b8d69613c', + 'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4', + 'ext': 'flv', + 'title': 'Artificial Intelligence, real-life applications', + 'description': 'md5:a7aaf27f1b4777244de8b0b442289304', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1606, + 'duration': 317, 'uploader': 'CBSI-NEW', - 'timestamp': 1498431900, - 'upload_date': '20170625', + 'timestamp': 1476046464, + 'upload_date': '20161009', }, 'params': { - # m3u8 download + # rtmp download 'skip_download': True, }, }, { - 'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', + 'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', 'info_dict': { 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y', 'ext': 'mp4', @@ -60,37 +82,29 @@ class CBSNewsIE(CBSIE): # 48 hours 'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/', 'info_dict': { - 'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1', - 'ext': 'mp4', 'title': 'Cold as Ice', - 'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.', - 'upload_date': '20170604', - 'timestamp': 1496538000, - 'uploader': 'CBSI-NEW', - }, - 'params': { - 'skip_download': True, + 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?', }, + 'playlist_mincount': 7, }, ] def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) - video_info = self._parse_json(self._html_search_regex( - r'(?: