From 6797de75e059ec02ed91548ec8bfed8c89578344 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 3 Dec 2019 11:37:30 +0100 Subject: [PATCH 01/20] [vzaar] add support for AES HLS manifests(closes #17521)(closes #23299) --- youtube_dl/extractor/vzaar.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index 3336e6c15..b43975ead 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -32,6 +32,15 @@ class VzaarIE(InfoExtractor): 'ext': 'mp3', 'title': 'MP3', }, + }, { + # hlsAes = true + 'url': 'https://view.vzaar.com/10165560/player', + 'md5': '5f66f121fb28b9d16cce3d4f3df7e72e', + 'info_dict': { + 'id': '10165560', + 'ext': 'mp4', + 'title': 'Video Demo vzaar Secure.mp4', + }, }, { # with null videoTitle 'url': 'https://view.vzaar.com/20313539/download', @@ -58,6 +67,7 @@ class VzaarIE(InfoExtractor): f = { 'url': source_url, 'format_id': 'http', + 'preference': 1, } if 'audio' in source_url: f.update({ @@ -75,12 +85,13 @@ class VzaarIE(InfoExtractor): video_guid = video_data.get('guid') usp = video_data.get('usp') - if isinstance(video_guid, compat_str) and isinstance(usp, dict): - m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?' - % (video_guid, video_id)) + '&'.join( + if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict): + hls_aes = video_data.get('hlsAes') + m3u8_url = ('http://fable.vzaar.com/v5/usp%s/%s/%s.ism/.m3u8?' 
+ % ('aes' if hls_aes else '', video_guid, video_id)) + '&'.join( '%s=%s' % (k, v) for k, v in usp.items()) formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_url, video_id, 'mp4', 'm3u8' if hls_aes else 'm3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) From c712b16dc41b792757ee8e13a59bce9ab3b4e5b4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 3 Dec 2019 12:23:08 +0100 Subject: [PATCH 02/20] [vzaar] override AES decryption key URL(closes #17521) --- youtube_dl/downloader/hls.py | 4 ++-- youtube_dl/extractor/vzaar.py | 15 +++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index b59aad73f..84bc34928 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -64,7 +64,7 @@ class HlsFD(FragmentFD): s = urlh.read().decode('utf-8', 'ignore') if not self.can_download(s, info_dict): - if info_dict.get('extra_param_to_segment_url'): + if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): self.report_error('pycrypto not found. 
Please install it.') return False self.report_warning( @@ -169,7 +169,7 @@ class HlsFD(FragmentFD): if decrypt_info['METHOD'] == 'AES-128': iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( - self._prepare_url(info_dict, decrypt_info['URI'])).read() + self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read() frag_content = AES.new( decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) self._append_fragment(ctx, frag_content) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index b43975ead..f02e8530b 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -87,12 +87,15 @@ class VzaarIE(InfoExtractor): usp = video_data.get('usp') if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict): hls_aes = video_data.get('hlsAes') - m3u8_url = ('http://fable.vzaar.com/v5/usp%s/%s/%s.ism/.m3u8?' - % ('aes' if hls_aes else '', video_guid, video_id)) + '&'.join( - '%s=%s' % (k, v) for k, v in usp.items()) - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8' if hls_aes else 'm3u8_native', - m3u8_id='hls', fatal=False)) + qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items()) + url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' 
% ('aes' if hls_aes else '', video_guid, video_id) + m3u8_formats = self._extract_m3u8_formats( + url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + if hls_aes: + for f in m3u8_formats: + f['_decryption_key_url'] = url_templ % ('goose', '') + qs + formats.extend(m3u8_formats) self._sort_formats(formats) From 63fe44eb4dc91e2a9755a5cf23b9d39cbd36dae7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 3 Dec 2019 12:31:16 +0100 Subject: [PATCH 03/20] [vzaar] update test --- youtube_dl/extractor/vzaar.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index f02e8530b..b7d02fca3 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -34,12 +34,15 @@ class VzaarIE(InfoExtractor): }, }, { # hlsAes = true - 'url': 'https://view.vzaar.com/10165560/player', - 'md5': '5f66f121fb28b9d16cce3d4f3df7e72e', + 'url': 'https://view.vzaar.com/11379930/player', 'info_dict': { - 'id': '10165560', + 'id': '11379930', 'ext': 'mp4', - 'title': 'Video Demo vzaar Secure.mp4', + 'title': 'Videoaula', + }, + 'params': { + # m3u8 download + 'skip_download': True, }, }, { # with null videoTitle From 22974a378273bb43641813b0ec97d0dff1248d48 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 3 Dec 2019 21:13:44 +0100 Subject: [PATCH 04/20] [yahoo] correct gyao brightcove player id(closes #23303) --- youtube_dl/extractor/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b9a9e88a0..238d9cea0 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -383,7 +383,7 @@ class YahooGyaOPlayerIE(InfoExtractor): 'id': video_id, 'title': video['title'], 'url': smuggle_url( - 'http://players.brightcove.net/4235717419001/default_default/index.html?videoId=' + video['videoId'], + 
'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['videoId'], {'geo_countries': ['JP']}), 'description': video.get('longDescription'), 'ie_key': BrightcoveNewIE.ie_key(), From 3ae878605dd28461896e62f56e20bc50336c45bd Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 4 Dec 2019 17:20:53 +0100 Subject: [PATCH 05/20] [ufctv] fix extraction and add support for UFC Arabia(closes #23312) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/imggaming.py | 109 +++++++++++++++++++++++++++++ youtube_dl/extractor/ufctv.py | 73 +++---------------- 3 files changed, 121 insertions(+), 66 deletions(-) create mode 100644 youtube_dl/extractor/imggaming.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 465d9d364..74bf58f38 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1234,7 +1234,10 @@ from .udemy import ( UdemyCourseIE ) from .udn import UDNEmbedIE -from .ufctv import UFCTVIE +from .ufctv import ( + UFCTVIE, + UFCArabiaIE, +) from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE from .dlive import ( diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py new file mode 100644 index 000000000..96fddeac0 --- /dev/null +++ b/youtube_dl/extractor/imggaming.py @@ -0,0 +1,109 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + try_get, +) + + +class ImgGamingBaseIE(InfoExtractor): + _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/' + _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf' + _HEADERS = None + _LOGIN_REQUIRED = True + _LOGIN_SUFFIX = '' + _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'} + _REALM = None + _TOKEN = None + _VALID_URL_TEMPL = r'https?://%s/(?Plive|video)/(?P\d+)' + + def _real_initialize(self): + if 
not self._LOGIN_REQUIRED: + return + + self._HEADERS = { + 'Realm': 'dce.' + self._REALM, + 'x-api-key': self._API_KEY, + } + + email, password = self._get_login_info() + if email is None: + self.raise_login_required() + + p_headers = self._HEADERS.copy() + p_headers['Content-Type'] = 'application/json' + self._HEADERS['Authorization'] = 'Bearer ' + self._download_json( + self._API_BASE + 'login' + self._LOGIN_SUFFIX, + None, 'Logging in', data=json.dumps({ + 'id': email, + 'secret': password, + }).encode(), headers=p_headers)['authorisationToken'] + + def _extract_media_id(self, url, display_id): + return display_id + + def _extract_dve_api_url(self, media_id, media_type): + url = self._API_BASE + 'stream' + if media_type == 'video': + url += '/vod/' + media_id + else: + url += '?eventId=' + media_id + try: + return self._download_json( + url, media_id, headers=self._HEADERS)['playerUrlCallback'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + raise ExtractorError( + self._parse_json(e.cause.read().decode(), media_id)['messages'][0], + expected=True) + raise + + def _real_extract(self, url): + media_type, display_id = re.match(self._VALID_URL, url).groups() + media_id = self._extract_media_id(url, display_id) + dve_api_url = self._extract_dve_api_url(media_id, media_type) + video_data = self._download_json(dve_api_url, media_id) + is_live = media_type == 'live' + if is_live: + title = self._live_title(self._download_json( + self._API_BASE + 'event/' + media_id, + media_id, headers=self._HEADERS)['title']) + else: + title = video_data['name'] + + formats = [] + for proto in ('hls', 'dash'): + media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url']) + if not media_url: + continue + if proto == 'hls': + m3u8_formats = self._extract_m3u8_formats( + media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native', + m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS) + for f 
in m3u8_formats: + f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS) + formats.append(f) + else: + formats.extend(self._extract_mpd_formats( + media_url, media_id, mpd_id='dash', fatal=False, + headers=self._MANIFEST_HEADERS)) + self._sort_formats(formats) + + return { + 'id': media_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'thumbnail': video_data.get('thumbnailUrl'), + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'tags': video_data.get('tags'), + 'is_live': is_live, + } diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py index f3eaee6b3..160b0f104 100644 --- a/youtube_dl/extractor/ufctv.py +++ b/youtube_dl/extractor/ufctv.py @@ -1,73 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_duration, - parse_iso8601, - urlencode_postdata, -) +from .imggaming import ImgGamingBaseIE -class UFCTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P[^/]+)' +class UFCTVIE(ImgGamingBaseIE): + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:www\.)?ufc\.tv' _NETRC_MACHINE = 'ufctv' - _TEST = { - 'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode', - 'info_dict': { - 'id': '34167', - 'ext': 'mp4', - 'title': 'UFC 219 Countdown: Full Episode', - 'description': 'md5:26d4e8bf4665ae5878842d7050c3c646', - 'timestamp': 1513962360, - 'upload_date': '20171222', - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - } + _REALM = 'ufc' - def _real_initialize(self): - username, password = self._get_login_info() - if username is None: - return - code = self._download_json( - 'https://www.ufc.tv/secure/authenticate', - None, 'Logging in', data=urlencode_postdata({ - 'username': username, - 'password': password, - 'format': 'json', - })).get('code') - if code and code != 'loginsuccess': - raise ExtractorError(code, 
expected=True) - - def _real_extract(self, url): - display_id = self._match_id(url) - video_data = self._download_json(url, display_id, query={ - 'format': 'json', - }) - video_id = str(video_data['id']) - title = video_data['name'] - m3u8_url = self._download_json( - 'https://www.ufc.tv/service/publishpoint', video_id, query={ - 'type': 'video', - 'format': 'json', - 'id': video_id, - }, headers={ - 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1', - })['path'] - m3u8_url = m3u8_url.replace('_iphone.', '.') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': video_data.get('description'), - 'duration': parse_duration(video_data.get('runtime')), - 'timestamp': parse_iso8601(video_data.get('releaseDate')), - 'formats': formats, - } +class UFCArabiaIE(ImgGamingBaseIE): + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'app\.ufcarabia\.com' + _NETRC_MACHINE = 'ufcarabia' + _REALM = 'admufc' From 7d53fa475a97baf2a676d935847b3dc2af562a7c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 4 Dec 2019 20:56:23 +0100 Subject: [PATCH 06/20] [imggaming] add support for playlists and extract subtitles --- youtube_dl/extractor/imggaming.py | 56 +++++++++++++++++++++++++------ youtube_dl/extractor/ufctv.py | 2 ++ 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py index 96fddeac0..8bb5de463 100644 --- a/youtube_dl/extractor/imggaming.py +++ b/youtube_dl/extractor/imggaming.py @@ -9,6 +9,7 @@ from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, + str_or_none, try_get, ) @@ -16,13 +17,14 @@ from ..utils import ( class ImgGamingBaseIE(InfoExtractor): _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/' _API_KEY = 
'857a1e5d-e35e-4fdf-805b-a87b6f8364bf' + _DOMAIN = None _HEADERS = None _LOGIN_REQUIRED = True _LOGIN_SUFFIX = '' _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'} _REALM = None _TOKEN = None - _VALID_URL_TEMPL = r'https?://%s/(?Plive|video)/(?P\d+)' + _VALID_URL_TEMPL = r'https?://%s/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' def _real_initialize(self): if not self._LOGIN_REQUIRED: @@ -46,18 +48,22 @@ class ImgGamingBaseIE(InfoExtractor): 'secret': password, }).encode(), headers=p_headers)['authorisationToken'] + def _call_api(self, path, media_id): + return self._download_json( + self._API_BASE + path + media_id, media_id, headers=self._HEADERS) + def _extract_media_id(self, url, display_id): return display_id def _extract_dve_api_url(self, media_id, media_type): - url = self._API_BASE + 'stream' + stream_path = 'stream' if media_type == 'video': - url += '/vod/' + media_id + stream_path += '/vod/' else: - url += '?eventId=' + media_id + stream_path += '?eventId=' try: - return self._download_json( - url, media_id, headers=self._HEADERS)['playerUrlCallback'] + return self._call_api( + stream_path, media_id)['playerUrlCallback'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: raise ExtractorError( @@ -66,15 +72,35 @@ class ImgGamingBaseIE(InfoExtractor): raise def _real_extract(self, url): - media_type, display_id = re.match(self._VALID_URL, url).groups() + media_type, display_id, playlist_id = re.match(self._VALID_URL, url).groups() media_id = self._extract_media_id(url, display_id) + + if playlist_id: + if self._downloader.params.get('noplaylist'): + self.to_screen('Downloading just video %s because of --no-playlist' % media_id) + else: + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + media_type, media_id = 'playlist', playlist_id + + if media_type == 'playlist': + playlist = self._call_api('vod/playlist/', media_id) + entries = [] + for 
video in try_get(playlist, lambda x: x['videos']['vods']) or []: + video_id = str_or_none(video.get('id')) + if not video_id: + continue + entries.append(self.url_result( + 'https://%s/video/%s' % (self._DOMAIN, video_id), + self.ie_key(), video_id)) + return self.playlist_result( + entries, media_id, playlist.get('title'), + playlist.get('description')) + dve_api_url = self._extract_dve_api_url(media_id, media_type) video_data = self._download_json(dve_api_url, media_id) is_live = media_type == 'live' if is_live: - title = self._live_title(self._download_json( - self._API_BASE + 'event/' + media_id, - media_id, headers=self._HEADERS)['title']) + title = self._live_title(self._call_api('event/', media_id)['title']) else: title = video_data['name'] @@ -96,6 +122,15 @@ class ImgGamingBaseIE(InfoExtractor): headers=self._MANIFEST_HEADERS)) self._sort_formats(formats) + subtitles = {} + for subtitle in video_data.get('subtitles', []): + subtitle_url = subtitle.get('url') + if not subtitle_url: + continue + subtitles.setdefault(subtitle.get('lang', 'en_US'), []).append({ + 'url': subtitle_url, + }) + return { 'id': media_id, 'display_id': display_id, @@ -106,4 +141,5 @@ class ImgGamingBaseIE(InfoExtractor): 'duration': int_or_none(video_data.get('duration')), 'tags': video_data.get('tags'), 'is_live': is_live, + 'subtitles': subtitles, } diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py index 160b0f104..d07fa1280 100644 --- a/youtube_dl/extractor/ufctv.py +++ b/youtube_dl/extractor/ufctv.py @@ -7,10 +7,12 @@ from .imggaming import ImgGamingBaseIE class UFCTVIE(ImgGamingBaseIE): _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:www\.)?ufc\.tv' _NETRC_MACHINE = 'ufctv' + _DOMAIN = 'ufc.tv' _REALM = 'ufc' class UFCArabiaIE(ImgGamingBaseIE): _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'app\.ufcarabia\.com' _NETRC_MACHINE = 'ufcarabia' + _DOMAIN = 'app.ufcarabia.com' _REALM = 'admufc' From 4067a2327069c24915945fb5f5182e7fa987a57e Mon Sep 17 
00:00:00 2001 From: Remita Amine Date: Fri, 6 Dec 2019 11:04:12 +0100 Subject: [PATCH 07/20] [ufctv] add support for more domains and remove compatibility code(closes #23332) --- youtube_dl/extractor/imggaming.py | 20 ++++---------------- youtube_dl/extractor/ufctv.py | 6 ++---- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py index 8bb5de463..10d26adab 100644 --- a/youtube_dl/extractor/imggaming.py +++ b/youtube_dl/extractor/imggaming.py @@ -17,19 +17,12 @@ from ..utils import ( class ImgGamingBaseIE(InfoExtractor): _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/' _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf' - _DOMAIN = None _HEADERS = None - _LOGIN_REQUIRED = True - _LOGIN_SUFFIX = '' _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'} _REALM = None - _TOKEN = None - _VALID_URL_TEMPL = r'https?://%s/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' + _VALID_URL_TEMPL = r'https?://(?P(?:(?:app|www)\.)?%s)/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' def _real_initialize(self): - if not self._LOGIN_REQUIRED: - return - self._HEADERS = { 'Realm': 'dce.' 
+ self._REALM, 'x-api-key': self._API_KEY, @@ -42,7 +35,7 @@ class ImgGamingBaseIE(InfoExtractor): p_headers = self._HEADERS.copy() p_headers['Content-Type'] = 'application/json' self._HEADERS['Authorization'] = 'Bearer ' + self._download_json( - self._API_BASE + 'login' + self._LOGIN_SUFFIX, + self._API_BASE + 'login', None, 'Logging in', data=json.dumps({ 'id': email, 'secret': password, @@ -52,9 +45,6 @@ class ImgGamingBaseIE(InfoExtractor): return self._download_json( self._API_BASE + path + media_id, media_id, headers=self._HEADERS) - def _extract_media_id(self, url, display_id): - return display_id - def _extract_dve_api_url(self, media_id, media_type): stream_path = 'stream' if media_type == 'video': @@ -72,8 +62,7 @@ class ImgGamingBaseIE(InfoExtractor): raise def _real_extract(self, url): - media_type, display_id, playlist_id = re.match(self._VALID_URL, url).groups() - media_id = self._extract_media_id(url, display_id) + domain, media_type, media_id, playlist_id = re.match(self._VALID_URL, url).groups() if playlist_id: if self._downloader.params.get('noplaylist'): @@ -90,7 +79,7 @@ class ImgGamingBaseIE(InfoExtractor): if not video_id: continue entries.append(self.url_result( - 'https://%s/video/%s' % (self._DOMAIN, video_id), + 'https://%s/video/%s' % (domain, video_id), self.ie_key(), video_id)) return self.playlist_result( entries, media_id, playlist.get('title'), @@ -133,7 +122,6 @@ class ImgGamingBaseIE(InfoExtractor): return { 'id': media_id, - 'display_id': display_id, 'title': title, 'formats': formats, 'thumbnail': video_data.get('thumbnailUrl'), diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py index d07fa1280..665eb1cb7 100644 --- a/youtube_dl/extractor/ufctv.py +++ b/youtube_dl/extractor/ufctv.py @@ -5,14 +5,12 @@ from .imggaming import ImgGamingBaseIE class UFCTVIE(ImgGamingBaseIE): - _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:www\.)?ufc\.tv' + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % 
r'(?:ufc\.tv|(?:ufc)?fightpass\.com)' _NETRC_MACHINE = 'ufctv' - _DOMAIN = 'ufc.tv' _REALM = 'ufc' class UFCArabiaIE(ImgGamingBaseIE): - _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'app\.ufcarabia\.com' + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'ufcarabia\.(?:ae|com)' _NETRC_MACHINE = 'ufcarabia' - _DOMAIN = 'app.ufcarabia.com' _REALM = 'admufc' From 1d31b7ca048d0adf86946b8ace05e25d3216471e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 6 Dec 2019 15:34:35 +0100 Subject: [PATCH 08/20] [twitch] extract m3u8 formats frame rate(closes #23333) --- youtube_dl/extractor/twitch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 8c0d70010..1f3df3112 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -327,6 +327,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'allow_audio_only': 'true', 'allow_spectre': 'true', 'player': 'twitchweb', + 'playlist_include_framerate': 'true', 'nauth': access_token['token'], 'nauthsig': access_token['sig'], })), @@ -598,6 +599,7 @@ class TwitchStreamIE(TwitchBaseIE): 'allow_spectre': 'true', 'p': random.randint(1000000, 10000000), 'player': 'twitchweb', + 'playlist_include_framerate': 'true', 'segment_preference': '4', 'sig': access_token['sig'].encode('utf-8'), 'token': access_token['token'].encode('utf-8'), From 6633103f8e607b23530a2c5846aeb4c9c73f4031 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 7 Dec 2019 19:23:19 +0100 Subject: [PATCH 09/20] [ufctv] add support for ufcfightpass.imgdge.com and ufcfightpass.imggaming.com domains(closes #23343) --- youtube_dl/extractor/imggaming.py | 2 +- youtube_dl/extractor/ufctv.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py index 10d26adab..e11f92053 100644 --- a/youtube_dl/extractor/imggaming.py +++ b/youtube_dl/extractor/imggaming.py @@ -20,7 +20,7 @@ class 
ImgGamingBaseIE(InfoExtractor): _HEADERS = None _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'} _REALM = None - _VALID_URL_TEMPL = r'https?://(?P(?:(?:app|www)\.)?%s)/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' + _VALID_URL_TEMPL = r'https?://(?P%s)/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' def _real_initialize(self): self._HEADERS = { diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py index 665eb1cb7..3d74ba071 100644 --- a/youtube_dl/extractor/ufctv.py +++ b/youtube_dl/extractor/ufctv.py @@ -5,12 +5,12 @@ from .imggaming import ImgGamingBaseIE class UFCTVIE(ImgGamingBaseIE): - _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:ufc\.tv|(?:ufc)?fightpass\.com)' + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com' _NETRC_MACHINE = 'ufctv' _REALM = 'ufc' class UFCArabiaIE(ImgGamingBaseIE): - _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'ufcarabia\.(?:ae|com)' + _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)' _NETRC_MACHINE = 'ufcarabia' _REALM = 'admufc' From ce709fcb00a5a35d72e43c588120e40d38b3020d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 7 Dec 2019 20:17:30 +0100 Subject: [PATCH 10/20] [musicplayon] remove extractor(closes #9225) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/musicplayon.py | 66 ----------------------------- 2 files changed, 67 deletions(-) delete mode 100644 youtube_dl/extractor/musicplayon.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 74bf58f38..4e6f2c442 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -657,7 +657,6 @@ from .mtv import ( MTVJapanIE, ) from .muenchentv import MuenchenTVIE -from .musicplayon import MusicPlayOnIE from .mwave import MwaveIE, MwaveMeetGreetIE from .mychannels import MyChannelsIE from .myspace 
import MySpaceIE, MySpaceAlbumIE diff --git a/youtube_dl/extractor/musicplayon.py b/youtube_dl/extractor/musicplayon.py deleted file mode 100644 index 1854d59a5..000000000 --- a/youtube_dl/extractor/musicplayon.py +++ /dev/null @@ -1,66 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - int_or_none, - js_to_json, - mimetype2ext, -) - - -class MusicPlayOnIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P\d+)' - - _TESTS = [{ - 'url': 'http://en.musicplayon.com/play?v=433377', - 'md5': '00cdcdea1726abdf500d1e7fd6dd59bb', - 'info_dict': { - 'id': '433377', - 'ext': 'mp4', - 'title': 'Rick Ross - Interview On Chelsea Lately (2014)', - 'description': 'Rick Ross Interview On Chelsea Lately', - 'duration': 342, - 'uploader': 'ultrafish', - }, - }, { - 'url': 'http://en.musicplayon.com/play?pl=102&play=442629', - 'only_matching': True, - }] - - _URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - url = self._URL_TEMPLATE % video_id - - page = self._download_webpage(url, video_id) - - title = self._og_search_title(page) - description = self._og_search_description(page) - thumbnail = self._og_search_thumbnail(page) - duration = self._html_search_meta('video:duration', page, 'duration', fatal=False) - view_count = self._og_search_property('count', page, fatal=False) - uploader = self._html_search_regex( - r'', page, 'uploader', fatal=False) - - sources = self._parse_json( - self._search_regex(r'setup\[\'_sources\'\]\s*=\s*([^;]+);', page, 'video sources'), - video_id, transform_source=js_to_json) - formats = [{ - 'url': compat_urlparse.urljoin(url, source['src']), - 'ext': mimetype2ext(source.get('type')), - 'format_note': source.get('data-res'), - } for source in sources] - - return { - 'id': video_id, - 'title': title, - 
'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'duration': int_or_none(duration), - 'view_count': int_or_none(view_count), - 'formats': formats, - } From 9d4424afaafe96161af59a8a59a0f922bd666fee Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 8 Dec 2019 11:54:16 +0100 Subject: [PATCH 11/20] [videopremium] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/videopremium.py | 46 ---------------------------- 2 files changed, 47 deletions(-) delete mode 100644 youtube_dl/extractor/videopremium.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4e6f2c442..7f4044b4a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1290,7 +1290,6 @@ from .videomore import ( VideomoreVideoIE, VideomoreSeasonIE, ) -from .videopremium import VideoPremiumIE from .videopress import VideoPressIE from .vidio import VidioIE from .vidlii import VidLiiIE diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py deleted file mode 100644 index cf690d7b0..000000000 --- a/youtube_dl/extractor/videopremium.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import unicode_literals - -import re -import random - -from .common import InfoExtractor - - -class VideoPremiumIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?videopremium\.(?:tv|me)/(?P\w+)(?:/.*)?' 
- _TEST = { - 'url': 'http://videopremium.tv/4w7oadjsf156', - 'info_dict': { - 'id': '4w7oadjsf156', - 'ext': 'f4v', - 'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4' - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Test file has been deleted.', - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage_url = 'http://videopremium.tv/' + video_id - webpage = self._download_webpage(webpage_url, video_id) - - if re.match(r'^]*>window\.location\s*=', webpage): - # Download again, we need a cookie - webpage = self._download_webpage( - webpage_url, video_id, - note='Downloading webpage again (with cookie)') - - video_title = self._html_search_regex( - r'\s*(.+?)\s*<', webpage, 'video title') - - return { - 'id': video_id, - 'url': 'rtmp://e%d.md.iplay.md/play' % random.randint(1, 16), - 'play_path': 'mp4:%s.f4v' % video_id, - 'page_url': 'http://videopremium.tv/' + video_id, - 'player_url': 'http://videopremium.tv/uplayer/uppod.swf', - 'ext': 'f4v', - 'title': video_title, - } From d686cab084af88260bd28ad99673e27b36fcb4b2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 8 Dec 2019 12:38:21 +0100 Subject: [PATCH 12/20] [kontrtube] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/kontrtube.py | 73 ------------------------------ 2 files changed, 74 deletions(-) delete mode 100644 youtube_dl/extractor/kontrtube.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7f4044b4a..fd93730fa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -511,7 +511,6 @@ from .kickstarter import KickStarterIE from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE from .konserthusetplay import KonserthusetPlayIE -from .kontrtube import KontrTubeIE from .krasview import KrasViewIE from .ku6 import Ku6IE from .kusi import KUSIIE diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py 
deleted file mode 100644 index 1fda45107..000000000 --- a/youtube_dl/extractor/kontrtube.py +++ /dev/null @@ -1,73 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, -) - - -class KontrTubeIE(InfoExtractor): - IE_NAME = 'kontrtube' - IE_DESC = 'KontrTube.ru - Труба зовёт' - _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P\d+)/(?P[^/]+)/' - - _TEST = { - 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', - 'md5': '975a991a4926c9a85f383a736a2e6b80', - 'info_dict': { - 'id': '2678', - 'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag', - 'ext': 'mp4', - 'title': 'Над олимпийской деревней в Сочи поднят российский флаг', - 'description': 'md5:80edc4c613d5887ae8ccf1d59432be41', - 'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg', - 'duration': 270, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage( - url, display_id, 'Downloading page') - - video_url = self._search_regex( - r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL') - thumbnail = self._search_regex( - r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False) - title = self._html_search_regex( - r'(?s)

(.+?)

', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description') - - duration = self._search_regex( - r'Длительность: ([^<]+)', webpage, 'duration', fatal=False) - if duration: - duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec')) - - view_count = self._search_regex( - r'Просмотров: ([^<]+)', - webpage, 'view count', fatal=False) - if view_count: - view_count = int_or_none(view_count.replace(' ', '')) - - comment_count = int_or_none(self._search_regex( - r'Комментарии \((\d+)\)<', webpage, ' comment count', fatal=False)) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'thumbnail': thumbnail, - 'title': title, - 'description': description, - 'duration': duration, - 'view_count': int_or_none(view_count), - 'comment_count': int_or_none(comment_count), - } From 0e6ec3caf6c20bb5b27c063b2b946686e0b5159f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 9 Dec 2019 09:13:02 +0100 Subject: [PATCH 13/20] [vk] improve extraction - fix User Videos extraction(closes #23356) - extract all videos for lists with more than 1000 videos(#23356) - add support for video albums(closes #14327)(closes #14492) --- youtube_dl/extractor/vk.py | 54 +++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index a5e4a3e67..00ec006c4 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import collections +import functools import re from .common import InfoExtractor @@ -11,6 +12,7 @@ from ..utils import ( ExtractorError, get_element_by_class, int_or_none, + OnDemandPagedList, orderedSet, str_or_none, str_to_int, @@ -477,14 +479,23 @@ class VKIE(VKBaseIE): class VKUserVideosIE(VKBaseIE): IE_NAME = 'vk:uservideos' IE_DESC = "VK - User's Videos" - _VALID_URL = 
r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' + _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P
\w+))?|$)' _TEMPLATE_URL = 'https://vk.com/videos' _TESTS = [{ - 'url': 'http://vk.com/videos205387401', + 'url': 'https://vk.com/videos-767561', 'info_dict': { - 'id': '205387401', + 'id': '-767561_all', }, - 'playlist_mincount': 4, + 'playlist_mincount': 1150, + }, { + 'url': 'https://vk.com/videos-767561?section=uploaded', + 'info_dict': { + 'id': '-767561_uploaded', + }, + 'playlist_mincount': 425, + }, { + 'url': 'http://vk.com/videos205387401', + 'only_matching': True, }, { 'url': 'http://vk.com/videos-77521', 'only_matching': True, @@ -498,25 +509,33 @@ class VKUserVideosIE(VKBaseIE): 'url': 'http://new.vk.com/videos205387401', 'only_matching': True, }] - _VIDEO = collections.namedtuple( - 'Video', ['owner_id', 'id', 'thumb', 'title', 'flags', 'duration', 'hash', 'moder_acts', 'owner', 'date', 'views', 'platform', 'blocked', 'music_video_meta']) - - def _real_extract(self, url): - page_id = self._match_id(url) + _PAGE_SIZE = 1000 + _VIDEO = collections.namedtuple('Video', ['owner_id', 'id']) + def _fetch_page(self, page_id, section, page): l = self._download_payload('al_video', page_id, { 'act': 'load_videos_silent', + 'offset': page * self._PAGE_SIZE, 'oid': page_id, - })[0]['']['list'] + 'section': section, + })[0][section]['list'] - entries = [] for video in l: - v = self._VIDEO._make(video) + v = self._VIDEO._make(video[:2]) video_id = '%d_%d' % (v.owner_id, v.id) - entries.append(self.url_result( - 'http://vk.com/video' + video_id, 'VK', video_id=video_id)) + yield self.url_result( + 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id) - return self.playlist_result(entries, page_id) + def _real_extract(self, url): + page_id, section = re.match(self._VALID_URL, url).groups() + if not section: + section = 'all' + + entries = OnDemandPagedList( + functools.partial(self._fetch_page, page_id, section), + self._PAGE_SIZE) + + return self.playlist_result(entries, '%s_%s' % (page_id, section)) class VKWallPostIE(VKBaseIE): @@ -580,8 +599,7 @@ class 
VKWallPostIE(VKBaseIE): 'only_matching': True, }] _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/=' - _AUDIO = collections.namedtuple( - 'Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads', 'subtitle', 'main_artists', 'feat_artists', 'album', 'track_code', 'restriction', 'album_part', 'new_stats', 'access_key']) + _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads']) def _decode(self, enc): dec = '' @@ -629,7 +647,7 @@ class VKWallPostIE(VKBaseIE): for audio in re.findall(r'data-audio="([^"]+)', webpage): audio = self._parse_json(unescapeHTML(audio), post_id) - a = self._AUDIO._make(audio) + a = self._AUDIO._make(audio[:16]) if not a.url: continue title = unescapeHTML(a.title) From cf80ff186eab6963fcfb108919a25b7ed28813d1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 9 Dec 2019 14:38:12 +0100 Subject: [PATCH 14/20] [soundcloud] add support for token protected embeds(#18954) --- youtube_dl/extractor/soundcloud.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 988dec4fa..c2ee54457 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -28,7 +28,12 @@ from ..utils import ( class SoundcloudEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?url=(?P.*)' + _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P.+)' + _TEST = { + # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/ + 'url': 
'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey', + 'only_matching': True, + } @staticmethod def _extract_urls(webpage): @@ -37,8 +42,13 @@ class SoundcloudEmbedIE(InfoExtractor): webpage)] def _real_extract(self, url): - return self.url_result(compat_urlparse.parse_qs( - compat_urlparse.urlparse(url).query)['url'][0]) + query = compat_urlparse.parse_qs( + compat_urlparse.urlparse(url).query) + api_url = query['url'][0] + secret_token = query.get('secret_token') + if secret_token: + api_url = update_url_query(api_url, {'secret_token': secret_token[0]}) + return self.url_result(api_url) class SoundcloudIE(InfoExtractor): From 232ed8e6e0ec8b86156e68002e496a8bc89e6346 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Dec 2019 11:00:31 +0100 Subject: [PATCH 15/20] [twitch] fix clip extraction(closes #23375) --- youtube_dl/extractor/twitch.py | 110 +++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 1f3df3112..a8c2502af 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -17,12 +17,10 @@ from ..compat import ( from ..utils import ( clean_html, ExtractorError, - float_or_none, int_or_none, orderedSet, parse_duration, parse_iso8601, - qualities, try_get, unified_timestamp, update_url_query, @@ -676,63 +674,81 @@ class TwitchClipsIE(TwitchBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - status = self._download_json( - 'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id, - video_id) + clip = self._download_json( + 'https://gql.twitch.tv/gql', video_id, data=json.dumps({ + 'query': '''{ + clip(slug: "%s") { + broadcaster { + displayName + } + createdAt + curator { + displayName + id + } + durationSeconds + id + tiny: thumbnailURL(width: 86, height: 45) + small: 
thumbnailURL(width: 260, height: 147) + medium: thumbnailURL(width: 480, height: 272) + title + videoQualities { + frameRate + quality + sourceURL + } + viewCount + } +}''' % video_id, + }).encode(), headers={ + 'Client-ID': self._CLIENT_ID, + })['data']['clip'] + + if not clip: + raise ExtractorError( + 'This clip is no longer available', expected=True) formats = [] - - for option in status['quality_options']: + for option in clip.get('videoQualities', []): if not isinstance(option, dict): continue - source = url_or_none(option.get('source')) + source = url_or_none(option.get('sourceURL')) if not source: continue formats.append({ 'url': source, 'format_id': option.get('quality'), 'height': int_or_none(option.get('quality')), - 'fps': int_or_none(option.get('frame_rate')), + 'fps': int_or_none(option.get('frameRate')), }) - self._sort_formats(formats) - info = { + thumbnails = [] + for thumbnail_id in ('tiny', 'small', 'medium'): + thumbnail_url = clip.get(thumbnail_id) + if not thumbnail_url: + continue + thumb = { + 'id': thumbnail_id, + 'url': thumbnail_url, + } + mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url) + if mobj: + thumb.update({ + 'height': int(mobj.group(2)), + 'width': int(mobj.group(1)), + }) + thumbnails.append(thumb) + + return { + 'id': clip.get('id') or video_id, + 'title': clip.get('title') or video_id, 'formats': formats, + 'duration': int_or_none(clip.get('durationSeconds')), + 'views': int_or_none(clip.get('viewCount')), + 'timestamp': unified_timestamp(clip.get('createdAt')), + 'thumbnails': thumbnails, + 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str), + 'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str), + 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str), } - - clip = self._call_api( - 'kraken/clips/%s' % video_id, video_id, fatal=False, headers={ - 'Accept': 'application/vnd.twitchtv.v5+json', - }) - - if clip: - quality_key = qualities(('tiny', 'small', 
'medium')) - thumbnails = [] - thumbnails_dict = clip.get('thumbnails') - if isinstance(thumbnails_dict, dict): - for thumbnail_id, thumbnail_url in thumbnails_dict.items(): - thumbnails.append({ - 'id': thumbnail_id, - 'url': thumbnail_url, - 'preference': quality_key(thumbnail_id), - }) - - info.update({ - 'id': clip.get('tracking_id') or video_id, - 'title': clip.get('title') or video_id, - 'duration': float_or_none(clip.get('duration')), - 'views': int_or_none(clip.get('views')), - 'timestamp': unified_timestamp(clip.get('created_at')), - 'thumbnails': thumbnails, - 'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str), - 'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str), - 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str), - }) - else: - info.update({ - 'title': video_id, - 'id': video_id, - }) - - return info From b33a05d2213020fd4a74a1790db728a367f82517 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 14 Dec 2019 19:29:04 +0100 Subject: [PATCH 16/20] [slideslive] fix extraction(closes #23413) --- youtube_dl/extractor/slideslive.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py index ed84322c5..467af2cb3 100644 --- a/youtube_dl/extractor/slideslive.py +++ b/youtube_dl/extractor/slideslive.py @@ -14,9 +14,9 @@ class SlidesLiveIE(InfoExtractor): 'info_dict': { 'id': 'LMtgR8ba0b0', 'ext': 'mp4', - 'title': '38902413: external video', - 'description': '3890241320170925-9-1yd6ech.mp4', - 'uploader': 'SlidesLive Administrator', + 'title': 'GCC IA16 backend', + 'description': 'Watch full version of this video at https://slideslive.com/38902413.', + 'uploader': 'SlidesLive Videos - A', 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'upload_date': '20170925', } @@ -29,11 +29,18 @@ class SlidesLiveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) 
video_data = self._download_json( - url, video_id, headers={'Accept': 'application/json'}) + 'https://ben.slideslive.com/player/' + video_id, video_id) service_name = video_data['video_service_name'].lower() if service_name == 'youtube': yt_video_id = video_data['video_service_id'] - return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) + return { + '_type': 'url_transparent', + 'ie_key': 'Youtube', + 'id': yt_video_id, + 'thumbnail': video_data.get('thumbnail'), + 'title': video_data.get('title'), + 'url': yt_video_id, + } else: raise ExtractorError( 'Unsupported service name: {0}'.format(service_name), expected=True) From 73d8f3a63426e8517143e3a5554e12d614c5cdec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 14 Dec 2019 21:35:31 +0100 Subject: [PATCH 17/20] [slideslive] add support for url and vimeo service names(closes #23414) --- youtube_dl/extractor/slideslive.py | 41 ++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py index 467af2cb3..d9ea76831 100644 --- a/youtube_dl/extractor/slideslive.py +++ b/youtube_dl/extractor/slideslive.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import smuggle_url class SlidesLiveIE(InfoExtractor): @@ -24,6 +24,14 @@ class SlidesLiveIE(InfoExtractor): # video_service_name = youtube 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'only_matching': True, + }, { + # video_service_name = url + 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1', + 'only_matching': True, + }, { + # video_service_name = vimeo + 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3', + 'only_matching': True, }] def _real_extract(self, url): @@ -31,16 +39,23 @@ class SlidesLiveIE(InfoExtractor): video_data = 
self._download_json( 'https://ben.slideslive.com/player/' + video_id, video_id) service_name = video_data['video_service_name'].lower() - if service_name == 'youtube': - yt_video_id = video_data['video_service_id'] - return { - '_type': 'url_transparent', - 'ie_key': 'Youtube', - 'id': yt_video_id, - 'thumbnail': video_data.get('thumbnail'), - 'title': video_data.get('title'), - 'url': yt_video_id, - } + assert service_name in ('url', 'vimeo', 'youtube') + service_id = video_data['video_service_id'] + info = { + 'id': video_id, + 'thumbnail': video_data.get('thumbnail'), + 'url': service_id, + } + if service_name == 'url': + info['title'] = video_data['title'] else: - raise ExtractorError( - 'Unsupported service name: {0}'.format(service_name), expected=True) + info.update({ + '_type': 'url_transparent', + 'ie_key': service_name.capitalize(), + 'title': video_data.get('title'), + }) + if service_name == 'vimeo': + info['url'] = smuggle_url( + 'https://player.vimeo.com/video/' + service_id, + {'http_headers': {'Referer': url}}) + return info From 42db58ec7367e7ee6555e5f14107712add61d013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Dec 2019 23:15:24 +0700 Subject: [PATCH 18/20] [utils] Improve str_to_int --- test/test_utils.py | 5 +++++ youtube_dl/utils.py | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index fed94a906..0896f4150 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -500,6 +500,11 @@ class TestUtil(unittest.TestCase): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int(523), 523) + # Python 3 has no long + if sys.version_info < (3, 0): + eval('self.assertEqual(str_to_int(123456L), 123456)') + self.assertEqual(str_to_int('noninteger'), None) + self.assertEqual(str_to_int([]), None) def test_url_basename(self): self.assertEqual(url_basename('http://foo.de/'), '') diff --git 
a/youtube_dl/utils.py b/youtube_dl/utils.py index 328f037a8..f6204692a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -46,6 +46,7 @@ from .compat import ( compat_html_entities, compat_html_entities_html5, compat_http_client, + compat_integer_types, compat_kwargs, compat_os_name, compat_parse_qs, @@ -3519,10 +3520,11 @@ def str_or_none(v, default=None): def str_to_int(int_str): """ A more relaxed version of int_or_none """ - if not isinstance(int_str, compat_str): + if isinstance(int_str, compat_integer_types): return int_str - int_str = re.sub(r'[,\.\+]', '', int_str) - return int(int_str) + elif isinstance(int_str, compat_str): + int_str = re.sub(r'[,\.\+]', '', int_str) + return int_or_none(int_str) def float_or_none(v, scale=1, invscale=1, default=None): From fab01080f402dbfad00122b73714d92b5d1deb24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Dec 2019 00:08:18 +0700 Subject: [PATCH 19/20] [tv2dk:bornholm:play] Add extractor (closes #23291) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/tv2dk.py | 74 +++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fd93730fa..376d07727 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1168,7 +1168,10 @@ from .tv2 import ( TV2ArticleIE, KatsomoIE, ) -from .tv2dk import TV2DKIE +from .tv2dk import ( + TV2DKIE, + TV2DKBornholmPlayIE, +) from .tv2hu import TV2HuIE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py index eb39424df..611fdc0c6 100644 --- a/youtube_dl/extractor/tv2dk.py +++ b/youtube_dl/extractor/tv2dk.py @@ -1,10 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor -from ..utils import extract_attributes +from ..utils import ( + 
determine_ext, + extract_attributes, + js_to_json, + url_or_none, +) class TV2DKIE(InfoExtractor): @@ -80,3 +86,69 @@ class TV2DKIE(InfoExtractor): 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', video_id=kaltura_id)) return self.playlist_result(entries) + + +class TV2DKBornholmPlayIE(InfoExtractor): + _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P\d+)' + _TEST = { + 'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021', + 'info_dict': { + 'id': '781021', + 'ext': 'mp4', + 'title': '12Nyheder-27.11.19', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id, + data=json.dumps({ + 'playlist_id': video_id, + 'serienavn': '', + }).encode(), headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'Content-Type': 'application/json; charset=UTF-8', + })['d'] + + # TODO: generalize flowplayer + title = self._search_regex( + r'title\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', video, 'title', + group='value') + sources = self._parse_json(self._search_regex( + r'(?s)sources:\s*(\[.+?\]),', video, 'sources'), + video_id, js_to_json) + + formats = [] + srcs = set() + for source in sources: + src = url_or_none(source.get('src')) + if not src: + continue + if src in srcs: + continue + srcs.add(src) + ext = determine_ext(src) + src_type = source.get('type') + if src_type == 'application/x-mpegurl' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif src_type == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': src, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } From 2dbc0967f26425acc204395bc69c9446d9ebd682 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Dec 2019 00:40:34 +0700 Subject: [PATCH 20/20] [ChangeLog] Actualize [ci skip] --- ChangeLog | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index d4f809fc6..d2f17ee06 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,55 @@ +version + +Core +* [utils] Improve str_to_int ++ [downloader/hls] Add ability to override AES decryption key URL (#17521) + +Extractors ++ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291) ++ [slideslive] Add support for url and vimeo service names (#23414) +* [slideslive] Fix extraction (#23413) +* [twitch:clips] Fix extraction (#23375) ++ [soundcloud] Add support for token protected embeds (#18954) +* [vk] Improve extraction + * Fix User Videos extraction (#23356) + * Extract all videos for lists with more than 1000 videos (#23356) + + Add support for video albums (#14327, #14492) +- [kontrtube] Remove extractor +- [videopremium] Remove extractor +- [musicplayon] Remove extractor (#9225) ++ [ufctv] Add support for ufcfightpass.imgdge.com and + ufcfightpass.imggaming.com (#23343) ++ [twitch] Extract m3u8 formats frame rate (#23333) ++ [imggaming] Add support for playlists and extract subtitles ++ [ufcarabia] Add support for UFC Arabia (#23312) +* [ufctv] Fix extraction +* [yahoo] Fix gyao brightcove player id (#23303) +* [vzaar] Override AES decryption key URL (#17521) ++ [vzaar] Add support for AES HLS manifests (#17521, #23299) +* [nrl] Fix extraction +* [teachingchannel] Fix extraction +* [nintendo] Fix extraction and partially add support for Nintendo Direct + videos (#4592) ++ [ooyala] Add better fallback values for domain and streams variables ++ [youtube] Add support youtubekids.com (#23272) +* [tv2] Detect DRM protection ++ [tv2] Add support for katsomo.fi and mtv.fi (#10543) +* [tv2] Fix tv2.no article extraction +* [msn] Improve extraction + + Add support 
for YouTube and NBCSports embeds + Add support for articles with multiple videos * Improve AOL embed support * Improve format extraction +* [abcotvs] Relax URL regular expression and improve metadata extraction + (#18014) +* [channel9] Reduce response size +* [adobetv] Improve extraction + * Use OnDemandPagedList for list extractors + * Reduce show extraction requests + * Extract original video format and subtitles + + Add support for adobe tv embeds + + version 2019.11.28 Core @@ -583,7 +635,7 @@ Extractors version 2019.04.17 Extractors -* [openload] Randomize User-Agent (closes #20688) +* [openload] Randomize User-Agent (#20688) + [openload] Add support for oladblock domains (#20471) * [adn] Fix subtitle extraction (#12724) + [aol] Add support for localized websites @@ -1148,7 +1200,7 @@ Extractors + [youtube] Extract channel meta fields (#9676, #12939) * [porntube] Fix extraction (#17541) * [asiancrush] Fix extraction (#15630) -+ [twitch:clips] Extend URL regular expression (closes #17559) ++ [twitch:clips] Extend URL regular expression (#17559) + [vzaar] Add support for HLS * [tube8] Fix metadata extraction (#17520) * [eporner] Extract JSON-LD (#17519)