From c87f65e43dea0f9edf1b5ed8979e274902fb60a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jan 2019 22:21:53 +0700 Subject: [PATCH 01/84] [carambatv:page] Fix extraction (closes #18739) --- youtube_dl/extractor/carambatv.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py index 9ba909a91..b57b86af7 100644 --- a/youtube_dl/extractor/carambatv.py +++ b/youtube_dl/extractor/carambatv.py @@ -82,6 +82,12 @@ class CarambaTVPageIE(InfoExtractor): webpage = self._download_webpage(url, video_id) videomore_url = VideomoreIE._extract_url(webpage) + if not videomore_url: + videomore_id = self._search_regex( + r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id', + default=None) + if videomore_id: + videomore_url = 'videomore:%s' % videomore_id if videomore_url: title = self._og_search_title(webpage) return { From de0359c0af3667605464212e66ba4048b6ba093b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 5 Jan 2019 03:40:41 +0700 Subject: [PATCH 02/84] [tvnow] Fix and rework extractors, prepare for a switch to the new API (closes #17245, closes #18499) --- youtube_dl/extractor/extractors.py | 4 +- youtube_dl/extractor/tvnow.py | 378 +++++++++++++++++++++-------- 2 files changed, 283 insertions(+), 99 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d72f52e36..3b1dfc451 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1193,7 +1193,9 @@ from .tvnet import TVNetIE from .tvnoe import TVNoeIE from .tvnow import ( TVNowIE, - TVNowListIE, + TVNowNewIE, + TVNowSeasonIE, + TVNowAnnualIE, TVNowShowIE, ) from .tvp import ( diff --git a/youtube_dl/extractor/tvnow.py b/youtube_dl/extractor/tvnow.py index 60937616f..3c6a60c39 100644 --- a/youtube_dl/extractor/tvnow.py +++ b/youtube_dl/extractor/tvnow.py @@ -10,8 +10,9 @@ from ..utils import ( int_or_none, parse_iso8601, parse_duration, - try_get, + str_or_none, update_url_query, + urljoin, ) @@ -24,8 +25,7 @@ class TVNowBaseIE(InfoExtractor): def _call_api(self, path, video_id, query): return self._download_json( - 'https://api.tvnow.de/v3/' + path, - video_id, query=query) + 'https://api.tvnow.de/v3/' + path, video_id, query=query) def _extract_video(self, info, display_id): video_id = compat_str(info['id']) @@ -108,6 +108,11 @@ class TVNowIE(TVNowBaseIE): (?!(?:list|jahr)(?:/|$))(?P[^/?\#&]+) ''' + @classmethod + def suitable(cls, url): + return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url) + else super(TVNowIE, cls).suitable(url)) + _TESTS = [{ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player', 'info_dict': { @@ -116,7 +121,6 @@ class TVNowIE(TVNowBaseIE): 'ext': 'mp4', 'title': 'Der neue Porsche 911 GT 3', 'description': 'md5:6143220c661f9b0aae73b245e5d898bb', - 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1495994400, 'upload_date': '20170528', 'duration': 5283, @@ -161,136 +165,314 @@ class TVNowIE(TVNowBaseIE): info = self._call_api( 'movies/' + display_id, display_id, query={ 'fields': ','.join(self._VIDEO_FIELDS), - 'station': mobj.group(1), }) return self._extract_video(info, display_id) -class TVNowListBaseIE(TVNowBaseIE): - _SHOW_VALID_URL = r'''(?x) - (?P - https?:// - (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/ - (?P[^/]+) - ) +class TVNowNewIE(InfoExtractor): + _VALID_URL = r'''(?x) + (?Phttps?:// + (?:www\.)?tvnow\.(?:de|at|ch)/ + (?:shows|serien))/ + (?P[^/]+)-\d+/ + [^/]+/ + episode-\d+-(?P[^/?$&]+)-(?P\d+) ''' - def _extract_list_info(self, display_id, show_id): - fields = list(self._SHOW_FIELDS) - fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS) - fields.extend( - 'formatTabs.formatTabPages.container.movies.%s' % field - for field in self._VIDEO_FIELDS) - return self._call_api( - 'formats/seo', display_id, query={ - 'fields': ','.join(fields), - 'name': show_id + '.php' - }) - - -class TVNowListIE(TVNowListBaseIE): - _VALID_URL = r'%s/(?:list|jahr)/(?P[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL - - _SHOW_FIELDS = ('title', ) - _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) - _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', ) - _TESTS = [{ - 'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell', - 'info_dict': { - 'id': '28296', - 'title': '30 Minuten Deutschland - Aktuell', - }, - 'playlist_mincount': 1, - }, { - 'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14', - 'only_matching': True, - }, { - 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3', + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return (False if TVNowIE.suitable(url) - else super(TVNowListIE, cls).suitable(url)) + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url')) + show, episode = mobj.group('show', 'episode') + return self.url_result( + # Rewrite new URLs to the old format and use extraction via old API + # at api.tvnow.de as a loophole for bypassing premium content checks + '%s/%s/%s' % (base_url, show, episode), + ie=TVNowIE.ie_key(), video_id=mobj.group('id')) + + +class TVNowNewBaseIE(InfoExtractor): + def _call_api(self, path, video_id, query={}): + result = self._download_json( + 'https://apigw.tvnow.de/module/' + path, video_id, query=query) + error = result.get('error') + if error: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error), expected=True) + return result + + +""" +TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it +when api.tvnow.de is shut down. This version can't bypass premium checks though. +class TVNowIE(TVNowNewBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?tvnow\.(?:de|at|ch)/ + (?:shows|serien)/[^/]+/ + (?:[^/]+/)+ + (?P[^/?$&]+)-(?P\d+) + ''' + + _TESTS = [{ + # episode with annual navigation + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', + 'info_dict': { + 'id': '331082', + 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3', + 'ext': 'mp4', + 'title': 'Der neue Porsche 911 GT 3', + 'description': 'md5:6143220c661f9b0aae73b245e5d898bb', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1495994400, + 'upload_date': '20170528', + 'duration': 5283, + 'series': 'GRIP - Das Motormagazin', + 'season_number': 14, + 'episode_number': 405, + 'episode': 'Der neue Porsche 911 GT 3', + }, + }, { + # rtl2, episode with season navigation + 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124', + 'only_matching': True, + }, { + # rtlnitro + 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822', + 'only_matching': True, + }, { + # superrtl + 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120', + 'only_matching': True, + }, { + # ntv + 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630', + 'only_matching': True, + }, { + # vox + 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072', + 'only_matching': True, + }, { + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', + 'only_matching': True, + }] + + def _extract_video(self, info, url, display_id): + config = info['config'] + source = config['source'] + + video_id = compat_str(info.get('id') or source['videoId']) + title = source['title'].strip() + + paths = [] + for manifest_url in (info.get('manifest') or {}).values(): + if not manifest_url: + continue + manifest_url = update_url_query(manifest_url, {'filter': ''}) + path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path') + if path in paths: + continue + paths.append(path) + + def url_repl(proto, suffix): + return re.sub( + r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub( + r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)', + '.ism/' + suffix, manifest_url)) + + formats = self._extract_mpd_formats( + url_repl('dash', '.mpd'), video_id, + mpd_id='dash', fatal=False) + formats.extend(self._extract_ism_formats( + url_repl('hss', 'Manifest'), + video_id, ism_id='mss', fatal=False)) + formats.extend(self._extract_m3u8_formats( + url_repl('hls', '.m3u8'), video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + if formats: + break + else: + if try_get(info, lambda x: x['rights']['isDrm']): + raise ExtractorError( + 'Video %s is DRM protected' % video_id, expected=True) + if try_get(config, lambda x: x['boards']['geoBlocking']['block']): + raise self.raise_geo_restricted() + if not info.get('free', True): + raise ExtractorError( + 'Video %s is not available for free' % video_id, expected=True) + self._sort_formats(formats) + + description = source.get('description') + thumbnail = url_or_none(source.get('poster')) + timestamp = unified_timestamp(source.get('previewStart')) + duration = parse_duration(source.get('length')) + + series = source.get('format') + season_number = int_or_none(self._search_regex( + r'staffel-(\d+)', url, 'season number', default=None)) + episode_number = int_or_none(self._search_regex( + r'episode-(\d+)', url, 'episode number', default=None)) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'series': series, + 'season_number': season_number, + 'episode_number': episode_number, + 'episode': title, + 'formats': formats, + } def _real_extract(self, url): - base_url, show_id, season_id = re.match(self._VALID_URL, url).groups() + display_id, video_id = re.match(self._VALID_URL, url).groups() + info = self._call_api('player/' + video_id, video_id) + return self._extract_video(info, video_id, display_id) +""" - list_info = self._extract_list_info(season_id, show_id) - season = next( - season for season in list_info['formatTabs']['items'] - if season.get('seoheadline') == season_id) +class TVNowListBaseIE(TVNowNewBaseIE): + _SHOW_VALID_URL = r'''(?x) + (?P + https?:// + (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/ + [^/?#&]+-(?P\d+) + ) + ''' - title = list_info.get('title') - headline = season.get('headline') - if title and headline: - title = '%s - %s' % (title, headline) - else: - title = headline or title + @classmethod + def suitable(cls, url): + return (False if TVNowNewIE.suitable(url) + else super(TVNowListBaseIE, cls).suitable(url)) + + def _extract_items(self, url, show_id, list_id, query): + items = self._call_api( + 'teaserrow/format/episode/' + show_id, list_id, + query=query)['items'] entries = [] - for container in season['formatTabPages']['items']: - items = try_get( - container, lambda x: x['container']['movies']['items'], - list) or [] - for info in items: - seo_url = info.get('seoUrl') - if not seo_url: - continue - video_id = info.get('id') - entries.append(self.url_result( - '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(), - compat_str(video_id) if video_id else None)) + for item in items: + if not isinstance(item, dict): + continue + item_url = urljoin(url, item.get('url')) + if not item_url: + continue + video_id = str_or_none(item.get('id') or item.get('videoId')) + item_title = item.get('subheadline') or item.get('text') + entries.append(self.url_result( + item_url, ie=TVNowNewIE.ie_key(), video_id=video_id, + video_title=item_title)) - return self.playlist_result( - entries, compat_str(season.get('id') or season_id), title) + return self.playlist_result(entries, '%s/%s' % (show_id, list_id)) + + +class TVNowSeasonIE(TVNowListBaseIE): + _VALID_URL = r'%s/staffel-(?P\d+)' % TVNowListBaseIE._SHOW_VALID_URL + _TESTS = [{ + 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13', + 'info_dict': { + 'id': '1815/13', + }, + 'playlist_mincount': 22, + }] + + def _real_extract(self, url): + _, show_id, season_id = re.match(self._VALID_URL, url).groups() + return self._extract_items( + url, show_id, season_id, {'season': season_id}) + + +class TVNowAnnualIE(TVNowListBaseIE): + _VALID_URL = r'%s/(?P\d{4})-(?P\d{2})' % TVNowListBaseIE._SHOW_VALID_URL + _TESTS = [{ + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05', + 'info_dict': { + 'id': '1669/2017-05', + }, + 'playlist_mincount': 2, + }] + + def _real_extract(self, url): + _, show_id, year, month = re.match(self._VALID_URL, url).groups() + return self._extract_items( + url, show_id, '%s-%s' % (year, month), { + 'year': int(year), + 'month': int(month), + }) class TVNowShowIE(TVNowListBaseIE): _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL - - _SHOW_FIELDS = ('id', 'title', ) - _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) - _VIDEO_FIELDS = () - _TESTS = [{ - 'url': 'https://www.tvnow.at/vox/ab-ins-beet', + # annual navigationType + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669', 'info_dict': { - 'id': 'ab-ins-beet', - 'title': 'Ab ins Beet!', + 'id': '1669', }, - 'playlist_mincount': 7, + 'playlist_mincount': 73, }, { - 'url': 'https://www.tvnow.at/vox/ab-ins-beet/list', - 'only_matching': True, - }, { - 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/', - 'only_matching': True, + # season navigationType + 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471', + 'info_dict': { + 'id': '11471', + }, + 'playlist_mincount': 3, }] @classmethod def suitable(cls, url): - return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url) + return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) else super(TVNowShowIE, cls).suitable(url)) def _real_extract(self, url): base_url, show_id = re.match(self._VALID_URL, url).groups() - list_info = self._extract_list_info(show_id, show_id) + result = self._call_api( + 'teaserrow/format/navigation/' + show_id, show_id) + + items = result['items'] entries = [] - for season_info in list_info['formatTabs']['items']: - season_url = season_info.get('seoheadline') - if not season_url: - continue - season_id = season_info.get('id') - entries.append(self.url_result( - '%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(), - compat_str(season_id) if season_id else None, - season_info.get('headline'))) + navigation = result.get('navigationType') + if navigation == 'annual': + for item in items: + if not isinstance(item, dict): + continue + year = int_or_none(item.get('year')) + if year is None: + continue + months = item.get('months') + if not isinstance(months, list): + continue + for month_dict in months: + if not isinstance(month_dict, dict) or not month_dict: + continue + month_number = int_or_none(list(month_dict.keys())[0]) + if month_number is None: + continue + entries.append(self.url_result( + '%s/%04d-%02d' % (base_url, year, month_number), + ie=TVNowAnnualIE.ie_key())) + elif navigation == 'season': + for item in items: + if not isinstance(item, dict): + continue + season_number = int_or_none(item.get('season')) + if season_number is None: + continue + entries.append(self.url_result( + '%s/staffel-%d' % (base_url, season_number), + ie=TVNowSeasonIE.ie_key())) + else: + raise ExtractorError('Unknown navigationType') - return self.playlist_result(entries, show_id, list_info.get('title')) + return self.playlist_result(entries, show_id) From b7acc83550598b9facf1b31d3d9a1c2823b29cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Jan 2019 00:55:39 +0700 Subject: [PATCH 03/84] [utils] Add language codes replaced in 1989 revision of ISO 639 to ISO639Utils (closes #18765) --- youtube_dl/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 62e769fd5..64b524d2f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2968,6 +2968,7 @@ class ISO639Utils(object): 'gv': 'glv', 'ha': 'hau', 'he': 'heb', + 'iw': 'heb', # Replaced by he in 1989 revision 'hi': 'hin', 'ho': 'hmo', 'hr': 'hrv', @@ -2977,6 +2978,7 @@ class ISO639Utils(object): 'hz': 'her', 'ia': 'ina', 'id': 'ind', + 'in': 'ind', # Replaced by id in 1989 revision 'ie': 'ile', 'ig': 'ibo', 'ii': 'iii', @@ -3091,6 +3093,7 @@ class ISO639Utils(object): 'wo': 'wol', 'xh': 'xho', 'yi': 'yid', + 'ji': 'yid', # Replaced by he yi 1989 revision 'yo': 'yor', 'za': 'zha', 'zh': 'zho', From 04fb6928da5e34fb3c7eaf50592a141400e00487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Jan 2019 00:57:24 +0700 Subject: [PATCH 04/84] [postprocessor/ffmpeg] Embed subtitles with non-standard language codes (refs #18765) --- youtube_dl/postprocessor/ffmpeg.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 757b496a1..efcd39d57 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -384,9 +384,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): opts += ['-c:s', 'mov_text'] for (i, lang) in enumerate(sub_langs): opts.extend(['-map', '%d:0' % (i + 1)]) - lang_code = ISO639Utils.short2long(lang) - if lang_code is not None: - opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) + lang_code = ISO639Utils.short2long(lang) or lang + opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) temp_filename = prepend_extension(filename, 'temp') self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename) From e9a50fba8603128dcb81fa1b59206a0cb106d540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Jan 2019 01:02:34 +0700 Subject: [PATCH 05/84] [utils] Fix typo --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 64b524d2f..d2d3c1a9f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3093,7 +3093,7 @@ class ISO639Utils(object): 'wo': 'wol', 'xh': 'xho', 'yi': 'yid', - 'ji': 'yid', # Replaced by he yi 1989 revision + 'ji': 'yid', # Replaced by yi in 1989 revision 'yo': 'yor', 'za': 'zha', 'zh': 'zho', From bcc334a3c66f2465cd63be284e389cc4c7a78203 Mon Sep 17 00:00:00 2001 From: 4rensiker <4rensics@gmx.de> Date: Tue, 8 Jan 2019 02:44:42 +0100 Subject: [PATCH 06/84] [dtube] Fix extraction (closes #18741) --- youtube_dl/extractor/dtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py index 5887887e1..20190c9cc 100644 --- a/youtube_dl/extractor/dtube.py +++ b/youtube_dl/extractor/dtube.py @@ -48,7 +48,7 @@ class DTubeIE(InfoExtractor): def canonical_url(h): if not h: return None - return 'https://ipfs.io/ipfs/' + h + return 'https://video.dtube.top/ipfs/' + h formats = [] for q in ('240', '480', '720', '1080', ''): From 0266854f63baaa9669b30ee49c5e7e9f353970dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 08:46:34 +0700 Subject: [PATCH 07/84] [dtube] Update test --- youtube_dl/extractor/dtube.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py index 20190c9cc..114d2dbe3 100644 --- a/youtube_dl/extractor/dtube.py +++ b/youtube_dl/extractor/dtube.py @@ -15,16 +15,16 @@ from ..utils import ( class DTubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P[0-9a-z.-]+)/(?P[0-9a-z]{8})' _TEST = { - 'url': 'https://d.tube/#!/v/benswann/zqd630em', - 'md5': 'a03eaa186618ffa7a3145945543a251e', + 'url': 'https://d.tube/#!/v/broncnutz/x380jtr1', + 'md5': '9f29088fa08d699a7565ee983f56a06e', 'info_dict': { - 'id': 'zqd630em', + 'id': 'x380jtr1', 'ext': 'mp4', - 'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom', - 'description': 'md5:700d164e066b87f9eac057949e4227c2', - 'uploader_id': 'benswann', - 'upload_date': '20180222', - 'timestamp': 1519328958, + 'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks', + 'description': 'md5:60be222088183be3a42f196f34235776', + 'uploader_id': 'broncnutz', + 'upload_date': '20190107', + 'timestamp': 1546854054, }, 'params': { 'format': '480p', From 8cb5c2181ac039e92a20247f266f48cee2411748 Mon Sep 17 00:00:00 2001 From: Awal Garg Date: Mon, 7 Jan 2019 17:41:12 +0530 Subject: [PATCH 08/84] [hungama] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/hungama.py | 32 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 youtube_dl/extractor/hungama.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3b1dfc451..5ff34216f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -469,6 +469,7 @@ from .hrti import ( ) from .huajiao import HuajiaoIE from .huffpost import HuffPostIE +from .hungama import HungamaIE from .hypem import HypemIE from .iconosquare import IconosquareIE from .ign import ( diff --git a/youtube_dl/extractor/hungama.py b/youtube_dl/extractor/hungama.py new file mode 100644 index 000000000..e51440299 --- /dev/null +++ b/youtube_dl/extractor/hungama.py @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class HungamaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)hungama\.com/song/[\w\d-]+/(?P[0-9]+)' + _TEST = { + 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/', + 'md5': '396fa7e8e7e67aa25da0edc4cac9b785', + 'info_dict': { + 'id': '2931166', + 'ext': 'mp4', + 'title': 'Kitni Haseen Zindagi', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + player_data = self._download_json('https://www.hungama.com/audio-player-data/track/%s?_country=IN' % video_id, video_id)[0] + title = player_data.get('song_name') or self._og_search_title(webpage) + track_data = self._download_json(player_data['file'], video_id) + media_url = track_data['response']['media_url'] + + return { + 'id': video_id, + 'title': title, + 'formats': self._extract_m3u8_formats(media_url, video_id, ext='mp4'), + } From 06b4b90c70636d8044b7a2f588503cadf8b202d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 09:09:49 +0700 Subject: [PATCH 09/84] [hungama] Fix code and extract more metadata (closes #18771) --- youtube_dl/extractor/hungama.py | 41 +++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/hungama.py b/youtube_dl/extractor/hungama.py index e51440299..d9ad1281a 100644 --- a/youtube_dl/extractor/hungama.py +++ b/youtube_dl/extractor/hungama.py @@ -2,31 +2,54 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import int_or_none class HungamaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)hungama\.com/song/[\w\d-]+/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P[0-9]+)' _TEST = { 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/', - 'md5': '396fa7e8e7e67aa25da0edc4cac9b785', + 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0', 'info_dict': { 'id': '2931166', 'ext': 'mp4', - 'title': 'Kitni Haseen Zindagi', + 'title': 'Lucky Ali - Kitni Haseen Zindagi', + 'track': 'Kitni Haseen Zindagi', + 'artist': 'Lucky Ali', + 'album': 'Aks', + 'release_year': 2000, } } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - player_data = self._download_json('https://www.hungama.com/audio-player-data/track/%s?_country=IN' % video_id, video_id)[0] - title = player_data.get('song_name') or self._og_search_title(webpage) - track_data = self._download_json(player_data['file'], video_id) - media_url = track_data['response']['media_url'] + data = self._download_json( + 'https://www.hungama.com/audio-player-data/track/%s' % video_id, + video_id, query={'_country': 'IN'})[0] + + track = data['song_name'] + artist = data.get('singer_name') + + m3u8_url = self._download_json( + data.get('file') or data['preview_link'], + video_id)['response']['media_url'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + title = '%s - %s' % (artist, track) if artist else track + thumbnail = data.get('img_src') or data.get('album_image') return { 'id': video_id, 'title': title, - 'formats': self._extract_m3u8_formats(media_url, video_id, ext='mp4'), + 'thumbnail': thumbnail, + 'track': track, + 'artist': artist, + 'album': data.get('album_name'), + 'release_year': int_or_none(data.get('date')), + 'formats': formats, } From 391256dc0ee5e6ac8d3022455de207b8b02a5b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 10:02:00 +0700 Subject: [PATCH 10/84] [extractor/common] Add support for movies in _json_ld --- youtube_dl/extractor/common.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e5f8136fc..f507400cc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1250,6 +1250,13 @@ class InfoExtractor(object): part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): info['series'] = unescapeHTML(part_of_series.get('name')) + elif item_type == 'Movie': + info.update({ + 'title': unescapeHTML(e.get('name')), + 'description': unescapeHTML(e.get('description')), + 'duration': parse_duration(e.get('duration')), + 'timestamp': unified_timestamp(e.get('dateCreated')), + }) elif item_type in ('Article', 'NewsArticle'): info.update({ 'timestamp': parse_iso8601(e.get('datePublished')), From 440863ade10c70bec272c1ffe17b63c877e46ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 10:02:49 +0700 Subject: [PATCH 11/84] [extractor/common] Use episode name as title in _json_ld --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f507400cc..9e7febcad 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1239,11 +1239,14 @@ class InfoExtractor(object): if expected_type is not None and expected_type != item_type: return info if item_type in ('TVEpisode', 'Episode'): + episode_name = unescapeHTML(e.get('name')) info.update({ - 'episode': unescapeHTML(e.get('name')), + 'episode': episode_name, 'episode_number': int_or_none(e.get('episodeNumber')), 'description': unescapeHTML(e.get('description')), }) + if not info.get('title') and episode_name: + info['title'] = episode_name part_of_season = e.get('partOfSeason') if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'): info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) From 2543938bbe393ceef8dca6a69b441d54df099107 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 10:03:44 +0700 Subject: [PATCH 12/84] [hungama] Add support for videos (closes #17402) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/hungama.py | 78 +++++++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5ff34216f..ddeb70284 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -469,7 +469,10 @@ from .hrti import ( ) from .huajiao import HuajiaoIE from .huffpost import HuffPostIE -from .hungama import HungamaIE +from .hungama import ( + HungamaIE, + HungamaSongIE, +) from .hypem import HypemIE from .iconosquare import IconosquareIE from .ign import ( diff --git a/youtube_dl/extractor/hungama.py b/youtube_dl/extractor/hungama.py index d9ad1281a..3fdaac5b6 100644 --- a/youtube_dl/extractor/hungama.py +++ b/youtube_dl/extractor/hungama.py @@ -2,11 +2,73 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + urlencode_postdata, +) class HungamaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P[0-9]+)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?hungama\.com/ + (?: + (?:video|movie)/[^/]+/| + tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/ + ) + (?P\d+) + ''' + _TESTS = [{ + 'url': 'http://www.hungama.com/video/krishna-chants/39349649/', + 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0', + 'info_dict': { + 'id': '2931166', + 'ext': 'mp4', + 'title': 'Lucky Ali - Kitni Haseen Zindagi', + 'track': 'Kitni Haseen Zindagi', + 'artist': 'Lucky Ali', + 'album': 'Aks', + 'release_year': 2000, + } + }, { + 'url': 'https://www.hungama.com/movie/kahaani-2/44129919/', + 'only_matching': True, + }, { + 'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + info = self._search_json_ld(webpage, video_id) + + m3u8_url = self._download_json( + 'https://www.hungama.com/index.php', video_id, + data=urlencode_postdata({'content_id': video_id}), headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'X-Requested-With': 'XMLHttpRequest', + }, query={ + 'c': 'common', + 'm': 'get_video_mdn_url', + })['stream_url'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + info.update({ + 'id': video_id, + 'formats': formats, + }) + return info + + +class HungamaSongIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P\d+)' _TEST = { 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/', 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0', @@ -22,21 +84,21 @@ class HungamaIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) + audio_id = self._match_id(url) data = self._download_json( - 'https://www.hungama.com/audio-player-data/track/%s' % video_id, - video_id, query={'_country': 'IN'})[0] + 'https://www.hungama.com/audio-player-data/track/%s' % audio_id, + audio_id, query={'_country': 'IN'})[0] track = data['song_name'] artist = data.get('singer_name') m3u8_url = self._download_json( data.get('file') or data['preview_link'], - video_id)['response']['media_url'] + audio_id)['response']['media_url'] formats = self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls') self._sort_formats(formats) @@ -44,7 +106,7 @@ class HungamaIE(InfoExtractor): thumbnail = data.get('img_src') or data.get('album_image') return { - 'id': video_id, + 'id': audio_id, 'title': title, 'thumbnail': thumbnail, 'track': track, From 6089ff40e7cc7710e399db1be87fea103a190ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 9 Jan 2019 00:37:01 +0700 Subject: [PATCH 13/84] [youporn] Fix title and description extraction (closes #18748) --- youtube_dl/extractor/youporn.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index ea0bce784..d4eccb4b2 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -68,11 +68,9 @@ class YouPornIE(InfoExtractor): request.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(request, display_id) - title = self._search_regex( - [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P(?:(?!\1).)+)\1', - r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'], - webpage, 'title', group='title', - default=None) or self._og_search_title( + title = self._html_search_regex( + r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>', + webpage, 'title', default=None) or self._og_search_title( webpage, default=None) or self._html_search_meta( 'title', webpage, fatal=True) @@ -134,7 +132,11 @@ class YouPornIE(InfoExtractor): formats.append(f) self._sort_formats(formats) - description = self._og_search_description(webpage, default=None) + description = self._html_search_regex( + r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>', + webpage, 'description', + default=None) or self._og_search_description( + webpage, default=None) thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail') From 3c1089dba41f137dcc6c373daacdc19a24aef81c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 9 Jan 2019 14:23:26 +0100 Subject: [PATCH 14/84] [gaia] Add new extractor(#14605) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/gaia.py | 98 ++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 youtube_dl/extractor/gaia.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ddeb70284..c7d04d366 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -411,6 +411,7 @@ from .funk import ( from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE +from .gaia import GaiaIE from .gameinformer import GameInformerIE from .gameone import ( GameOneIE, diff --git a/youtube_dl/extractor/gaia.py b/youtube_dl/extractor/gaia.py new file mode 100644 index 000000000..f2eef3f4c --- /dev/null +++ b/youtube_dl/extractor/gaia.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + str_or_none, + strip_or_none, + try_get, +) + + +class GaiaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)' + _TESTS = [{ + 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature', + 'info_dict': { + 'id': '89356', + 'ext': 'mp4', + 'title': 'Connecting with Universal Consciousness', + 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f', + 'upload_date': '20151116', + 'timestamp': 1447707266, + 'duration': 936, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview', + 'info_dict': { + 'id': '89351', + 'ext': 'mp4', + 'title': 'Connecting with Universal Consciousness', + 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f', + 'upload_date': '20151116', + 'timestamp': 1447707266, + 'duration': 53, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + display_id, vtype = re.search(self._VALID_URL, url).groups() + node_id = self._download_json( + 'https://brooklyn.gaia.com/pathinfo', display_id, query={ + 'path': 'video/' + display_id, + })['id'] + node = self._download_json( + 'https://brooklyn.gaia.com/node/%d' % node_id, node_id) + vdata = node[vtype] + media_id = compat_str(vdata['nid']) + title = node['title'] + + media = self._download_json( + 'https://brooklyn.gaia.com/media/' + media_id, media_id) + formats = self._extract_m3u8_formats( + media['mediaUrls']['bcHLS'], media_id, 'mp4') + self._sort_formats(formats) + + subtitles = {} + text_tracks = media.get('textTracks', {}) + for key in ('captions', 'subtitles'): + for lang, sub_url in text_tracks.get(key, {}).items(): + subtitles.setdefault(lang, []).append({ + 'url': sub_url, + }) + + fivestar = node.get('fivestar', {}) + fields = node.get('fields', {}) + + def get_field_value(key, value_key='value'): + return try_get(fields, lambda x: x[key][0][value_key]) + + return { + 'id': media_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'description': strip_or_none(get_field_value('body') or get_field_value('teaser')), + 'timestamp': int_or_none(node.get('created')), + 'subtitles': subtitles, + 'duration': int_or_none(vdata.get('duration')), + 'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])), + 'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])), + 'comment_count': int_or_none(node.get('comment_count')), + 'series': try_get(node, lambda x: x['series']['title'], compat_str), + 'season_number': int_or_none(get_field_value('season')), + 'season_id': str_or_none(get_field_value('series_nid', 'nid')), + 'episode_number': int_or_none(get_field_value('episode')), + } From 65615be36874fe98d5a972546e8024cda3879c86 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 9 Jan 2019 19:17:58 +0100 Subject: [PATCH 15/84] [globo] set GLBID cookie manually(closes #17346) --- youtube_dl/extractor/globo.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index c2140c362..fb8f7679b 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -72,7 +72,7 @@ class GloboIE(InfoExtractor): return try: - self._download_json( + glb_id = (self._download_json( 'https://login.globo.com/api/authentication', None, data=json.dumps({ 'payload': { 'email': email, @@ -81,7 +81,9 @@ class GloboIE(InfoExtractor): }, }).encode(), headers={ 'Content-Type': 'application/json; charset=utf-8', - }) + }) or {}).get('glbId') + if glb_id: + self._set_cookie('.globo.com', 'GLBID', glb_id) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: resp = self._parse_json(e.cause.read(), None) From 4ad159c7b0ce476171aad1eaa8a36f7b9b1cce65 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 9 Jan 2019 20:39:48 +0100 Subject: [PATCH 16/84] [playplustv] Add new extractor(closes #18789) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/playplustv.py | 109 +++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 youtube_dl/extractor/playplustv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c7d04d366..7d7e4247a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -861,6 +861,7 @@ from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE from .playfm import PlayFMIE +from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE from .playtvak import PlaytvakIE from .playvid import PlayvidIE diff --git a/youtube_dl/extractor/playplustv.py b/youtube_dl/extractor/playplustv.py new file mode 100644 index 000000000..419c2974b --- /dev/null +++ b/youtube_dl/extractor/playplustv.py @@ -0,0 +1,109 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + PUTRequest, +) + + +class PlayPlusTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playplus\.tv/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' + _TEST = { + 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e', + 'md5': 'd078cb89d7ab6b9df37ce23c647aef72', + 'info_dict': { + 'id': 'db8d274a5163424e967f35a30ddafb8e', + 'ext': 'mp4', + 'title': 'Capítulo 179 - Final', + 'description': 'md5:01085d62d8033a1e34121d3c3cabc838', + 'timestamp': 1529992740, + 'upload_date': '20180626', + }, + 'skip': 'Requires account credential', + } + _NETRC_MACHINE = 'playplustv' + _GEO_COUNTRIES = ['BR'] + _token = None + _profile_id = None + + def _call_api(self, resource, video_id=None, query=None): + return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={ + 'Authorization': 'Bearer ' + self._token, + }, query=query) + + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + self.raise_login_required() + + req = PUTRequest( + 'https://api.playplus.tv/api/web/login', json.dumps({ + 'email': email, + 'password': password, + }).encode(), { + 'Content-Type': 'application/json; charset=utf-8', + }) + + try: + self._token = self._download_json(req, None)['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + raise ExtractorError(self._parse_json( + e.cause.read(), None)['errorMessage'], expected=True) + raise + + self._profile = self._call_api('Profiles')['list'][0]['_id'] + + def _real_extract(self, url): + project_id, media_id = re.match(self._VALID_URL, url).groups() + media = self._call_api( + 'Media', media_id, { + 'profileId': self._profile, + 'projectId': project_id, + 'mediaId': media_id, + })['obj'] + title = media['title'] + + formats = [] + for f in media.get('files', []): + f_url = f.get('url') + if not f_url: + continue + file_info = f.get('fileInfo') or {} + formats.append({ + 'url': f_url, + 'width': int_or_none(file_info.get('width')), + 'height': int_or_none(file_info.get('height')), + }) + self._sort_formats(formats) + + thumbnails = [] + for thumb in media.get('thumbs', []): + thumb_url = thumb.get('url') + if not thumb_url: + continue + thumbnails.append({ + 'url': thumb_url, + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + }) + + return { + 'id': media_id, + 'title': title, + 'formats': formats, + 'thumbnails': thumbnails, + 'description': clean_html(media.get('description')) or media.get('shortDescription'), + 'timestamp': int_or_none(media.get('publishDate'), 1000), + 'view_count': int_or_none(media.get('numberOfViews')), + 'comment_count': int_or_none(media.get('numberOfComments')), + 'tags': media.get('tags'), + } From 96c186e1fd8af4943eeaa8c858f9cb0f15e8b265 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Jan 2019 09:05:00 +0100 Subject: [PATCH 17/84] [fox] add support National Geographic(closes #17985)(closes #15333)(closes #14698) --- youtube_dl/extractor/extractors.py | 6 +- youtube_dl/extractor/fox.py | 93 +++++++------- youtube_dl/extractor/nationalgeographic.py | 135 --------------------- 3 files changed, 48 insertions(+), 186 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7d7e4247a..a01d99b85 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -687,11 +687,7 @@ from .myvi import ( MyviEmbedIE, ) from .myvidster import MyVidsterIE -from .nationalgeographic import ( - NationalGeographicVideoIE, - NationalGeographicIE, - NationalGeographicEpisodeGuideIE, -) +from .nationalgeographic import NationalGeographicVideoIE from .naver import NaverIE from .nba import NBAIE from .nbc import ( diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 11d6c9c32..b1c91f095 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals +# import json +# import uuid + from .adobepass import AdobePassIE -from .uplynk import UplynkPreplayIE -from ..compat import compat_str from ..utils import ( - HEADRequest, int_or_none, parse_age_limit, parse_duration, @@ -16,7 +16,7 @@ from ..utils import ( class FOXIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)' + _VALID_URL = r'https?://(?:www\.)?(?:fox\.com|nationalgeographic\.com/tv)/watch/(?P<id>[\da-fA-F]+)' _TESTS = [{ # clip 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', @@ -43,41 +43,47 @@ class FOXIE(AdobePassIE): # episode, geo-restricted, tv provided required 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', 'only_matching': True, + }, { + 'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/', + 'only_matching': True, }] + # _access_token = None + + # def _call_api(self, path, video_id, data=None): + # headers = { + # 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd', + # } + # if self._access_token: + # headers['Authorization'] = 'Bearer ' + self._access_token + # return self._download_json( + # 'https://api2.fox.com/v2.0/' + path, video_id, data=data, headers=headers) + + # def _real_initialize(self): + # self._access_token = self._call_api( + # 'login', None, json.dumps({ + # 'deviceId': compat_str(uuid.uuid4()), + # }).encode())['accessToken'] def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( - 'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id, + 'https://api.fox.com/fbc-content/v1_5/video/%s' % video_id, video_id, headers={ 'apikey': 'abdcbed02c124d393b39e818a4312055', 'Content-Type': 'application/json', 'Referer': url, }) + # video = self._call_api('vodplayer/' + video_id, video_id) title = video['name'] release_url = video['videoRelease']['url'] - - description = video.get('description') - duration = int_or_none(video.get('durationInSeconds')) or int_or_none( - video.get('duration')) or parse_duration(video.get('duration')) - timestamp = unified_timestamp(video.get('datePublished')) - rating = video.get('contentRating') - age_limit = parse_age_limit(rating) + # release_url = video['url'] data = try_get( video, lambda x: x['trackingData']['properties'], dict) or {} - creator = data.get('brand') or data.get('network') or video.get('network') - - series = video.get('seriesName') or data.get( - 'seriesName') or data.get('show') - season_number = int_or_none(video.get('seasonNumber')) - episode = video.get('name') - episode_number = int_or_none(video.get('episodeNumber')) - release_year = int_or_none(video.get('releaseYear')) - + rating = video.get('contentRating') if data.get('authRequired'): resource = self._get_mvpd_resource( 'fbc-fox', title, video.get('guid'), rating) @@ -86,6 +92,18 @@ class FOXIE(AdobePassIE): 'auth': self._extract_mvpd_auth( url, video_id, 'fbc-fox', resource) }) + m3u8_url = self._download_json(release_url, video_id)['playURL'] + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + duration = int_or_none(video.get('durationInSeconds')) or int_or_none( + video.get('duration')) or parse_duration(video.get('duration')) + timestamp = unified_timestamp(video.get('datePublished')) + creator = data.get('brand') or data.get('network') or video.get('network') + series = video.get('seriesName') or data.get( + 'seriesName') or data.get('show') subtitles = {} for doc_rel in video.get('documentReleases', []): @@ -98,36 +116,19 @@ class FOXIE(AdobePassIE): }] break - info = { + return { 'id': video_id, 'title': title, - 'description': description, + 'formats': formats, + 'description': video.get('description'), 'duration': duration, 'timestamp': timestamp, - 'age_limit': age_limit, + 'age_limit': parse_age_limit(rating), 'creator': creator, 'series': series, - 'season_number': season_number, - 'episode': episode, - 'episode_number': episode_number, - 'release_year': release_year, + 'season_number': int_or_none(video.get('seasonNumber')), + 'episode': video.get('name'), + 'episode_number': int_or_none(video.get('episodeNumber')), + 'release_year': int_or_none(video.get('releaseYear')), 'subtitles': subtitles, } - - urlh = self._request_webpage(HEADRequest(release_url), video_id) - video_url = compat_str(urlh.geturl()) - - if UplynkPreplayIE.suitable(video_url): - info.update({ - '_type': 'url_transparent', - 'url': video_url, - 'ie_key': UplynkPreplayIE.ie_key(), - }) - else: - m3u8_url = self._download_json(release_url, video_id)['playURL'] - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) - info['formats'] = formats - return info diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 4d2ee6408..165964ca0 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -1,15 +1,9 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from .adobepass import AdobePassIE -from .theplatform import ThePlatformIE from ..utils import ( smuggle_url, url_basename, - update_url_query, - get_element_by_class, ) @@ -64,132 +58,3 @@ class NationalGeographicVideoIE(InfoExtractor): {'force_smil_url': True}), 'id': guid, } - - -class NationalGeographicIE(ThePlatformIE, AdobePassIE): - IE_NAME = 'natgeo' - _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)' - - _TESTS = [ - { - 'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/', - 'md5': '518c9aa655686cf81493af5cc21e2a04', - 'info_dict': { - 'id': 'vKInpacll2pC', - 'ext': 'mp4', - 'title': 'Uncovering a Universal Knowledge', - 'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a', - 'timestamp': 1458680907, - 'upload_date': '20160322', - 'uploader': 'NEWA-FNG-NGTV', - }, - 'add_ie': ['ThePlatform'], - }, - { - 'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/', - 'md5': 'c4912f656b4cbe58f3e000c489360989', - 'info_dict': { - 'id': 'Pok5lWCkiEFA', - 'ext': 'mp4', - 'title': 'The Stunning Red Bird of Paradise', - 'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c', - 'timestamp': 1459362152, - 'upload_date': '20160330', - 'uploader': 'NEWA-FNG-NGTV', - }, - 'add_ie': ['ThePlatform'], - }, - { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/', - 'only_matching': True, - }, - { - 'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/', - 'only_matching': True, - }, - { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/', - 'only_matching': True, - }, - { - 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/', - 'only_matching': True, - } - ] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - release_url = self._search_regex( - r'video_auth_playlist_url\s*=\s*"([^"]+)"', - webpage, 'release url') - theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path') - video_id = theplatform_path.split('/')[-1] - query = { - 'mbr': 'true', - } - is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False) - if is_auth == 'auth': - auth_resource_id = self._search_regex( - r"video_auth_resourceId\s*=\s*'([^']+)'", - webpage, 'auth resource id') - query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id) - - formats = [] - subtitles = {} - for key, value in (('switch', 'http'), ('manifest', 'm3u')): - tp_query = query.copy() - tp_query.update({ - key: value, - }) - tp_formats, tp_subtitles = self._extract_theplatform_smil( - update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value) - formats.extend(tp_formats) - subtitles = self._merge_subtitles(subtitles, tp_subtitles) - self._sort_formats(formats) - - info = self._extract_theplatform_metadata(theplatform_path, display_id) - info.update({ - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - 'display_id': display_id, - }) - return info - - -class NationalGeographicEpisodeGuideIE(InfoExtractor): - IE_NAME = 'natgeo:episodeguide' - _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide' - _TESTS = [ - { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/', - 'info_dict': { - 'id': 'the-story-of-god-with-morgan-freeman-season-1', - 'title': 'The Story of God with Morgan Freeman - Season 1', - }, - 'playlist_mincount': 6, - }, - { - 'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2', - 'info_dict': { - 'id': 'underworld-inc-season-2', - 'title': 'Underworld, Inc. - Season 2', - }, - 'playlist_mincount': 7, - }, - ] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - show = get_element_by_class('show', webpage) - selected_season = self._search_regex( - r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>', - webpage, 'selected season') - entries = [ - self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic') - for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)] - return self.playlist_result( - entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')), - '%s - %s' % (show, selected_season)) From 7c072f00d66f3b3f44d301059443480a5c83d3c0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Jan 2019 10:50:18 +0100 Subject: [PATCH 18/84] [jwplatform] use JW Platform Delivery API V2 and add support for more urls --- youtube_dl/extractor/jwplatform.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 63d0dc998..d19a6a774 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -7,8 +7,8 @@ from .common import InfoExtractor class JWPlatformIE(InfoExtractor): - _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' - _TEST = { + _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' + _TESTS = [{ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325', 'info_dict': { @@ -19,7 +19,10 @@ class JWPlatformIE(InfoExtractor): 'upload_date': '20081127', 'timestamp': 1227796140, } - } + }, { + 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js', + 'only_matching': True, + }] @staticmethod def _extract_url(webpage): @@ -34,5 +37,5 @@ class JWPlatformIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id) + json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id) return self._parse_jwplayer_data(json_data, video_id) From 432aba1c5e68cf8efccb868107785a15617d5109 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Jan 2019 10:54:46 +0100 Subject: [PATCH 19/84] [outsidetv] Add new extractor(closes #18774) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/outsidetv.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 youtube_dl/extractor/outsidetv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a01d99b85..de38c6641 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -829,6 +829,7 @@ from .orf import ( ORFOE1IE, ORFIPTVIE, ) +from .outsidetv import OutsideTVIE from .packtpub import ( PacktPubIE, PacktPubCourseIE, diff --git a/youtube_dl/extractor/outsidetv.py b/youtube_dl/extractor/outsidetv.py new file mode 100644 index 000000000..c5333b08c --- /dev/null +++ b/youtube_dl/extractor/outsidetv.py @@ -0,0 +1,28 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class OutsideTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?outsidetv\.com/(?:[^/]+/)*?play/[a-zA-Z0-9]{8}/\d+/\d+/(?P<id>[a-zA-Z0-9]{8})' + _TESTS = [{ + 'url': 'http://www.outsidetv.com/category/snow/play/ZjQYboH6/1/10/Hdg0jukV/4', + 'md5': '192d968fedc10b2f70ec31865ffba0da', + 'info_dict': { + 'id': 'Hdg0jukV', + 'ext': 'mp4', + 'title': 'Home - Jackson Ep 1 | Arbor Snowboards', + 'description': 'md5:41a12e94f3db3ca253b04bb1e8d8f4cd', + 'upload_date': '20181225', + 'timestamp': 1545742800, + } + }, { + 'url': 'http://www.outsidetv.com/home/play/ZjQYboH6/1/10/Hdg0jukV/4', + 'only_matching': True, + }] + + def _real_extract(self, url): + jw_media_id = self._match_id(url) + return self.url_result( + 'jwplatform:' + jw_media_id, 'JWPlatform', jw_media_id) From c3e543893bfba7faa7c13e53fbe6b60f936b81f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Jan 2019 22:46:53 +0700 Subject: [PATCH 20/84] [youtube] Extract live HLS URL from player response (closes #18799) --- youtube_dl/extractor/youtube.py | 55 +++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 539ff6ff2..29773877e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1931,31 +1931,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'http_chunk_size': 10485760, } formats.append(dct) - elif video_info.get('hlsvp'): - manifest_url = video_info['hlsvp'][0] - formats = [] - m3u8_formats = self._extract_m3u8_formats( - manifest_url, video_id, 'mp4', fatal=False) - for a_format in m3u8_formats: - itag = self._search_regex( - r'/itag/(\d+)/', a_format['url'], 'itag', default=None) - if itag: - a_format['format_id'] = itag - if itag in self._formats: - dct = self._formats[itag].copy() - dct.update(a_format) - a_format = dct - a_format['player_url'] = player_url - # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming - a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' - formats.append(a_format) else: - error_message = clean_html(video_info.get('reason', [None])[0]) - if not error_message: - error_message = extract_unavailable_message() - if error_message: - raise ExtractorError(error_message, expected=True) - raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') + manifest_url = ( + url_or_none(try_get( + player_response, + lambda x: x['streamingData']['hlsManifestUrl'], + compat_str)) or + url_or_none(try_get( + video_info, lambda x: x['hlsvp'][0], compat_str))) + if manifest_url: + formats = [] + m3u8_formats = self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', fatal=False) + for a_format in m3u8_formats: + itag = self._search_regex( + r'/itag/(\d+)/', a_format['url'], 'itag', default=None) + if itag: + a_format['format_id'] = itag + if itag in self._formats: + dct = self._formats[itag].copy() + dct.update(a_format) + a_format = dct + a_format['player_url'] = player_url + # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming + a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' + formats.append(a_format) + else: + error_message = clean_html(video_info.get('reason', [None])[0]) + if not error_message: + error_message = extract_unavailable_message() + if error_message: + raise ExtractorError(error_message, expected=True) + raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info') # uploader video_uploader = try_get( From a4491dd55c5feaf02ac8969caafd637265cbeede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Jan 2019 23:23:19 +0700 Subject: [PATCH 21/84] [ChangeLog] Actualize [ci skip] --- ChangeLog | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 41d190a72..c7d090c6c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +version <unreleased> + +Core +* [extractor/common] Use episode name as title in _json_ld ++ [extractor/common] Add support for movies in _json_ld +* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes + (#18765) ++ [utils] Add language codes replaced in 1989 revision of ISO 639 + to ISO639Utils (#18765) + +Extractors +* [youtube] Extract live HLS URL from player response (#18799) ++ [outsidetv] Add support for outsidetv.com (#18774) +* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs ++ [fox] Add support National Geographic (#17985, #15333, #14698) ++ [playplustv] Add support for playplus.tv (#18789) +* [globo] Set GLBID cookie manually (#17346) ++ [gaia] Add support for gaia.com (#14605) +* [youporn] Fix title and description extraction (#18748) ++ [hungama] Add support for hungama.com (#17402, #18771) +* [dtube] Fix extraction (#18741) +* [tvnow] Fix and rework extractors and prepare for a switch to the new API + (#17245, #18499) +* [carambatv:page] Fix extraction (#18739) + + version 2019.01.02 Extractors From b64f6e690ff84052751c593a8826a40a63cb1d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Jan 2019 23:26:54 +0700 Subject: [PATCH 22/84] release 2019.01.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 11 ++++++++--- youtube_dl/version.py | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3986bbc8e..d6abdbcb7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.02 +[debug] youtube-dl version 2019.01.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c7d090c6c..3d60754c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.10 Core * [extractor/common] Use episode name as title in _json_ld diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e92064432..c01409419 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -320,6 +320,7 @@ - **Fusion** - **Fux** - **FXNetworks** + - **Gaia** - **GameInformer** - **GameOne** - **gameone:playlist** @@ -370,6 +371,8 @@ - **HRTiPlaylist** - **Huajiao**: 花椒直播 - **HuffPost**: Huffington Post + - **Hungama** + - **HungamaSong** - **Hypem** - **Iconosquare** - **ign.com** @@ -540,8 +543,6 @@ - **MyviEmbed** - **MyVisionTV** - **n-tv.de** - - **natgeo** - - **natgeo:episodeguide** - **natgeo:video** - **Naver** - **NBA** @@ -642,6 +643,7 @@ - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek - **OsnatelTV** + - **OutsideTV** - **PacktPub** - **PacktPubCourse** - **PandaTV**: 熊猫TV @@ -666,6 +668,7 @@ - **Pinkbike** - **Pladform** - **play.fm** + - **PlayPlusTV** - **PlaysTV** - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **Playvid** @@ -934,7 +937,9 @@ - **TVNet** - **TVNoe** - **TVNow** - - **TVNowList** + - **TVNowAnnual** + - **TVNowNew** + - **TVNowSeason** - **TVNowShow** - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 07a444706..5ba61f489 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.02' +__version__ = '2019.01.10' From c469e8808c723d6ba090c0b33c292622a3cb555e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Jan 2019 18:48:52 +0100 Subject: [PATCH 23/84] [playplustv] add support for playplus.com(#18789) --- youtube_dl/extractor/playplustv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playplustv.py b/youtube_dl/extractor/playplustv.py index 419c2974b..1e30ab23a 100644 --- a/youtube_dl/extractor/playplustv.py +++ b/youtube_dl/extractor/playplustv.py @@ -15,7 +15,7 @@ from ..utils import ( class PlayPlusTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?playplus\.tv/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' + _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' _TEST = { 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e', 'md5': 'd078cb89d7ab6b9df37ce23c647aef72', From a64646e417615173c9436887116402c9c4c9f6ee Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 11 Jan 2019 15:09:44 +0100 Subject: [PATCH 24/84] [postprocessor/ffmpeg] sanitize ffmpeg version for Ubuntu and Arch Linux systems(closes #18813) --- youtube_dl/postprocessor/ffmpeg.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index efcd39d57..173bdbe68 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -79,6 +79,19 @@ class FFmpegPostProcessor(PostProcessor): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] prefer_ffmpeg = True + def get_ffmpeg_version(path): + ver = get_exe_version(path, args=['-version']) + if ver: + regexs = [ + r'([0-9.]+)-0ubuntu0\.[0-9.]+$', # Ubuntu + r'n([0-9.]+)$', # Arch Linux + ] + for regex in regexs: + mobj = re.match(regex, ver) + if mobj: + ver = mobj.group(1) + return ver + self.basename = None self.probe_basename = None @@ -110,11 +123,10 @@ class FFmpegPostProcessor(PostProcessor): self._paths = dict( (p, os.path.join(location, p)) for p in programs) self._versions = dict( - (p, get_exe_version(self._paths[p], args=['-version'])) - for p in programs) + (p, get_ffmpeg_version(self._paths[p])) for p in programs) if self._versions is None: self._versions = dict( - (p, get_exe_version(p, args=['-version'])) for p in programs) + (p, get_ffmpeg_version(p)) for p in programs) self._paths = dict((p, p) for p in programs) if prefer_ffmpeg is False: From 5caa531a1a7e070e3bef8cc60c3d00c3f7ab1e6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 11 Jan 2019 23:47:23 +0700 Subject: [PATCH 25/84] [postprocessor/ffmpeg] PEP 8 --- youtube_dl/postprocessor/ffmpeg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 173bdbe68..a9fafa363 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -83,8 +83,8 @@ class FFmpegPostProcessor(PostProcessor): ver = get_exe_version(path, args=['-version']) if ver: regexs = [ - r'([0-9.]+)-0ubuntu0\.[0-9.]+$', # Ubuntu - r'n([0-9.]+)$', # Arch Linux + r'([0-9.]+)-0ubuntu0\.[0-9.]+$', # Ubuntu + r'n([0-9.]+)$', # Arch Linux ] for regex in regexs: mobj = re.match(regex, ver) From cbdc688c41876714f1d7e2f82ba9d11f6695448a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 12 Jan 2019 00:30:06 +0700 Subject: [PATCH 26/84] [postprocessor/ffmpeg] Relax ubuntu ffmpeg version regex --- youtube_dl/postprocessor/ffmpeg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index a9fafa363..39a905380 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -83,8 +83,9 @@ class FFmpegPostProcessor(PostProcessor): ver = get_exe_version(path, args=['-version']) if ver: regexs = [ - r'([0-9.]+)-0ubuntu0\.[0-9.]+$', # Ubuntu + r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1] r'n([0-9.]+)$', # Arch Linux + # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/ ] for regex in regexs: mobj = re.match(regex, ver) From 60a899bb7ed583b4c40289925c8127e746c683ea Mon Sep 17 00:00:00 2001 From: Atlas Sullivan <AtlasJan@gmx.com> Date: Fri, 11 Jan 2019 19:15:48 +0000 Subject: [PATCH 27/84] [README.md] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 901595444..70bcfaccf 100644 --- a/README.md +++ b/README.md @@ -496,7 +496,7 @@ The `-o` option allows users to indicate a template for the output file names. **tl;dr:** [navigate me to examples](#output-template-examples). -The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are: +The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are: - `id` (string): Video identifier - `title` (string): Video title From ed8db0a25c93f5611dfae81ee8f15cbc177bd0ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 12 Jan 2019 04:56:17 +0700 Subject: [PATCH 28/84] [wistia] Extend _VALID_URL (closes #18823) --- youtube_dl/extractor/wistia.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 01a51275e..fa142b974 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -12,7 +12,7 @@ from ..utils import ( class WistiaIE(InfoExtractor): - _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/iframe/)(?P<id>[a-z0-9]+)' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)' _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' @@ -38,6 +38,9 @@ class WistiaIE(InfoExtractor): }, { 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt', 'only_matching': True, + }, { + 'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json', + 'only_matching': True, }] @staticmethod From d65f6e734b5c00acbb396f4569907f7957e1cbef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 13 Jan 2019 03:57:31 +0700 Subject: [PATCH 29/84] [bitchute] Check formats (#18833) --- youtube_dl/extractor/bitchute.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py index aa034355a..4f39424f5 100644 --- a/youtube_dl/extractor/bitchute.py +++ b/youtube_dl/extractor/bitchute.py @@ -55,6 +55,7 @@ class BitChuteIE(InfoExtractor): formats = [ {'url': format_url} for format_url in orderedSet(format_urls)] + self._check_formats(formats, video_id) self._sort_formats(formats) description = self._html_search_regex( From f1ab3b7de7667c0382bf6eb6364ccab4260c0654 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 13 Jan 2019 10:01:26 +0100 Subject: [PATCH 30/84] [downloader/hls] fix uplynk ad skipping(closes #18824) --- youtube_dl/downloader/hls.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index fd304527e..4def8e2d5 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -75,10 +75,14 @@ class HlsFD(FragmentFD): fd.add_progress_hook(ph) return fd.real_download(filename, info_dict) - def is_ad_fragment(s): + def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) + def is_ad_fragment_end(s): + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or + s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + media_frags = 0 ad_frags = 0 ad_frag_next = False @@ -87,12 +91,13 @@ class HlsFD(FragmentFD): if not line: continue if line.startswith('#'): - if is_ad_fragment(line): - ad_frags += 1 + if is_ad_fragment_start(line): ad_frag_next = True + elif is_ad_fragment_end(line): + ad_frag_next = False continue if ad_frag_next: - ad_frag_next = False + ad_frags += 1 continue media_frags += 1 @@ -123,7 +128,6 @@ class HlsFD(FragmentFD): if line: if not line.startswith('#'): if ad_frag_next: - ad_frag_next = False continue frag_index += 1 if frag_index <= ctx['fragment_index']: @@ -196,8 +200,10 @@ class HlsFD(FragmentFD): 'start': sub_range_start, 'end': sub_range_start + int(splitted_byte_range[0]), } - elif is_ad_fragment(line): + elif is_ad_fragment_start(line): ad_frag_next = True + elif is_ad_fragment_end(line): + ad_frag_next = False self._finish_frag_download(ctx) From 3b983ee471305946709dcb83fa3799fc26a7db03 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 13 Jan 2019 15:46:54 +0100 Subject: [PATCH 31/84] [curiositystream] add support for non app urls --- youtube_dl/extractor/curiositystream.py | 56 +++++++++++++------------ 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py index 35b1e7a34..e4a7fca6c 100644 --- a/youtube_dl/extractor/curiositystream.py +++ b/youtube_dl/extractor/curiositystream.py @@ -46,8 +46,24 @@ class CuriosityStreamBaseIE(InfoExtractor): self._handle_errors(result) self._auth_token = result['message']['auth_token'] - def _extract_media_info(self, media): - video_id = compat_str(media['id']) + +class CuriosityStreamIE(CuriosityStreamBaseIE): + IE_NAME = 'curiositystream' + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)' + _TEST = { + 'url': 'https://app.curiositystream.com/video/2', + 'md5': '262bb2f257ff301115f1973540de8983', + 'info_dict': { + 'id': '2', + 'ext': 'mp4', + 'title': 'How Did You Develop The Internet?', + 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('media/' + video_id, video_id) title = media['title'] formats = [] @@ -114,38 +130,21 @@ class CuriosityStreamBaseIE(InfoExtractor): } -class CuriosityStreamIE(CuriosityStreamBaseIE): - IE_NAME = 'curiositystream' - _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)' - _TEST = { - 'url': 'https://app.curiositystream.com/video/2', - 'md5': '262bb2f257ff301115f1973540de8983', - 'info_dict': { - 'id': '2', - 'ext': 'mp4', - 'title': 'How Did You Develop The Internet?', - 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - media = self._call_api('media/' + video_id, video_id) - return self._extract_media_info(media) - - class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): IE_NAME = 'curiositystream:collection' - _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)' - _TEST = { + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)' + _TESTS = [{ 'url': 'https://app.curiositystream.com/collection/2', 'info_dict': { 'id': '2', 'title': 'Curious Minds: The Internet', 'description': 'How is the internet shaping our lives in the 21st Century?', }, - 'playlist_mincount': 12, - } + 'playlist_mincount': 17, + }, { + 'url': 'https://curiositystream.com/series/2', + 'only_matching': True, + }] def _real_extract(self, url): collection_id = self._match_id(url) @@ -153,7 +152,10 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): 'collections/' + collection_id, collection_id) entries = [] for media in collection.get('media', []): - entries.append(self._extract_media_info(media)) + media_id = compat_str(media.get('id')) + entries.append(self.url_result( + 'https://curiositystream.com/video/' + media_id, + CuriosityStreamIE.ie_key(), media_id)) return self.playlist_result( entries, collection_id, collection.get('title'), collection.get('description')) From 10026329c2534635876921da229f382098b8f8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 14 Jan 2019 23:23:51 +0700 Subject: [PATCH 32/84] [skylinewebcams] Fix extraction (closes #18853) --- youtube_dl/extractor/skylinewebcams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dl/extractor/skylinewebcams.py index 5b4aaac6f..b7f8ac736 100644 --- a/youtube_dl/extractor/skylinewebcams.py +++ b/youtube_dl/extractor/skylinewebcams.py @@ -26,7 +26,7 @@ class SkylineWebcamsIE(InfoExtractor): webpage = self._download_webpage(url, video_id) stream_url = self._search_regex( - r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage, + r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage, 'stream url', group='url') title = self._og_search_title(webpage) From 929ba3997b026fd9fafe15d447732d8ccdf367bb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Jan 2019 10:23:59 +0100 Subject: [PATCH 33/84] [funimation] fix extraction(closes #14089) --- youtube_dl/extractor/funimation.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 07d01caec..8bbedca26 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -1,6 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals +import random +import string + from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( @@ -87,7 +90,7 @@ class FunimationIE(InfoExtractor): video_id = title_data.get('id') or self._search_regex([ r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", - r'<iframe[^>]+src="/player/(\d+)"', + r'<iframe[^>]+src="/player/(\d+)', ], webpage, 'video_id', default=None) if not video_id: player_url = self._html_search_meta([ @@ -108,8 +111,10 @@ class FunimationIE(InfoExtractor): if self._TOKEN: headers['Authorization'] = 'Token %s' % self._TOKEN sources = self._download_json( - 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id, - video_id, headers=headers)['items'] + 'https://www.funimation.com/api/showexperience/%s/' % video_id, + video_id, headers=headers, query={ + 'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]), + })['items'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: error = self._parse_json(e.cause.read(), video_id)['errors'][0] From 561b456e2d10e60b74a1bee6154bb7fdb0bfaf60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 01:12:58 +0700 Subject: [PATCH 34/84] [youtube] Extract DASH formats from player response (closes #18804) --- youtube_dl/extractor/youtube.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 29773877e..cca149107 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1545,6 +1545,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if dash_mpd and dash_mpd[0] not in dash_mpds: dash_mpds.append(dash_mpd[0]) + def add_dash_mpd_pr(pl_response): + dash_mpd = url_or_none(try_get( + pl_response, lambda x: x['streamingData']['dashManifestUrl'], + compat_str)) + if dash_mpd and dash_mpd not in dash_mpds: + dash_mpds.append(dash_mpd) + is_live = None view_count = None @@ -1602,6 +1609,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if isinstance(pl_response, dict): player_response = pl_response if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): + add_dash_mpd_pr(player_response) # We also try looking in get_video_info since it may contain different dashmpd # URL that points to a DASH manifest with possibly different itag set (some itags # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH @@ -1633,6 +1641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): pl_response = get_video_info.get('player_response', [None])[0] if isinstance(pl_response, dict): player_response = pl_response + add_dash_mpd_pr(player_response) add_dash_mpd(get_video_info) if view_count is None: view_count = extract_view_count(get_video_info) From 2f483bc1c389709623117079439708783122b5ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 01:28:50 +0700 Subject: [PATCH 35/84] [youtube] Skip unsupported adaptive stream type (#18804) --- youtube_dl/extractor/youtube.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index cca149107..5e93b5329 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1827,6 +1827,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url_data = compat_parse_qs(url_data_str) if 'itag' not in url_data or 'url' not in url_data: continue + stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0])) + # Unsupported FORMAT_STREAM_TYPE_OTF + if stream_type == 3: + continue format_id = url_data['itag'][0] url = url_data['url'][0] From a16c7c033a161f310b69d444edc9dbf67cfc49ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 02:17:49 +0700 Subject: [PATCH 36/84] [test/helper] Add support for maxcount and count collection len test checkers --- test/helper.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/test/helper.py b/test/helper.py index aa9a1c9b2..e62aab11e 100644 --- a/test/helper.py +++ b/test/helper.py @@ -153,15 +153,27 @@ def expect_value(self, got, expected, field): isinstance(got, compat_str), 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got))) got = 'md5:' + md5(got) - elif isinstance(expected, compat_str) and expected.startswith('mincount:'): + elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected): self.assertTrue( isinstance(got, (list, dict)), 'Expected field %s to be a list or a dict, but it is of type %s' % ( field, type(got).__name__)) - expected_num = int(expected.partition(':')[2]) - assertGreaterEqual( + op, _, expected_num = expected.partition(':') + expected_num = int(expected_num) + if op == 'mincount': + assert_func = assertGreaterEqual + msg_tmpl = 'Expected %d items in field %s, but only got %d' + elif op == 'maxcount': + assert_func = assertLessEqual + msg_tmpl = 'Expected maximum %d items in field %s, but got %d' + elif op == 'count': + assert_func = assertEqual + msg_tmpl = 'Expected exactly %d items in field %s, but got %d' + else: + assert False + assert_func( self, len(got), expected_num, - 'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got))) + msg_tmpl % (expected_num, field, len(got))) return self.assertEqual( expected, got, @@ -237,6 +249,20 @@ def assertGreaterEqual(self, got, expected, msg=None): self.assertTrue(got >= expected, msg) +def assertLessEqual(self, got, expected, msg=None): + if not (got <= expected): + if msg is None: + msg = '%r not less than or equal to %r' % (got, expected) + self.assertTrue(got <= expected, msg) + + +def assertEqual(self, got, expected, msg=None): + if not (got == expected): + if msg is None: + msg = '%r not equal to %r' % (got, expected) + self.assertTrue(got == expected, msg) + + def expect_warnings(ydl, warnings_re): real_warning = ydl.report_warning From 4fe54c128a11d394874505af75aaa5a2276aa3ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 02:18:27 +0700 Subject: [PATCH 37/84] [youtube] Update tests and add a tests for #18804 --- youtube_dl/extractor/youtube.py | 57 +++++++++++++++++---------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5e93b5329..730935657 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -498,7 +498,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 'upload_date': '20121002', - 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], @@ -527,7 +526,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Icona Pop', 'uploader_id': 'IconaPop', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop', - 'license': 'Standard YouTube License', 'creator': 'Icona Pop', 'track': 'I Love It (feat. Charli XCX)', 'artist': 'Icona Pop', @@ -540,14 +538,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': '07FYdnEawAQ', 'ext': 'mp4', 'upload_date': '20130703', - 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', + 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)', 'alt_title': 'Tunnel Vision', - 'description': 'md5:64249768eec3bc4276236606ea996373', + 'description': 'md5:07dab3356cde4199048e4c7cd93471e1', 'duration': 419, 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', - 'license': 'Standard YouTube License', 'creator': 'Justin Timberlake', 'track': 'Tunnel Vision', 'artist': 'Justin Timberlake', @@ -566,7 +563,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'SET India', 'uploader_id': 'setindia', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia', - 'license': 'Standard YouTube License', 'age_limit': 18, } }, @@ -581,7 +577,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 'upload_date': '20121002', - 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], @@ -605,7 +600,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', 'description': '', 'uploader': '8KVIDEO', - 'license': 'Standard YouTube License', 'title': 'UHDTV TEST 8K VIDEO.mp4' }, 'params': { @@ -620,13 +614,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'IB3lcPjvWLA', 'ext': 'm4a', - 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson', - 'description': 'md5:1900ed86ee514927b9e00fbead6969a5', + 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson', + 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf', 'duration': 244, 'uploader': 'AfrojackVEVO', 'uploader_id': 'AfrojackVEVO', 'upload_date': '20131011', - 'license': 'Standard YouTube License', }, 'params': { 'youtube_include_dash_manifest': True, @@ -640,13 +633,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'nfWlot6h_JM', 'ext': 'm4a', 'title': 'Taylor Swift - Shake It Off', - 'alt_title': 'Shake It Off', - 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3', + 'description': 'md5:bec2185232c05479482cb5a9b82719bf', 'duration': 242, 'uploader': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO', 'upload_date': '20140818', - 'license': 'Standard YouTube License', 'creator': 'Taylor Swift', }, 'params': { @@ -662,10 +653,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'duration': 219, 'upload_date': '20100909', - 'uploader': 'TJ Kirk', + 'uploader': 'Amazing Atheist', 'uploader_id': 'TheAmazingAtheist', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', - 'license': 'Standard YouTube License', 'title': 'Burning Everyone\'s Koran', 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', } @@ -683,7 +673,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'WitcherGame', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 'upload_date': '20140605', - 'license': 'Standard YouTube License', 'age_limit': 18, }, }, @@ -692,7 +681,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU', 'info_dict': { 'id': '6kLq3WMV1nU', - 'ext': 'webm', + 'ext': 'mp4', 'title': 'Dedication To My Ex (Miss That) (Lyric Video)', 'description': 'md5:33765bb339e1b47e7e72b5490139bb41', 'duration': 246, @@ -700,7 +689,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'LloydVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO', 'upload_date': '20110629', - 'license': 'Standard YouTube License', 'age_limit': 18, }, }, @@ -718,7 +706,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'creator': 'deadmau5', 'description': 'md5:12c56784b8032162bb936a5f76d55360', 'uploader': 'deadmau5', - 'license': 'Standard YouTube License', 'title': 'Deadmau5 - Some Chords (HD)', 'alt_title': 'Some Chords', }, @@ -736,7 +723,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150827', 'uploader_id': 'olympic', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', - 'license': 'Standard YouTube License', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 'uploader': 'Olympic', 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', @@ -758,7 +744,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', 'uploader': '孫ᄋᄅ', - 'license': 'Standard YouTube License', 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人', }, }, @@ -792,7 +777,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'dorappi2000', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 'uploader': 'dorappi2000', - 'license': 'Standard YouTube License', 'formats': 'mincount:31', }, 'skip': 'not actual anymore', @@ -808,7 +792,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Airtek', 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.', 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ', - 'license': 'Standard YouTube License', 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015', }, 'params': { @@ -881,6 +864,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This video is not available.', }, { # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536) @@ -917,7 +901,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'IronSoulElf', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 'uploader': 'IronSoulElf', - 'license': 'Standard YouTube License', 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 'track': 'Dark Walk - Position Music', 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', @@ -1021,13 +1004,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'iqKdEhx-dD4', 'ext': 'mp4', 'title': 'Isolation - Mind Field (Ep 1)', - 'description': 'md5:25b78d2f64ae81719f5c96319889b736', + 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f', 'duration': 2085, 'upload_date': '20170118', 'uploader': 'Vsauce', 'uploader_id': 'Vsauce', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', - 'license': 'Standard YouTube License', 'series': 'Mind Field', 'season_number': 1, 'episode_number': 1, @@ -1053,7 +1035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'New Century Foundation', 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg', - 'license': 'Standard YouTube License', }, 'params': { 'skip_download': True, @@ -1081,6 +1062,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # DRM protected 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc', 'only_matching': True, + }, + { + # Video with unsupported adaptive stream type formats + 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U', + 'info_dict': { + 'id': 'Z4Vy8R84T1U', + 'ext': 'mp4', + 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'duration': 433, + 'upload_date': '20130923', + 'uploader': 'Amelia Putri Harwita', + 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q', + 'formats': 'maxcount:10', + }, + 'params': { + 'skip_download': True, + 'youtube_include_dash_manifest': False, + }, } ] From b0d73a745656443baf3e2134d9ca03bd3a4a934c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 02:20:10 +0700 Subject: [PATCH 38/84] [ChangeLog] Actualize [ci skip] --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3d60754c5..d25169074 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version <unreleased> + +Core ++ [test/helper] Add support for maxcount and count collection len checkers +* [downloader/hls] Fix uplynk ad skipping (#18824) +* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813) + +Extractors +* [youtube] Skip unsupported adaptive stream type (#18804) ++ [youtube] Extract DASH formats from player response (#18804) +* [funimation] Fix extraction (#14089) +* [skylinewebcams] Fix extraction (#18853) ++ [curiositystream] Add support for non app URLs ++ [bitchute] Check formats (#18833) +* [wistia] Extend URL regular expression (#18823) ++ [playplustv] Add support for playplus.com (#18789) + + version 2019.01.10 Core From bfc8eeea57251796d44f1e10b61d06760690bc25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 02:24:08 +0700 Subject: [PATCH 39/84] release 2019.01.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index d6abdbcb7..650f78511 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.10 +[debug] youtube-dl version 2019.01.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d25169074..13019bf2b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.16 Core + [test/helper] Add support for maxcount and count collection len checkers diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5ba61f489..c13f3a38a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.10' +__version__ = '2019.01.16' From fa4ac365f69cbd51e4c9801984ebea49a12825b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 17 Jan 2019 10:24:44 +0700 Subject: [PATCH 40/84] [youtube] Extend JS player signature function name regexes (closes #18890, closes #18891, closes #18893) --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 730935657..c8bf98b58 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1198,8 +1198,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._search_regex( (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') From f53cecd796dbb698abbde6ac2f7f973dba78f8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 17 Jan 2019 10:25:50 +0700 Subject: [PATCH 41/84] [ChangeLog] Actualize [ci skip] --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 13019bf2b..171034a75 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version <unreleased> + +Extractors +* [youtube] Extend JS player signature function name regular expressions + (#18890, #18891, #18893) + + version 2019.01.16 Core From 29639b363ddab7903ceae096912a0227c8017533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 17 Jan 2019 10:27:17 +0700 Subject: [PATCH 42/84] release 2019.01.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 650f78511..841bca914 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.16 +[debug] youtube-dl version 2019.01.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 171034a75..902301765 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.17 Extractors * [youtube] Extend JS player signature function name regular expressions diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c13f3a38a..ea3f62928 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.16' +__version__ = '2019.01.17' From 79fec976b0c250446ea9a9eb7323fb2045ee37fe Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Jan 2019 09:44:08 +0100 Subject: [PATCH 43/84] [vimeo] fix extraction for password protected player URLs(closes #18889) --- youtube_dl/extractor/vimeo.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 5e15f060b..fd37f919b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 import json import re import itertools @@ -392,6 +393,22 @@ class VimeoIE(VimeoBaseInfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://player.vimeo.com/video/68375962', + 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', + 'info_dict': { + 'id': '68375962', + 'ext': 'mp4', + 'title': 'youtube-dl password protected test video', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', + 'uploader_id': 'user18948128', + 'uploader': 'Jaime Marquínez Ferrándiz', + 'duration': 10, + }, + 'params': { + 'videopassword': 'youtube-dl', + }, + }, { 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', 'only_matching': True, @@ -452,7 +469,9 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword') if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') - data = urlencode_postdata({'password': password}) + data = urlencode_postdata({ + 'password': base64.b64encode(password.encode()), + }) pass_url = url + '/check-password' password_request = sanitized_Request(pass_url, data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') From e2dd132f054df5b6c09b7c274752a77d8ba44f8d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Jan 2019 23:56:37 +0100 Subject: [PATCH 44/84] [cartoonnetwork] fix extraction(closes #15664)(closes #17224) --- youtube_dl/extractor/cartoonnetwork.py | 56 +++++++++++++++++--------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 6aeebd7b3..48b33617f 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -1,20 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .turner import TurnerBaseIE +from ..utils import int_or_none class CartoonNetworkIE(TurnerBaseIE): _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html' _TEST = { - 'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html', + 'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html', 'info_dict': { - 'id': '8a250ab04ed07e6c014ef3f1e2f9016c', + 'id': '6e3375097f63874ebccec7ef677c1c3845fa850e', 'ext': 'mp4', - 'title': 'Starfire the Cat Lady', - 'description': 'Robin decides to become a cat so that Starfire will finally love him.', + 'title': 'How to Draw Upgrade', + 'description': 'md5:2061d83776db7e8be4879684eefe8c0f', }, 'params': { # m3u8 download @@ -25,18 +24,39 @@ class CartoonNetworkIE(TurnerBaseIE): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups() - query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id - return self._extract_cvp_info( - 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', - 'tokenizer_src': 'https://token.vgtf.net/token/token_mobile', - }, - }, { + + def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False): + metadata_re = '' + if content_re: + metadata_re = r'|video_metadata\.content_' + content_re + return self._search_regex( + r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re), + webpage, name, fatal=fatal) + + media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True) + title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True) + + info = self._extract_ngtv_info( + media_id, {'networkId': 'cartoonnetwork'}, { 'url': url, 'site_name': 'CartoonNetwork', - 'auth_required': self._search_regex( - r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);', - webpage, 'auth required', default='false') == 'true', + 'auth_required': find_field('authType', 'auth type') != 'unauth', }) + + series = find_field( + 'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage) + info.update({ + 'id': media_id, + 'display_id': display_id, + 'title': title, + 'description': self._html_search_meta('description', webpage), + 'series': series, + 'episode': title, + }) + + for field in ('season', 'episode'): + field_name = field + 'Number' + info[field + '_number'] = int_or_none(find_field( + field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage)) + + return info From 2bfc1d9d68dec097fd8093dc0284dd0cd64beb2e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 19 Jan 2019 21:25:15 +0100 Subject: [PATCH 45/84] [extractor/common] imporove HLS video only format detection(closes #18923) --- youtube_dl/extractor/common.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9e7febcad..af621b74b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1596,6 +1596,7 @@ class InfoExtractor(object): # References: # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21 # 2. https://github.com/rg3/youtube-dl/issues/12211 + # 3. https://github.com/rg3/youtube-dl/issues/18923 # We should try extracting formats only from master playlists [1, 4.3.4], # i.e. playlists that describe available qualities. On the other hand @@ -1667,11 +1668,16 @@ class InfoExtractor(object): rendition = stream_group[0] return rendition.get('NAME') or stream_group_id + # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF inorder to have the + # chance to detect video only formats when EXT-X-STREAM-INF tags + # precede EXT-X-MEDIA tags in HLS manifest such as [3]. + for line in m3u8_doc.splitlines(): + if line.startswith('#EXT-X-MEDIA:'): + extract_media(line) + for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_stream_inf = parse_m3u8_attributes(line) - elif line.startswith('#EXT-X-MEDIA:'): - extract_media(line) elif line.startswith('#') or not line.strip(): continue else: From f28363ad1fb45b4450ae6f09ff9aff8f93227e40 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 19 Jan 2019 21:25:53 +0100 Subject: [PATCH 46/84] [ted] correct acodec for http formats(#18923) --- youtube_dl/extractor/ted.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index d3e4205f5..645942dfd 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -265,6 +265,8 @@ class TEDIE(InfoExtractor): 'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'protocol': 'http', }) + if f.get('acodec') == 'none': + del f['acodec'] formats.append(f) audio_download = talk_info.get('audioDownload') From 379306ef55b64c966392c072b17a450831fec252 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 19 Jan 2019 21:35:02 +0100 Subject: [PATCH 47/84] [extractor/common] fix typo --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index af621b74b..6e36e6778 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1668,7 +1668,7 @@ class InfoExtractor(object): rendition = stream_group[0] return rendition.get('NAME') or stream_group_id - # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF inorder to have the + # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the # chance to detect video only formats when EXT-X-STREAM-INF tags # precede EXT-X-MEDIA tags in HLS manifest such as [3]. for line in m3u8_doc.splitlines(): From 2cc779f497ae20d6e0e28fc546a25723cfea631a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 13:48:09 +0700 Subject: [PATCH 48/84] [YoutubeDL] Add negation support for string comparisons in format selection expressions (closes #18600, closes #18805) --- README.md | 3 ++- test/test_YoutubeDL.py | 46 +++++++++++++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 9 +++++--- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 70bcfaccf..886696015 100644 --- a/README.md +++ b/README.md @@ -667,13 +667,14 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, ` - `asr`: Audio sampling rate in Hertz - `fps`: Frame rate -Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields: +Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields: - `ext`: File extension - `acodec`: Name of the audio codec in use - `vcodec`: Name of the video codec in use - `container`: Name of the container format - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`) - `format_id`: A short description of the format +Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f0f5a8470..df8994b84 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -239,6 +239,52 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot') + def test_format_selection_string_ops(self): + formats = [ + {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + # equals (=) + ydl = YDL({'format': '[format_id=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not equal (!=) + ydl = YDL({'format': '[format_id!=abc-cba]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # starts with (^=) + ydl = YDL({'format': '[format_id^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not start with (!^=) + ydl = YDL({'format': '[format_id!^=abc-cba]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # ends with ($=) + ydl = YDL({'format': '[format_id$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not end with (!$=) + ydl = YDL({'format': '[format_id!$=abc-cba]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # contains (*=) + ydl = YDL({'format': '[format_id*=-]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not contain (!*=) + ydl = YDL({'format': '[format_id!*=-]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + def test_youtube_format_selection(self): order = [ '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13', diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4493fd0e1..a827414dc 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1063,21 +1063,24 @@ class YoutubeDL(object): if not m: STR_OPERATORS = { '=': operator.eq, - '!=': operator.ne, '^=': lambda attr, value: attr.startswith(value), '$=': lambda attr, value: attr.endswith(value), '*=': lambda attr, value: value in attr, } str_operator_rex = re.compile(r'''(?x) \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id) - \s*(?P<op>%s)(?P<none_inclusive>\s*\?)? + \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)? \s*(?P<value>[a-zA-Z0-9._-]+) \s*$ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) m = str_operator_rex.search(filter_spec) if m: comparison_value = m.group('value') - op = STR_OPERATORS[m.group('op')] + str_op = STR_OPERATORS[m.group('op')] + if m.group('negation'): + op = lambda attr, value: not str_op + else: + op = str_op if not m: raise ValueError('Invalid filter specification %r' % filter_spec) From 4e58d9fabbfef80b2ecc0d20959bdf9dd3705e73 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 20 Jan 2019 14:23:35 +0700 Subject: [PATCH 49/84] [README.md] Fix formatting --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 886696015..4ba982907 100644 --- a/README.md +++ b/README.md @@ -674,6 +674,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends - `container`: Name of the container format - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`) - `format_id`: A short description of the format + Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. From fc746c3fdd7d40935a11e72b5cb69a8aee840e94 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 20 Jan 2019 09:04:51 +0100 Subject: [PATCH 50/84] [test/test_InfoExtractor] add test for #18923 --- test/test_InfoExtractor.py | 59 ++++++++++++++++++++++++++++++- test/testdata/m3u8/ted_18923.m3u8 | 28 +++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 test/testdata/m3u8/ted_18923.m3u8 diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 06be72616..75fa0bbb7 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -497,7 +497,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'width': 1280, 'height': 720, }] - ) + ), + ( + # https://github.com/rg3/youtube-dl/issues/18923 + # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa + 'ted_18923', + 'http://hls.ted.com/talks/31241.m3u8', + [{ + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '600k-Audio', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '68', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '163', + 'acodec': 'none', + 'width': 320, + 'height': 180, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '481', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '769', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '984', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1255', + 'acodec': 'none', + 'width': 640, + 'height': 360, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1693', + 'acodec': 'none', + 'width': 853, + 'height': 480, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '2462', + 'acodec': 'none', + 'width': 1280, + 'height': 720, + }] + ), ] for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: diff --git a/test/testdata/m3u8/ted_18923.m3u8 b/test/testdata/m3u8/ted_18923.m3u8 new file mode 100644 index 000000000..52a27118b --- /dev/null +++ b/test/testdata/m3u8/ted_18923.m3u8 @@ -0,0 +1,28 @@ +#EXTM3U +#EXT-X-VERSION:4 +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360 +/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180 +/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288 +/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288 +/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288 +/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480 +/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720 +/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES +/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b + +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" + +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400 From 15870747f02effba9376fd1a1500c32d36d1b811 Mon Sep 17 00:00:00 2001 From: aviperes <avipr24@gmail.com> Date: Sun, 20 Jan 2019 10:15:01 +0200 Subject: [PATCH 51/84] [odnoklassniki] Detect paid videos --- youtube_dl/extractor/odnoklassniki.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 190d8af4d..114b93c07 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -115,6 +115,10 @@ class OdnoklassnikiIE(InfoExtractor): }, { 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#', 'only_matching': True, + }, { + # Paid video + 'url': 'https://ok.ru/video/954886983203', + 'only_matching': True, }] def _real_extract(self, url): @@ -244,6 +248,11 @@ class OdnoklassnikiIE(InfoExtractor): 'ext': 'flv', }) + if not formats: + payment_info = metadata.get('paymentInfo') + if payment_info: + raise ExtractorError('This video is paid, subscribe to download it', expected=True) + self._sort_formats(formats) info['formats'] = formats From 31fbedc06a349bc555ab934588544f75734e3a55 Mon Sep 17 00:00:00 2001 From: jhwgh1968 <jhwgh1968@users.noreply.github.com> Date: Sun, 20 Jan 2019 09:10:46 +0000 Subject: [PATCH 52/84] [instagram] Add base extractor for playlists and tag extractor --- youtube_dl/extractor/extractors.py | 6 +- youtube_dl/extractor/instagram.py | 136 +++++++++++++++++++++-------- 2 files changed, 105 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de38c6641..24361def4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -494,7 +494,11 @@ from .ina import InaIE from .inc import IncIE from .indavideo import IndavideoEmbedIE from .infoq import InfoQIE -from .instagram import InstagramIE, InstagramUserIE +from .instagram import ( + InstagramIE, + InstagramUserIE, + InstagramTagIE, +) from .internazionale import InternazionaleIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 7e0e838f0..ffd87b55f 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -227,44 +227,37 @@ class InstagramIE(InfoExtractor): } -class InstagramUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])' - IE_DESC = 'Instagram user profile' - IE_NAME = 'instagram:user' - _TEST = { - 'url': 'https://instagram.com/porsche', - 'info_dict': { - 'id': 'porsche', - 'title': 'porsche', - }, - 'playlist_count': 5, - 'params': { - 'extract_flat': True, - 'skip_download': True, - 'playlistend': 5, - } - } +class InstagramPlaylistIE(InfoExtractor): + # A superclass for handling any kind of query based on GraphQL which + # results in a playlist. - _gis_tmpl = None + _gis_tmpl = None # used to cache GIS request type - def _entries(self, data): + def _parse_graphql(self, webpage, item_id): + # Reads a webpage and returns its GraphQL data. + return self._parse_json( + self._search_regex( + r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'), + item_id) + + def _extract_graphql(self, data, url): + # Parses GraphQL queries containing videos and generates a playlist. def get_count(suffix): return int_or_none(try_get( node, lambda x: x['edge_media_' + suffix]['count'])) - uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id'] + uploader_id = self._match_id(url) csrf_token = data['config']['csrf_token'] rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8' - self._set_cookie('instagram.com', 'ig_pr', '1') - cursor = '' for page_num in itertools.count(1): - variables = json.dumps({ - 'id': uploader_id, + variables = { 'first': 12, 'after': cursor, - }) + } + variables.update(self._query_vars_for(data)) + variables = json.dumps(variables) if self._gis_tmpl: gis_tmpls = [self._gis_tmpl] @@ -276,21 +269,26 @@ class InstagramUserIE(InfoExtractor): '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']), ] + # try all of the ways to generate a GIS query, and not only use the + # first one that works, but cache it for future requests for gis_tmpl in gis_tmpls: try: - media = self._download_json( + json_data = self._download_json( 'https://www.instagram.com/graphql/query/', uploader_id, 'Downloading JSON page %d' % page_num, headers={ 'X-Requested-With': 'XMLHttpRequest', 'X-Instagram-GIS': hashlib.md5( ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(), }, query={ - 'query_hash': '42323d64886122307be10013ad2dcc44', + 'query_hash': self._QUERY_HASH, 'variables': variables, - })['data']['user']['edge_owner_to_timeline_media'] + }) + media = self._parse_timeline_from(json_data) self._gis_tmpl = gis_tmpl break except ExtractorError as e: + # if it's an error caused by a bad query, and there are + # more GIS templates to try, ignore it and keep trying if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if gis_tmpl != gis_tmpls[-1]: continue @@ -348,14 +346,80 @@ class InstagramUserIE(InfoExtractor): break def _real_extract(self, url): - username = self._match_id(url) + user_or_tag = self._match_id(url) + webpage = self._download_webpage(url, user_or_tag) + data = self._parse_graphql(webpage, user_or_tag) - webpage = self._download_webpage(url, username) - - data = self._parse_json( - self._search_regex( - r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'), - username) + self._set_cookie('instagram.com', 'ig_pr', '1') return self.playlist_result( - self._entries(data), username, username) + self._extract_graphql(data, url), user_or_tag, user_or_tag) + + +class InstagramUserIE(InstagramPlaylistIE): + _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])' + IE_DESC = 'Instagram user profile' + IE_NAME = 'instagram:user' + _TEST = { + 'url': 'https://instagram.com/porsche', + 'info_dict': { + 'id': 'porsche', + 'title': 'porsche', + }, + 'playlist_count': 5, + 'params': { + 'extract_flat': True, + 'skip_download': True, + 'playlistend': 5, + } + } + + _QUERY_HASH = '42323d64886122307be10013ad2dcc44', + + @staticmethod + def _parse_timeline_from(data): + # extracts the media timeline data from a GraphQL result + return data['data']['user']['edge_owner_to_timeline_media'] + + @staticmethod + def _query_vars_for(data): + # returns a dictionary of variables to add to the timeline query based + # on the GraphQL of the original page + return { + 'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id'] + } + + +class InstagramTagIE(InstagramPlaylistIE): + _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)' + IE_DESC = 'Instagram hashtag search' + IE_NAME = 'instagram:tag' + _TEST = { + 'url': 'https://instagram.com/explore/tags/lolcats', + 'info_dict': { + 'id': 'lolcats', + 'title': 'lolcats', + }, + 'playlist_count': 50, + 'params': { + 'extract_flat': True, + 'skip_download': True, + 'playlistend': 50, + } + } + + _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314', + + @staticmethod + def _parse_timeline_from(data): + # extracts the media timeline data from a GraphQL result + return data['data']['hashtag']['edge_hashtag_to_media'] + + @staticmethod + def _query_vars_for(data): + # returns a dictionary of variables to add to the timeline query based + # on the GraphQL of the original page + return { + 'tag_name': + data['entry_data']['TagPage'][0]['graphql']['hashtag']['name'] + } From 6ca3fa898cd066422daea3d5be53efdae22187d8 Mon Sep 17 00:00:00 2001 From: yonaikerlol <lawlietrs7@gmail.com> Date: Sun, 20 Jan 2019 05:24:21 -0400 Subject: [PATCH 53/84] [streamango] Add support for fruithosts.net --- youtube_dl/extractor/streamango.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py index fcaa5ac0b..efb259f96 100644 --- a/youtube_dl/extractor/streamango.py +++ b/youtube_dl/extractor/streamango.py @@ -14,7 +14,7 @@ from ..utils import ( class StreamangoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', 'md5': 'e992787515a182f55e38fc97588d802a', @@ -38,6 +38,9 @@ class StreamangoIE(InfoExtractor): }, { 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4', 'only_matching': True, + }, { + 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', + 'only_matching': True, }] def _real_extract(self, url): From 289ef490f77cb35c43d98b9d2ea4cf529c24895e Mon Sep 17 00:00:00 2001 From: Anthony Fok <foka@debian.org> Date: Sun, 30 Dec 2018 02:44:40 -0700 Subject: [PATCH 54/84] [hketv] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/hketv.py | 185 +++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 youtube_dl/extractor/hketv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24361def4..574a47e6d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -452,6 +452,7 @@ from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE from .hgtv import HGTVComShowIE +from .hketv import HKETVIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE diff --git a/youtube_dl/extractor/hketv.py b/youtube_dl/extractor/hketv.py new file mode 100644 index 000000000..b5790cdee --- /dev/null +++ b/youtube_dl/extractor/hketv.py @@ -0,0 +1,185 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + merge_dicts, + str_or_none, + str_to_int, + try_get, + unified_strdate, + urlencode_postdata, + urljoin, +) + + +class HKETVIE(InfoExtractor): + IE_NAME = 'hketv' + IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['HK'] + _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.hkedcity.net/etv/resource/2932360618', + 'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7', + 'info_dict': { + 'id': '2932360618', + 'ext': 'mp4', + 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)', + 'description': '本節目輯錄了「閱讀滿Fun嘉年華」和「二○一八響應世界閱讀日――悅愛閱讀・愈讀愈愛」的活動花絮,並由學者、作家、演藝界人士等,分享培養子女閱讀興趣和習慣的方法,以及呼籲大家一同分享閱讀的樂趣。', + 'upload_date': '20181024', + 'duration': 900, + 'subtitles': { + 'en': [{ + 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=en', + 'ext': 'srt', + }], + 'zh-Hant': [{ + 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=qmt', + 'ext': 'srt', + }], + } + }, + }, { + 'url': 'https://www.hkedcity.net/etv/resource/972641418', + 'md5': '1ed494c1c6cf7866a8290edad9b07dc9', + 'info_dict': { + 'id': '972641418', + 'ext': 'mp4', + 'title': '衣冠楚楚 (天使系列之一)', + 'description': '天國仙境,有兩位可愛的天使小姐妹。她們對幾千年來天使衣著一成不變頗有不滿。她們下望人世間:只見人們穿著七彩繽紛、款式各異的服裝,漂亮極了。天使姐妹決定下凡考察衣著,以設計天使新裝。 下到人間,姐妹試穿各式各樣的衣著,引發連串奇特有趣的情節:她們穿著校服在街上閒逛時,被女警誤認為逃學而送回學校,到校後又被體育老師誤認為是新同學,匆匆忙忙換上運動服後在操場上大顯神通。她們穿著護士服在醫院散步時,又被誤認為當班護士,而投入追尋失蹤病童、治病救人的工作中去。姐妹倆還到過玩具店,與布娃娃們談論衣著。她們也去過服裝設計學校,被當成小模特兒而試穿各式服裝。最令姐妹倆興奮的是一場盛大的民族服裝表演會。身穿盛裝的十二個民族的少女在台上翩翩起舞,各種服飾七彩繽紛、美不勝收。姐妹們情不自禁地穿上民族服裝,小天使變成了少數民族姑娘……最後天使姐妹回到天上,對於天使究竟穿甚麼樣的衣服好,她們還是拿不定主意。 節目通過天使姐妹的奇特經歷,反復示範各式衣服鞋襪的正確讀音及談論衣著時的常用句式,並以盛大的民族服裝表演活動,帶出有關服裝的文化知識。內容豐富而饒有趣味。', + 'upload_date': '20070109', + 'duration': 907, + 'subtitles': {}, + }, + }] + + _CC_LANGS = { + '中文(繁體中文)': 'zh-Hant', + '中文(简体中文)': 'zh-Hans', + 'English': 'en', + 'Bahasa Indonesia': 'id', + '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi', + '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne', + 'Tagalog': 'tl', + '\u0e44\u0e17\u0e22': 'th', + '\u0627\u0631\u062f\u0648': 'ur', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._html_search_meta('ed_title', webpage, fatal=True) + + file_id = self._html_search_regex(r'post_var\["file_id"\]\s*=\s*(.+?);', webpage, 'file ID') + curr_url = self._html_search_regex(r'post_var\["curr_url"\]\s*=\s*"(.+?)";', webpage, 'curr URL') + data = { + 'action': 'get_info', + 'curr_url': curr_url, + 'file_id': file_id, + 'video_url': file_id, + } + _APPS_BASE_URL = 'https://apps.hkedcity.net' + handler_url = _APPS_BASE_URL + '/media/play/handler.php' + + response = self._download_json( + handler_url, video_id, + data=urlencode_postdata(data), + headers=merge_dicts({'Content-Type': 'application/x-www-form-urlencoded'}, + self.geo_verification_headers())) + + result = response['result'] + + formats = [] + subtitles = {} + + if response.get('success') and response.get('access'): + width = int_or_none(result.get('width')) + height = int_or_none(result.get('height')) + + playlist0 = try_get(result, lambda x: x['playlist'][0], dict) + fmts = playlist0.get('sources') + for fmt in fmts: + file_path = fmt.get('file') + if file_path: + file_url = urljoin(_APPS_BASE_URL, file_path) + # If we ever wanted to provide the final resolved URL that + # does not require cookies, albeit with a shorter lifespan: + # urlh = self._downloader.urlopen(file_url) + # resolved_url = urlh.geturl() + + label = fmt.get('label') + w = None + h = None + if label == 'HD': + h = 720 + elif label == 'SD': + h = 360 + if h: + if width and height: + w = h * width // height + else: + w = h * 4 // 3 + + formats.append({ + 'format_id': label, + 'ext': fmt.get('type'), + 'url': file_url, + 'width': w, + 'height': h, + }) + + tracks = playlist0.get('tracks', []) + for track in tracks: + if not isinstance(track, dict): + continue + track_kind = str_or_none(track.get('kind')) + if not track_kind or not isinstance(track_kind, compat_str): + continue + if track_kind.lower() not in ('captions', 'subtitles'): + continue + track_url = urljoin(_APPS_BASE_URL, track.get('file')) + if not track_url: + continue + track_label = track.get('label') + subtitles.setdefault(self._CC_LANGS.get(track_label, track_label), []).append({ + 'url': self._proto_relative_url(track_url), + 'ext': 'srt', + }) + + else: + error = clean_html(response.get('access_err_msg')) + if 'Video streaming is not available in your country' in error: + self.raise_geo_restricted(msg=error, countries=self._GEO_COUNTRIES) + else: + raise ExtractorError(error) + + # Likes + emotion = self._download_json( + 'https://emocounter.hkedcity.net/handler.php', + video_id, + data=urlencode_postdata({ + 'action': 'get_emotion', + 'data[bucket_id]': 'etv', + 'data[identifier]': video_id, + }), + headers={'Content-Type': 'application/x-www-form-urlencoded'}, + fatal=False) + like_count = int_or_none(try_get(emotion, lambda x: x['data']['emotion_data'][0]['count'])) + + return { + 'id': video_id, + 'title': title, + 'description': self._html_search_meta('description', webpage, fatal=False), + 'upload_date': unified_strdate(self._html_search_meta('ed_date', webpage, fatal=False), day_first=False), + 'duration': int_or_none(result.get('length')), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnail': urljoin(_APPS_BASE_URL, result.get('image')), + 'view_count': str_to_int(result.get('view_count')), + 'like_count': like_count, + } From 73c19aaa9f74e9383a7aaf0dfb3c608727d5b6b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 17:42:25 +0700 Subject: [PATCH 55/84] [hketv] Improve and simplify (closes #18696) --- youtube_dl/extractor/hketv.py | 180 ++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 87 deletions(-) diff --git a/youtube_dl/extractor/hketv.py b/youtube_dl/extractor/hketv.py index b5790cdee..b57927fc1 100644 --- a/youtube_dl/extractor/hketv.py +++ b/youtube_dl/extractor/hketv.py @@ -8,8 +8,8 @@ from ..utils import ( ExtractorError, int_or_none, merge_dicts, + parse_count, str_or_none, - str_to_int, try_get, unified_strdate, urlencode_postdata, @@ -30,20 +30,12 @@ class HKETVIE(InfoExtractor): 'id': '2932360618', 'ext': 'mp4', 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)', - 'description': '本節目輯錄了「閱讀滿Fun嘉年華」和「二○一八響應世界閱讀日――悅愛閱讀・愈讀愈愛」的活動花絮,並由學者、作家、演藝界人士等,分享培養子女閱讀興趣和習慣的方法,以及呼籲大家一同分享閱讀的樂趣。', + 'description': 'md5:d5286d05219ef50e0613311cbe96e560', 'upload_date': '20181024', 'duration': 900, - 'subtitles': { - 'en': [{ - 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=en', - 'ext': 'srt', - }], - 'zh-Hant': [{ - 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=qmt', - 'ext': 'srt', - }], - } + 'subtitles': 'count:2', }, + 'skip': 'Geo restricted to HK', }, { 'url': 'https://www.hkedcity.net/etv/resource/972641418', 'md5': '1ed494c1c6cf7866a8290edad9b07dc9', @@ -51,11 +43,15 @@ class HKETVIE(InfoExtractor): 'id': '972641418', 'ext': 'mp4', 'title': '衣冠楚楚 (天使系列之一)', - 'description': '天國仙境,有兩位可愛的天使小姐妹。她們對幾千年來天使衣著一成不變頗有不滿。她們下望人世間:只見人們穿著七彩繽紛、款式各異的服裝,漂亮極了。天使姐妹決定下凡考察衣著,以設計天使新裝。 下到人間,姐妹試穿各式各樣的衣著,引發連串奇特有趣的情節:她們穿著校服在街上閒逛時,被女警誤認為逃學而送回學校,到校後又被體育老師誤認為是新同學,匆匆忙忙換上運動服後在操場上大顯神通。她們穿著護士服在醫院散步時,又被誤認為當班護士,而投入追尋失蹤病童、治病救人的工作中去。姐妹倆還到過玩具店,與布娃娃們談論衣著。她們也去過服裝設計學校,被當成小模特兒而試穿各式服裝。最令姐妹倆興奮的是一場盛大的民族服裝表演會。身穿盛裝的十二個民族的少女在台上翩翩起舞,各種服飾七彩繽紛、美不勝收。姐妹們情不自禁地穿上民族服裝,小天使變成了少數民族姑娘……最後天使姐妹回到天上,對於天使究竟穿甚麼樣的衣服好,她們還是拿不定主意。 節目通過天使姐妹的奇特經歷,反復示範各式衣服鞋襪的正確讀音及談論衣著時的常用句式,並以盛大的民族服裝表演活動,帶出有關服裝的文化知識。內容豐富而饒有趣味。', + 'description': 'md5:10bb3d659421e74f58e5db5691627b0f', 'upload_date': '20070109', 'duration': 907, 'subtitles': {}, }, + 'params': { + 'geo_verification_proxy': '<HK proxy here>', + }, + 'skip': 'Geo restricted to HK', }] _CC_LANGS = { @@ -69,117 +65,127 @@ class HKETVIE(InfoExtractor): '\u0e44\u0e17\u0e22': 'th', '\u0627\u0631\u062f\u0648': 'ur', } + _FORMAT_HEIGHTS = { + 'SD': 360, + 'HD': 720, + } + _APPS_BASE_URL = 'https://apps.hkedcity.net' def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_meta('ed_title', webpage, fatal=True) - file_id = self._html_search_regex(r'post_var\["file_id"\]\s*=\s*(.+?);', webpage, 'file ID') - curr_url = self._html_search_regex(r'post_var\["curr_url"\]\s*=\s*"(.+?)";', webpage, 'curr URL') + title = ( + self._html_search_meta( + ('ed_title', 'search.ed_title'), webpage, default=None) or + self._search_regex( + r'data-favorite_title_(?:eng|chi)=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'title', default=None, group='url') or + self._html_search_regex( + r'<h1>([^<]+)</h1>', webpage, 'title', default=None) or + self._og_search_title(webpage) + ) + + file_id = self._search_regex( + r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);', + webpage, 'file ID') + curr_url = self._search_regex( + r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";', + webpage, 'curr URL') data = { 'action': 'get_info', 'curr_url': curr_url, 'file_id': file_id, 'video_url': file_id, } - _APPS_BASE_URL = 'https://apps.hkedcity.net' - handler_url = _APPS_BASE_URL + '/media/play/handler.php' response = self._download_json( - handler_url, video_id, + self._APPS_BASE_URL + '/media/play/handler.php', video_id, data=urlencode_postdata(data), - headers=merge_dicts({'Content-Type': 'application/x-www-form-urlencoded'}, - self.geo_verification_headers())) + headers=merge_dicts({ + 'Content-Type': 'application/x-www-form-urlencoded'}, + self.geo_verification_headers())) result = response['result'] + if not response.get('success') or not response.get('access'): + error = clean_html(response.get('access_err_msg')) + if 'Video streaming is not available in your country' in error: + self.raise_geo_restricted( + msg=error, countries=self._GEO_COUNTRIES) + else: + raise ExtractorError(error, expected=True) + formats = [] + + width = int_or_none(result.get('width')) + height = int_or_none(result.get('height')) + + playlist0 = result['playlist'][0] + for fmt in playlist0['sources']: + file_url = urljoin(self._APPS_BASE_URL, fmt.get('file')) + if not file_url: + continue + # If we ever wanted to provide the final resolved URL that + # does not require cookies, albeit with a shorter lifespan: + # urlh = self._downloader.urlopen(file_url) + # resolved_url = urlh.geturl() + label = fmt.get('label') + h = self._FORMAT_HEIGHTS.get(label) + w = h * width // height if h and width and height else None + formats.append({ + 'format_id': label, + 'ext': fmt.get('type'), + 'url': file_url, + 'width': w, + 'height': h, + }) + self._sort_formats(formats) + subtitles = {} - - if response.get('success') and response.get('access'): - width = int_or_none(result.get('width')) - height = int_or_none(result.get('height')) - - playlist0 = try_get(result, lambda x: x['playlist'][0], dict) - fmts = playlist0.get('sources') - for fmt in fmts: - file_path = fmt.get('file') - if file_path: - file_url = urljoin(_APPS_BASE_URL, file_path) - # If we ever wanted to provide the final resolved URL that - # does not require cookies, albeit with a shorter lifespan: - # urlh = self._downloader.urlopen(file_url) - # resolved_url = urlh.geturl() - - label = fmt.get('label') - w = None - h = None - if label == 'HD': - h = 720 - elif label == 'SD': - h = 360 - if h: - if width and height: - w = h * width // height - else: - w = h * 4 // 3 - - formats.append({ - 'format_id': label, - 'ext': fmt.get('type'), - 'url': file_url, - 'width': w, - 'height': h, - }) - - tracks = playlist0.get('tracks', []) - for track in tracks: - if not isinstance(track, dict): - continue - track_kind = str_or_none(track.get('kind')) - if not track_kind or not isinstance(track_kind, compat_str): - continue - if track_kind.lower() not in ('captions', 'subtitles'): - continue - track_url = urljoin(_APPS_BASE_URL, track.get('file')) - if not track_url: - continue - track_label = track.get('label') - subtitles.setdefault(self._CC_LANGS.get(track_label, track_label), []).append({ + tracks = try_get(playlist0, lambda x: x['tracks'], list) or [] + for track in tracks: + if not isinstance(track, dict): + continue + track_kind = str_or_none(track.get('kind')) + if not track_kind or not isinstance(track_kind, compat_str): + continue + if track_kind.lower() not in ('captions', 'subtitles'): + continue + track_url = urljoin(self._APPS_BASE_URL, track.get('file')) + if not track_url: + continue + track_label = track.get('label') + subtitles.setdefault(self._CC_LANGS.get( + track_label, track_label), []).append({ 'url': self._proto_relative_url(track_url), 'ext': 'srt', }) - else: - error = clean_html(response.get('access_err_msg')) - if 'Video streaming is not available in your country' in error: - self.raise_geo_restricted(msg=error, countries=self._GEO_COUNTRIES) - else: - raise ExtractorError(error) - # Likes emotion = self._download_json( - 'https://emocounter.hkedcity.net/handler.php', - video_id, + 'https://emocounter.hkedcity.net/handler.php', video_id, data=urlencode_postdata({ 'action': 'get_emotion', 'data[bucket_id]': 'etv', 'data[identifier]': video_id, }), headers={'Content-Type': 'application/x-www-form-urlencoded'}, - fatal=False) - like_count = int_or_none(try_get(emotion, lambda x: x['data']['emotion_data'][0]['count'])) + fatal=False) or {} + like_count = int_or_none(try_get( + emotion, lambda x: x['data']['emotion_data'][0]['count'])) return { 'id': video_id, 'title': title, - 'description': self._html_search_meta('description', webpage, fatal=False), - 'upload_date': unified_strdate(self._html_search_meta('ed_date', webpage, fatal=False), day_first=False), + 'description': self._html_search_meta( + 'description', webpage, fatal=False), + 'upload_date': unified_strdate(self._html_search_meta( + 'ed_date', webpage, fatal=False), day_first=False), 'duration': int_or_none(result.get('length')), 'formats': formats, 'subtitles': subtitles, - 'thumbnail': urljoin(_APPS_BASE_URL, result.get('image')), - 'view_count': str_to_int(result.get('view_count')), + 'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')), + 'view_count': parse_count(result.get('view_count')), 'like_count': like_count, } From a1a460759815414c6194bc921ac77a5533b6e02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 18:21:31 +0700 Subject: [PATCH 56/84] [vimeo] Fix video password verification for videos protected by Referer HTTP header --- youtube_dl/extractor/vimeo.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index fd37f919b..6215b3258 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -435,6 +435,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/160743502/abd0e13fb4', 'only_matching': True, } + # https://gettingthingsdone.com/workflowmap/ + # vimeo embed with check-password page protected by Referer header ] @staticmethod @@ -465,20 +467,22 @@ class VimeoIE(VimeoBaseInfoExtractor): urls = VimeoIE._extract_urls(url, webpage) return urls[0] if urls else None - def _verify_player_video_password(self, url, video_id): + def _verify_player_video_password(self, url, video_id, headers): password = self._downloader.params.get('videopassword') if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') data = urlencode_postdata({ 'password': base64.b64encode(password.encode()), }) - pass_url = url + '/check-password' - password_request = sanitized_Request(pass_url, data) - password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - password_request.add_header('Referer', url) - return self._download_json( - password_request, video_id, - 'Verifying the password', 'Wrong password') + headers = merge_dicts(headers, { + 'Content-Type': 'application/x-www-form-urlencoded', + }) + checked = self._download_json( + url + '/check-password', video_id, + 'Verifying the password', data=data, headers=headers) + if checked is False: + raise ExtractorError('Wrong video password', expected=True) + return checked def _real_initialize(self): self._login() @@ -591,7 +595,7 @@ class VimeoIE(VimeoBaseInfoExtractor): cause=e) else: if config.get('view') == 4: - config = self._verify_player_video_password(redirect_url, video_id) + config = self._verify_player_video_password(redirect_url, video_id, headers) vod = config.get('video', {}).get('vod', {}) From 29cfcb43da8ac60e6c2eddad095a41c800d4d95a Mon Sep 17 00:00:00 2001 From: Alexandre Huot <alexandre.huot@usherbrooke.ca> Date: Sun, 20 Jan 2019 06:33:09 -0500 Subject: [PATCH 57/84] [radiocanada] Relax DRM check --- youtube_dl/extractor/radiocanada.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index b952e59b4..302f67d96 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -49,6 +49,16 @@ class RadioCanadaIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, + { + # with protectionType but not actually DRM protected + 'url': 'radiocanada:toutv:140872', + 'info_dict': { + 'id': '140872', + 'title': 'Épisode 1', + 'series': 'District 31', + }, + 'only_matching': True, } ] @@ -67,8 +77,10 @@ class RadioCanadaIE(InfoExtractor): el = find_xpath_attr(metadata, './/Meta', 'name', name) return el.text if el is not None else None + # protectionType does not necessarily mean the video is DRM protected (see + # https://github.com/rg3/youtube-dl/pull/18609). if get_meta('protectionType'): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_warning('This video is probably DRM protected.') device_types = ['ipad'] if not smuggled_data: From 6945b9e78f38284eb4e440b7badea2fc60b66c2f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 20 Jan 2019 13:31:41 +0100 Subject: [PATCH 58/84] [extractor/common] improve jwplayer relative url handling(closes #18892) --- youtube_dl/extractor/common.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6e36e6778..95456b291 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2630,7 +2630,7 @@ class InfoExtractor(object): 'id': this_video_id, 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')), 'description': video_data.get('description'), - 'thumbnail': self._proto_relative_url(video_data.get('image')), + 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))), 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, @@ -2657,12 +2657,9 @@ class InfoExtractor(object): for source in jwplayer_sources_data: if not isinstance(source, dict): continue - source_url = self._proto_relative_url(source.get('file')) - if not source_url: - continue - if base_url: - source_url = compat_urlparse.urljoin(base_url, source_url) - if source_url in urls: + source_url = urljoin( + base_url, self._proto_relative_url(source.get('file'))) + if not source_url or source_url in urls: continue urls.append(source_url) source_type = source.get('type') or '' From fad4ceb53404227f471af2f3544c4c14a5df4acb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 20:21:24 +0700 Subject: [PATCH 59/84] [utils] Fix urljoin for paths with non-http(s) schemes --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9e28e008f..409482c3b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -507,6 +507,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(urljoin('http://foo.de/', ''), None) self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de') + self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de') def test_url_or_none(self): self.assertEqual(url_or_none(None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d2d3c1a9f..d0cb65814 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1868,7 +1868,7 @@ def urljoin(base, path): path = path.decode('utf-8') if not isinstance(path, compat_str) or not path: return None - if re.match(r'^(?:https?:)?//', path): + if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path): return path if isinstance(base, bytes): base = base.decode('utf-8') From 07f9febc4b86a9cd819329f3a7daafdbe9455f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 22:07:01 +0700 Subject: [PATCH 60/84] [tnaflix] Pass Referer in metadata request (closes #18925) --- youtube_dl/extractor/tnaflix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 6798ef4c3..b3573c6e0 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -96,7 +96,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor): cfg_xml = self._download_xml( cfg_url, display_id, 'Downloading metadata', - transform_source=fix_xml_ampersands) + transform_source=fix_xml_ampersands, headers={'Referer': url}) formats = [] From 19d6991312405f5af108af28b3721966720fc72d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Jan 2019 03:03:53 +0700 Subject: [PATCH 61/84] [videomore] Improve extraction and fix season extractor (closes #18908) --- youtube_dl/extractor/videomore.py | 96 ++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 9b56630de..e3eda3327 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -4,8 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, + orderedSet, + parse_duration, + str_or_none, + unified_strdate, + url_or_none, xpath_element, xpath_text, ) @@ -13,7 +19,19 @@ from ..utils import ( class VideomoreIE(InfoExtractor): IE_NAME = 'videomore' - _VALID_URL = r'videomore:(?P<sid>\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P<id>\d+)(?:[/?#&]|\.(?:xml|json)|$)' + _VALID_URL = r'''(?x) + videomore:(?P<sid>\d+)$| + https?://(?:player\.)?videomore\.ru/ + (?: + (?: + embed| + [^/]+/[^/]+ + )/| + [^/]*\?.*?\btrack_id= + ) + (?P<id>\d+) + (?:[/?#&]|\.(?:xml|json)|$) + ''' _TESTS = [{ 'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617', 'md5': '44455a346edc0d509ac5b5a5b531dc35', @@ -79,6 +97,9 @@ class VideomoreIE(InfoExtractor): }, { 'url': 'videomore:367617', 'only_matching': True, + }, { + 'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=', + 'only_matching': True, }] @staticmethod @@ -136,7 +157,7 @@ class VideomoreIE(InfoExtractor): class VideomoreVideoIE(InfoExtractor): IE_NAME = 'videomore:video' - _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)[/?#&]*$' + _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)(?:/*|[?#&].*?)$' _TESTS = [{ # single video with og:video:iframe 'url': 'http://videomore.ru/elki_3', @@ -176,6 +197,9 @@ class VideomoreVideoIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so', + 'only_matching': True, }] @classmethod @@ -196,13 +220,16 @@ class VideomoreVideoIE(InfoExtractor): r'track-id=["\'](\d+)', r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id') video_url = 'videomore:%s' % video_id + else: + video_id = None - return self.url_result(video_url, VideomoreIE.ie_key()) + return self.url_result( + video_url, ie=VideomoreIE.ie_key(), video_id=video_id) class VideomoreSeasonIE(InfoExtractor): IE_NAME = 'videomore:season' - _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)[/?#&]*$' + _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$' _TESTS = [{ 'url': 'http://videomore.ru/molodezhka/sezon_promo', 'info_dict': { @@ -210,8 +237,16 @@ class VideomoreSeasonIE(InfoExtractor): 'title': 'Молодежка Промо', }, 'playlist_mincount': 12, + }, { + 'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so', + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url)) + else super(VideomoreSeasonIE, cls).suitable(url)) + def _real_extract(self, url): display_id = self._match_id(url) @@ -219,9 +254,54 @@ class VideomoreSeasonIE(InfoExtractor): title = self._og_search_title(webpage) - entries = [ - self.url_result(item) for item in re.findall( - r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"' - % display_id, webpage)] + data = self._parse_json( + self._html_search_regex( + r'\bclass=["\']seasons-tracks["\'][^>]+\bdata-custom-data=(["\'])(?P<value>{.+?})\1', + webpage, 'data', default='{}', group='value'), + display_id, fatal=False) + + entries = [] + + if data: + episodes = data.get('episodes') + if isinstance(episodes, list): + for ep in episodes: + if not isinstance(ep, dict): + continue + ep_id = int_or_none(ep.get('id')) + ep_url = url_or_none(ep.get('url')) + if ep_id: + e = { + 'url': 'videomore:%s' % ep_id, + 'id': compat_str(ep_id), + } + elif ep_url: + e = {'url': ep_url} + else: + continue + e.update({ + '_type': 'url', + 'ie_key': VideomoreIE.ie_key(), + 'title': str_or_none(ep.get('title')), + 'thumbnail': url_or_none(ep.get('image')), + 'duration': parse_duration(ep.get('duration')), + 'episode_number': int_or_none(ep.get('number')), + 'upload_date': unified_strdate(ep.get('date')), + }) + entries.append(e) + + if not entries: + entries = [ + self.url_result( + 'videomore:%s' % video_id, ie=VideomoreIE.ie_key(), + video_id=video_id) + for video_id in orderedSet(re.findall( + r':(?:id|key)=["\'](\d+)["\']', webpage))] + + if not entries: + entries = [ + self.url_result(item) for item in re.findall( + r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"' + % display_id, webpage)] return self.playlist_result(entries, display_id, title) From 4b85f0f9db9329ef1774a66c3e2fd4da558a5201 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 22 Jan 2019 14:38:29 +0100 Subject: [PATCH 62/84] [vrv] add support for authentication(closes #14307) --- youtube_dl/extractor/vrv.py | 95 ++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 483a3be3a..014513051 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -11,10 +11,12 @@ import time from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_urllib_parse_urlencode, compat_urllib_parse, ) from ..utils import ( + ExtractorError, float_or_none, int_or_none, ) @@ -24,29 +26,41 @@ class VRVBaseIE(InfoExtractor): _API_DOMAIN = None _API_PARAMS = {} _CMS_SIGNING = {} + _TOKEN = None + _TOKEN_SECRET = '' def _call_api(self, path, video_id, note, data=None): + # https://tools.ietf.org/html/rfc5849#section-3 base_url = self._API_DOMAIN + '/core/' + path - encoded_query = compat_urllib_parse_urlencode({ + query = { 'oauth_consumer_key': self._API_PARAMS['oAuthKey'], 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'oauth_signature_method': 'HMAC-SHA1', 'oauth_timestamp': int(time.time()), - 'oauth_version': '1.0', - }) + } + if self._TOKEN: + query['oauth_token'] = self._TOKEN + encoded_query = compat_urllib_parse_urlencode(query) headers = self.geo_verification_headers() if data: data = json.dumps(data).encode() headers['Content-Type'] = 'application/json' - method = 'POST' if data else 'GET' - base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')]) + base_string = '&'.join([ + 'POST' if data else 'GET', + compat_urllib_parse.quote(base_url, ''), + compat_urllib_parse.quote(encoded_query, '')]) oauth_signature = base64.b64encode(hmac.new( - (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'), + (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'), base_string.encode(), hashlib.sha1).digest()).decode() encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '') - return self._download_json( - '?'.join([base_url, encoded_query]), video_id, - note='Downloading %s JSON metadata' % note, headers=headers, data=data) + try: + return self._download_json( + '?'.join([base_url, encoded_query]), video_id, + note='Downloading %s JSON metadata' % note, headers=headers, data=data) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True) + raise def _call_cms(self, path, video_id, note): if not self._CMS_SIGNING: @@ -55,19 +69,22 @@ class VRVBaseIE(InfoExtractor): self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) - def _set_api_params(self, webpage, video_id): - if not self._API_PARAMS: - self._API_PARAMS = self._parse_json(self._search_regex( - r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>', - webpage, 'api config'), video_id)['cxApiParams'] - self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') - def _get_cms_resource(self, resource_key, video_id): return self._call_api( 'cms_resource', video_id, 'resource path', data={ 'resource_key': resource_key, })['__links__']['cms_resource']['href'] + def _real_initialize(self): + webpage = self._download_webpage( + 'https://vrv.co/', None, headers=self.geo_verification_headers()) + self._API_PARAMS = self._parse_json(self._search_regex( + [ + r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)', + r'window\.__APP_CONFIG__\s*=\s*({.+})' + ], webpage, 'app config'), None)['cxApiParams'] + self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') + class VRVIE(VRVBaseIE): IE_NAME = 'vrv' @@ -86,6 +103,22 @@ class VRVIE(VRVBaseIE): 'skip_download': True, }, }] + _NETRC_MACHINE = 'vrv' + + def _real_initialize(self): + super(VRVIE, self)._real_initialize() + + email, password = self._get_login_info() + if email is None: + return + + token_credentials = self._call_api( + 'authenticate/by:credentials', None, 'Token Credentials', data={ + 'email': email, + 'password': password, + }) + self._TOKEN = token_credentials['oauth_token'] + self._TOKEN_SECRET = token_credentials['oauth_token_secret'] def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): if not url or stream_format not in ('hls', 'dash'): @@ -116,28 +149,16 @@ class VRVIE(VRVBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, - headers=self.geo_verification_headers()) - media_resource = self._parse_json(self._search_regex( - [ - r'window\.__INITIAL_STATE__\s*=\s*({.+?})(?:</script>|;)', - r'window\.__INITIAL_STATE__\s*=\s*({.+})' - ], webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {} - video_data = media_resource.get('json') - if not video_data: - self._set_api_params(webpage, video_id) - episode_path = self._get_cms_resource( - 'cms:/episodes/' + video_id, video_id) - video_data = self._call_cms(episode_path, video_id, 'video') + episode_path = self._get_cms_resource( + 'cms:/episodes/' + video_id, video_id) + video_data = self._call_cms(episode_path, video_id, 'video') title = video_data['title'] - streams_json = media_resource.get('streams', {}).get('json', {}) - if not streams_json: - self._set_api_params(webpage, video_id) - streams_path = video_data['__links__']['streams']['href'] - streams_json = self._call_cms(streams_path, video_id, 'streams') + streams_path = video_data['__links__'].get('streams', {}).get('href') + if not streams_path: + self.raise_login_required() + streams_json = self._call_cms(streams_path, video_id, 'streams') audio_locale = streams_json.get('audio_locale') formats = [] @@ -202,11 +223,7 @@ class VRVSeriesIE(VRVBaseIE): def _real_extract(self, url): series_id = self._match_id(url) - webpage = self._download_webpage( - url, series_id, - headers=self.geo_verification_headers()) - self._set_api_params(webpage, series_id) seasons_path = self._get_cms_resource( 'cms:/seasons?series_id=' + series_id, series_id) seasons_data = self._call_cms(seasons_path, series_id, 'seasons') From 503b604a316837b9dd6ef32045e4e9bbfb6a1363 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 22 Jan 2019 18:21:37 +0100 Subject: [PATCH 63/84] [vrv] fix oauth signing for python 2(#14307) --- youtube_dl/extractor/vrv.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 014513051..6c060ae76 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -32,14 +32,14 @@ class VRVBaseIE(InfoExtractor): def _call_api(self, path, video_id, note, data=None): # https://tools.ietf.org/html/rfc5849#section-3 base_url = self._API_DOMAIN + '/core/' + path - query = { - 'oauth_consumer_key': self._API_PARAMS['oAuthKey'], - 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), - 'oauth_signature_method': 'HMAC-SHA1', - 'oauth_timestamp': int(time.time()), - } + query = [ + ('oauth_consumer_key', self._API_PARAMS['oAuthKey']), + ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])), + ('oauth_signature_method', 'HMAC-SHA1'), + ('oauth_timestamp', int(time.time())), + ] if self._TOKEN: - query['oauth_token'] = self._TOKEN + query.append(('oauth_token', self._TOKEN)) encoded_query = compat_urllib_parse_urlencode(query) headers = self.geo_verification_headers() if data: From 278d061a0c5eae20963c0a6df4b9b13fd1537186 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 03:51:29 +0700 Subject: [PATCH 64/84] [pornhub] Bypass scrape detection (closes #5930) --- youtube_dl/extractor/pornhub.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index e377de196..f5f3e6593 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -10,7 +10,9 @@ from .common import InfoExtractor from ..compat import ( compat_HTTPError, compat_str, + compat_urllib_request, ) +from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, int_or_none, @@ -126,6 +128,26 @@ class PornHubIE(InfoExtractor): 'only_matching': True, }] + def _download_webpage_handle(self, *args, **kwargs): + def dl(*args, **kwargs): + return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs) + + webpage, urlh = dl(*args, **kwargs) + + if any(re.search(p, webpage) for p in ( + r'<body\b[^>]+\bonload=["\']go\(\)', + r'document\.cookie\s*=\s*["\']RNKEY=', + r'document\.location\.reload\(true\)')): + url_or_request = args[0] + url = (url_or_request.get_full_url() + if isinstance(url_or_request, compat_urllib_request.Request) + else url_or_request) + phantom = PhantomJSwrapper(self, required_version='2.0') + phantom.get(url, html=webpage) + webpage, urlh = dl(*args, **kwargs) + + return webpage, urlh + @staticmethod def _extract_urls(webpage): return re.findall( From 6510a3aa971c00525969040ad654249c0c73f125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 03:55:41 +0700 Subject: [PATCH 65/84] [crunchyroll] Extend _VALID_URL (closes #18955) --- youtube_dl/extractor/crunchyroll.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 4a68d092b..5e2cbe41d 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -144,7 +144,7 @@ class CrunchyrollBaseIE(InfoExtractor): class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): IE_NAME = 'crunchyroll' - _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)' + _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { @@ -269,6 +269,9 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): }, { 'url': 'http://www.crunchyroll.com/media-723735', 'only_matching': True, + }, { + 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921', + 'only_matching': True, }] _FORMAT_IDS = { From 71a1f61700789fb0d61fc6ad9681b6f0899d2f51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 04:12:06 +0700 Subject: [PATCH 66/84] [pornhub] Apply scrape detection bypass for all extractors --- youtube_dl/extractor/pornhub.py | 46 +++++++++++++++++---------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index f5f3e6593..be93d5d48 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -24,7 +24,29 @@ from ..utils import ( ) -class PornHubIE(InfoExtractor): +class PornHubBaseIE(InfoExtractor): + def _download_webpage_handle(self, *args, **kwargs): + def dl(*args, **kwargs): + return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) + + webpage, urlh = dl(*args, **kwargs) + + if any(re.search(p, webpage) for p in ( + r'<body\b[^>]+\bonload=["\']go\(\)', + r'document\.cookie\s*=\s*["\']RNKEY=', + r'document\.location\.reload\(true\)')): + url_or_request = args[0] + url = (url_or_request.get_full_url() + if isinstance(url_or_request, compat_urllib_request.Request) + else url_or_request) + phantom = PhantomJSwrapper(self, required_version='2.0') + phantom.get(url, html=webpage) + webpage, urlh = dl(*args, **kwargs) + + return webpage, urlh + + +class PornHubIE(PornHubBaseIE): IE_DESC = 'PornHub and Thumbzilla' _VALID_URL = r'''(?x) https?:// @@ -128,26 +150,6 @@ class PornHubIE(InfoExtractor): 'only_matching': True, }] - def _download_webpage_handle(self, *args, **kwargs): - def dl(*args, **kwargs): - return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs) - - webpage, urlh = dl(*args, **kwargs) - - if any(re.search(p, webpage) for p in ( - r'<body\b[^>]+\bonload=["\']go\(\)', - r'document\.cookie\s*=\s*["\']RNKEY=', - r'document\.location\.reload\(true\)')): - url_or_request = args[0] - url = (url_or_request.get_full_url() - if isinstance(url_or_request, compat_urllib_request.Request) - else url_or_request) - phantom = PhantomJSwrapper(self, required_version='2.0') - phantom.get(url, html=webpage) - webpage, urlh = dl(*args, **kwargs) - - return webpage, urlh - @staticmethod def _extract_urls(webpage): return re.findall( @@ -329,7 +331,7 @@ class PornHubIE(InfoExtractor): } -class PornHubPlaylistBaseIE(InfoExtractor): +class PornHubPlaylistBaseIE(PornHubBaseIE): def _extract_entries(self, webpage, host): # Only process container div with main playlist content skipping # drop-down menu that uses similar pattern for videos (see From 0670bdd8f2bca39fdeadc63e1cd53b5d5b3e638c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 04:43:55 +0700 Subject: [PATCH 67/84] [ChangeLog] Actualize [ci skip] --- ChangeLog | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ChangeLog b/ChangeLog index 902301765..687796a0e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +version <unreleased> + +Core +* [utils] Fix urljoin for paths with non-http(s) schemes +* [extractor/common] Improve jwplayer relative URL handling (#18892) ++ [YoutubeDL] Add negation support for string comparisons in format selection + expressions (#18600, #18805) +* [extractor/common] Improve HLS video-only format detection (#18923) + +Extractors +* [crunchyroll] Extend URL regular expression (#18955) +* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722, + #17197, #18338 #18842, #18899) ++ [vrv] Add support for authentication (#14307) +* [videomore:season] Fix extraction +* [videomore] Improve extraction (#18908) ++ [tnaflix] Pass Referer in metadata request (#18925) +* [radiocanada] Relax DRM check (#18608, #18609) +* [vimeo] Fix video password verification for videos protected by + Referer HTTP header ++ [hketv] Add support for hkedcity.net (#18696) ++ [streamango] Add support for fruithosts.net (#18710) ++ [instagram] Add support for tags (#18757) ++ [odnoklassniki] Detect paid videos (#18876) +* [ted] Correct acodec for HTTP formats (#18923) +* [cartoonnetwork] Fix extraction (#15664, #17224) +* [vimeo] Fix extraction for password protected player URLs (#18889) + + version 2019.01.17 Extractors From 435e382423f860aca82a58d7c3db58cbfa242b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 04:46:55 +0700 Subject: [PATCH 68/84] release 2019.01.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 841bca914..db3ebaeed 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.17** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.17 +[debug] youtube-dl version 2019.01.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 687796a0e..55ad44315 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.23 Core * [utils] Fix urljoin for paths with non-http(s) schemes diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c01409419..d759d0273 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -361,6 +361,7 @@ - **hitbox** - **hitbox:live** - **HitRecord** + - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau - **HornBunny** - **HotNewHipHop** - **hotstar** @@ -386,6 +387,7 @@ - **IndavideoEmbed** - **InfoQ** - **Instagram** + - **instagram:tag**: Instagram hashtag search - **instagram:user**: Instagram user profile - **Internazionale** - **InternetVideoArchive** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ea3f62928..d77949eed 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.17' +__version__ = '2019.01.23' From e118a8794ffe5a3a414afd489726f34d753b0b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Jan 2019 01:34:41 +0700 Subject: [PATCH 69/84] [YoutubeDL] Fix typo in string negation implementation and add more tests (closes #18961) --- test/test_YoutubeDL.py | 30 +++++++++++++++++++++++++++--- youtube_dl/YoutubeDL.py | 2 +- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index df8994b84..1d7452744 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -242,6 +242,7 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection_string_ops(self): formats = [ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, + {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL}, ] info_dict = _make_result(formats) @@ -253,6 +254,11 @@ class TestFormatSelection(unittest.TestCase): # does not equal (!=) ydl = YDL({'format': '[format_id!=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # starts with (^=) @@ -262,7 +268,12 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], 'abc-cba') # does not start with (!^=) - ydl = YDL({'format': '[format_id!^=abc-cba]'}) + ydl = YDL({'format': '[format_id!^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # ends with ($=) @@ -272,16 +283,29 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], 'abc-cba') # does not end with (!$=) - ydl = YDL({'format': '[format_id!$=abc-cba]'}) + ydl = YDL({'format': '[format_id!$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # contains (*=) - ydl = YDL({'format': '[format_id*=-]'}) + ydl = YDL({'format': '[format_id*=bc-cb]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not contain (!*=) + ydl = YDL({'format': '[format_id!*=bc-cb]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + ydl = YDL({'format': '[format_id!*=-]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a827414dc..80ed8d7e5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1078,7 +1078,7 @@ class YoutubeDL(object): comparison_value = m.group('value') str_op = STR_OPERATORS[m.group('op')] if m.group('negation'): - op = lambda attr, value: not str_op + op = lambda attr, value: not str_op(attr, value) else: op = str_op From 7d311586eda9eae9430da3f6d18932d79127daa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Jan 2019 01:44:09 +0700 Subject: [PATCH 70/84] [ChangeLog] Actualize [ci skip] --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 55ad44315..a28c6e951 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Core +* [YoutubeDL] Fix negation for string operators in format selection (#18961) + + version 2019.01.23 Core From a1e171233d86a064865353cc820969c10cb1f251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Jan 2019 01:46:23 +0700 Subject: [PATCH 71/84] release 2019.01.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index db3ebaeed..63aefe013 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.23 +[debug] youtube-dl version 2019.01.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a28c6e951..1fda747bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.24 Core * [YoutubeDL] Fix negation for string operators in format selection (#18961) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d77949eed..18c1f8d4c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.23' +__version__ = '2019.01.24' From 9713d1d1e0a7eff5c1b9873a2f4f054111a568ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Jan 2019 02:30:12 +0700 Subject: [PATCH 72/84] [openload] Add support for oload.club (closes #18969) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index cf51e4770..b713e78b8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? (?: openload\.(?:co|io|link)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club) ) )/ (?:f|embed)/ @@ -334,6 +334,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.fun/f/gb6G1H4sHXY', 'only_matching': True, + }, { + 'url': 'https://oload.club/f/Nr1L-aZ2dbQ', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 118afcf52ff23726e5f0c436083710f5c63230fa Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 23 Jan 2019 22:16:21 +0100 Subject: [PATCH 73/84] [go] fix adobe pass requests for Disney Now(closes #18901) --- youtube_dl/extractor/go.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index ec9dd6e3a..206d89e82 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -25,15 +25,15 @@ class GoIE(AdobePassIE): }, 'watchdisneychannel': { 'brand': '004', - 'requestor_id': 'Disney', + 'resource_id': 'Disney', }, 'watchdisneyjunior': { 'brand': '008', - 'requestor_id': 'DisneyJunior', + 'resource_id': 'DisneyJunior', }, 'watchdisneyxd': { 'brand': '009', - 'requestor_id': 'DisneyXD', + 'resource_id': 'DisneyXD', } } _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ @@ -130,8 +130,8 @@ class GoIE(AdobePassIE): 'device': '001', } if video_data.get('accesslevel') == '1': - requestor_id = site_info['requestor_id'] - resource = self._get_mvpd_resource( + requestor_id = site_info.get('requestor_id', 'DisneyChannels') + resource = site_info.get('resource_id') or self._get_mvpd_resource( requestor_id, title, video_id, None) auth = self._extract_mvpd_auth( url, video_id, requestor_id, resource) From eb35b163adf61f8ff0ee6c504e98bc94db16e705 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 24 Jan 2019 20:23:04 +0100 Subject: [PATCH 74/84] [postprocessor/ffmpeg] fallback to ffmpeg/avconv for audio codec detection(closes #681) --- youtube_dl/postprocessor/ffmpeg.py | 53 +++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 39a905380..b952b0970 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -9,9 +9,6 @@ import re from .common import AudioConversionError, PostProcessor -from ..compat import ( - compat_subprocess_get_DEVNULL, -) from ..utils import ( encodeArgument, encodeFilename, @@ -165,27 +162,45 @@ class FFmpegPostProcessor(PostProcessor): return self._paths[self.probe_basename] def get_audio_codec(self, path): - if not self.probe_available: - raise PostProcessingError('ffprobe or avprobe not found. Please install one.') + if not self.probe_available and not self.available: + raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. Please install one.') try: - cmd = [ - encodeFilename(self.probe_executable, True), - encodeArgument('-show_streams'), - encodeFilename(self._ffmpeg_filename_argument(path), True)] + if self.probe_available: + cmd = [ + encodeFilename(self.probe_executable, True), + encodeArgument('-show_streams')] + else: + cmd = [ + encodeFilename(self.executable, True), + encodeArgument('-i')] + cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) - handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) - output = handle.communicate()[0] - if handle.wait() != 0: + self._downloader.to_screen( + '[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + handle = subprocess.Popen( + cmd, stderr=subprocess.PIPE, + stdout=subprocess.PIPE, stdin=subprocess.PIPE) + stdout_data, stderr_data = handle.communicate() + expected_ret = 0 if self.probe_available else 1 + if handle.wait() != expected_ret: return None except (IOError, OSError): return None - audio_codec = None - for line in output.decode('ascii', 'ignore').split('\n'): - if line.startswith('codec_name='): - audio_codec = line.split('=')[1].strip() - elif line.strip() == 'codec_type=audio' and audio_codec is not None: - return audio_codec + output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore') + if self.probe_available: + audio_codec = None + for line in output.split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + else: + # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME + mobj = re.search( + r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)', + output) + if mobj: + return mobj.group(1) return None def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): From 0eba178fce80923515f4a9ac411e46648a19d78c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Jan 2019 04:04:58 +0700 Subject: [PATCH 75/84] [nhk] Extend _VALID_URL (closes #18968) --- youtube_dl/extractor/nhk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 5c8cd76dc..d4acbcc3e 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -5,8 +5,8 @@ from ..utils import ExtractorError class NhkVodIE(InfoExtractor): - _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>[^/]+/[^/?#&]+)' - _TEST = { + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/(?:vod|ondemand)/(?P<id>[^/]+/[^/?#&]+)' + _TESTS = [{ # Videos available only for a limited period of time. Visit # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815', @@ -19,7 +19,10 @@ class NhkVodIE(InfoExtractor): 'episode': 'The Kimono as Global Fashion', }, 'skip': 'Videos available only for a limited period of time', - } + }, { + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', + 'only_matching': True, + }] _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k' def _real_extract(self, url): From 1602a240a7742f9e0a02b3f0effd215d00d859f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Jan 2019 04:16:49 +0700 Subject: [PATCH 76/84] [drtv] Fix extraction (closes #18989) --- youtube_dl/extractor/drtv.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index f757745ba..8d63ca433 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -77,10 +77,9 @@ class DRTVIE(InfoExtractor): r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), webpage, 'video id') - programcard = self._download_json( - 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, - video_id, 'Downloading video JSON') - data = programcard['Data'][0] + data = self._download_json( + 'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id, + video_id, 'Downloading video JSON', query={'expanded': 'true'}) title = remove_end(self._og_search_title( webpage, default=None), ' | TV | DR') or data['Title'] @@ -97,7 +96,7 @@ class DRTVIE(InfoExtractor): formats = [] subtitles = {} - for asset in data['Assets']: + for asset in [data['PrimaryAsset']]: kind = asset.get('Kind') if kind == 'Image': thumbnail = asset.get('Uri') From ae18d58297c16a300a89693360efd19be4a97e92 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 25 Jan 2019 11:01:27 +0100 Subject: [PATCH 77/84] [usatoday] fix extraction for videos with custom brightcove partner id(closes #18990) --- youtube_dl/extractor/usatoday.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/usatoday.py b/youtube_dl/extractor/usatoday.py index e5678dc78..b2103448d 100644 --- a/youtube_dl/extractor/usatoday.py +++ b/youtube_dl/extractor/usatoday.py @@ -3,21 +3,23 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, get_element_by_attribute, parse_duration, + try_get, update_url_query, - ExtractorError, ) from ..compat import compat_str class USATodayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)' - _TEST = { + _TESTS = [{ + # Brightcove Partner ID = 29906170001 'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/', - 'md5': '4d40974481fa3475f8bccfd20c5361f8', + 'md5': '033587d2529dc3411a1ab3644c3b8827', 'info_dict': { - 'id': '81729424', + 'id': '4799374959001', 'ext': 'mp4', 'title': 'US, France warn Syrian regime ahead of new peace talks', 'timestamp': 1457891045, @@ -25,8 +27,20 @@ class USATodayIE(InfoExtractor): 'uploader_id': '29906170001', 'upload_date': '20160313', } - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/29906170001/38a9eecc-bdd8-42a3-ba14-95397e48b3f8_default/index.html?videoId=%s' + }, { + # ui-video-data[asset_metadata][items][brightcoveaccount] = 28911775001 + 'url': 'https://www.usatoday.com/story/tech/science/2018/08/21/yellowstone-supervolcano-eruption-stop-worrying-its-blow/973633002/', + 'info_dict': { + 'id': '5824495846001', + 'ext': 'mp4', + 'title': 'Yellowstone more likely to crack rather than explode', + 'timestamp': 1534790612, + 'description': 'md5:3715e7927639a4f16b474e9391687c62', + 'uploader_id': '28911775001', + 'upload_date': '20180820', + } + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' def _real_extract(self, url): display_id = self._match_id(url) @@ -35,10 +49,11 @@ class USATodayIE(InfoExtractor): if not ui_video_data: raise ExtractorError('no video on the webpage', expected=True) video_data = self._parse_json(ui_video_data, display_id) + item = try_get(video_data, lambda x: x['asset_metadata']['items'], dict) or {} return { '_type': 'url_transparent', - 'url': self.BRIGHTCOVE_URL_TEMPLATE % video_data['brightcove_id'], + 'url': self.BRIGHTCOVE_URL_TEMPLATE % (item.get('brightcoveaccount', '29906170001'), item.get('brightcoveid') or video_data['brightcove_id']), 'id': compat_str(video_data['id']), 'title': video_data['title'], 'thumbnail': video_data.get('thumbnail'), From 252abb1e8b881aa9d3942c436711ac33235b37cd Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sat, 26 Jan 2019 15:29:19 +0700 Subject: [PATCH 78/84] [README.md] Mention more convenience extraction functions --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ba982907..c1572f771 100644 --- a/README.md +++ b/README.md @@ -1213,7 +1213,7 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` -### Use safe conversion functions +### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. @@ -1221,6 +1221,8 @@ Use `url_or_none` for safe URL processing. Use `try_get` for safe metadata extraction from parsed JSON. +Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. + Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples From 845333acf6280761d19f91b3e018c418d922a0de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 04:14:54 +0700 Subject: [PATCH 79/84] [wakanim] Add extractor (closes #14374) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/wakanim.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/wakanim.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 574a47e6d..2ffcffa9e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1373,6 +1373,7 @@ from .vuclip import VuClipIE from .vvvvid import VVVVIDIE from .vyborymos import VyboryMosIE from .vzaar import VzaarIE +from .wakanim import WakanimIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/wakanim.py b/youtube_dl/extractor/wakanim.py new file mode 100644 index 000000000..1d588bdd6 --- /dev/null +++ b/youtube_dl/extractor/wakanim.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + merge_dicts, + urljoin, +) + + +class WakanimIE(InfoExtractor): + _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)' + _TEST = { + 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu', + 'info_dict': { + 'id': '2997', + 'ext': 'mp4', + 'title': 'Episode 02', + 'description': 'md5:2927701ea2f7e901de8bfa8d39b2852d', + 'series': 'The Asterisk War (OmU.)', + 'season_number': 1, + 'episode': 'Episode 02', + 'episode_number': 2, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + m3u8_url = urljoin(url, self._search_regex( + r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url', + group='url')) + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + info = self._search_json_ld(webpage, video_id, default={}) + + title = self._search_regex( + (r'<h1[^>]+\bclass=["\']episode_h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<span[^>]+\bclass=["\']episode_title["\'][^>]*>(?P<title>[^<]+)'), + webpage, 'title', default=None, group='title') + + return merge_dicts(info, { + 'id': video_id, + 'title': title, + 'formats': formats, + }) From 458fd30f56785d514862dcb8a604a329d8e29ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 04:36:58 +0700 Subject: [PATCH 80/84] [extractor/common] Extract season in _json_ld --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 95456b291..c4ea2882f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1249,7 +1249,10 @@ class InfoExtractor(object): info['title'] = episode_name part_of_season = e.get('partOfSeason') if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'): - info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) + info.update({ + 'season': unescapeHTML(part_of_season.get('name')), + 'season_number': int_or_none(part_of_season.get('seasonNumber')), + }) part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): info['series'] = unescapeHTML(part_of_series.get('name')) From 30cd1a5f3920d7485225a5d57b6ce41be4cde672 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 26 Jan 2019 22:52:54 +0100 Subject: [PATCH 81/84] [wakanim] detect DRM protected videos --- youtube_dl/extractor/wakanim.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wakanim.py b/youtube_dl/extractor/wakanim.py index 1d588bdd6..f9a2395d9 100644 --- a/youtube_dl/extractor/wakanim.py +++ b/youtube_dl/extractor/wakanim.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, merge_dicts, urljoin, ) @@ -10,7 +11,7 @@ from ..utils import ( class WakanimIE(InfoExtractor): _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu', 'info_dict': { 'id': '2997', @@ -26,7 +27,11 @@ class WakanimIE(InfoExtractor): 'format': 'bestvideo', 'skip_download': True, }, - } + }, { + # DRM Protected + 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -36,6 +41,12 @@ class WakanimIE(InfoExtractor): m3u8_url = urljoin(url, self._search_regex( r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url', group='url')) + # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls + encryption = self._search_regex( + r'encryption%3D(c(?:enc|bc(?:s-aapl)?))', + m3u8_url, 'encryption', default=None) + if encryption and encryption in ('cenc', 'cbcs-aapl'): + raise ExtractorError('This video is DRM protected.', expected=True) formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', From 1fcc91663bc84a599cc613ff1fa0e4bc15f42a9e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 27 Jan 2019 10:53:38 +0100 Subject: [PATCH 82/84] [vice] fix extraction for locked videos(closes #16248) --- youtube_dl/extractor/vice.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 538258617..8fdfd743d 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -94,7 +94,6 @@ class ViceIE(AdobePassIE): 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1', 'only_matching': True, }] - _PREPLAY_HOST = 'vms.vice' @staticmethod def _extract_urls(webpage): @@ -158,9 +157,8 @@ class ViceIE(AdobePassIE): }) try: - host = 'www.viceland' if is_locked else self._PREPLAY_HOST preplay = self._download_json( - 'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id), + 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id), video_id, query=query) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401): From bf8ebc9cfe1ae2b62baf2116f84748db03c4df7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 21:25:43 +0700 Subject: [PATCH 83/84] [ChangeLog] Actualize [ci skip] --- ChangeLog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog b/ChangeLog index 1fda747bb..9d5c25273 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +version <unreleased> + +Core ++ [extractor/common] Extract season in _json_ld +* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection + (#681) + +Extractors +* [vice] Fix extraction for locked videos (#16248) ++ [wakanim] Detect DRM protected videos ++ [wakanim] Add support for wakanim.tv (#14374) +* [usatoday] Fix extraction for videos with custom brightcove partner id + (#18990) +* [drtv] Fix extraction (#18989) +* [nhk] Extend URL regular expression (#18968) +* [go] Fix Adobe Pass requests for Disney Now (#18901) ++ [openload] Add support for oload.club (#18969) + + version 2019.01.24 Core From e71be6ee9f239308765443d49d91358fa306e48a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 21:28:09 +0700 Subject: [PATCH 84/84] release 2019.01.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 4 +++- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 63aefe013..f529e3f4b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.24 +[debug] youtube-dl version 2019.01.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a71b045d0..6c1739860 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -339,7 +339,7 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` -### Use safe conversion functions +### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. @@ -347,6 +347,8 @@ Use `url_or_none` for safe URL processing. Use `try_get` for safe metadata extraction from parsed JSON. +Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. + Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples diff --git a/ChangeLog b/ChangeLog index 9d5c25273..d94fe36ec 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.27 Core + [extractor/common] Extract season in _json_ld diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d759d0273..6377bf815 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1069,6 +1069,7 @@ - **VVVVID** - **VyboryMos** - **Vzaar** + - **Wakanim** - **Walla** - **WalyTV** - **washingtonpost** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 18c1f8d4c..ec89cfc64 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.24' +__version__ = '2019.01.27'