From 15ed5a27840e748d9f786c50b78a4c6326e9f186 Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Tue, 20 Nov 2018 20:50:40 +0100 Subject: [PATCH 01/18] [nzz] Relax kaltura regex --- youtube_dl/extractor/nzz.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py index 2d352f53f..61ee77adb 100644 --- a/youtube_dl/extractor/nzz.py +++ b/youtube_dl/extractor/nzz.py @@ -11,20 +11,27 @@ from ..utils import ( class NZZIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153', 'info_dict': { 'id': '9153', }, 'playlist_mincount': 6, - } + }, { + 'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112', + 'info_dict': { + 'id': '1368112', + }, + 'playlist_count': 1, + }] def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) entries = [] - for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage): + for player_element in re.findall( + r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage): player_params = extract_attributes(player_element) if player_params.get('data-type') not in ('kaltura_singleArticle',): self.report_warning('Unsupported player type') From 05bd5e9c77e0e8acb95f47396be4c970fc9f39c4 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Fri, 26 Oct 2018 19:15:44 -0700 Subject: [PATCH 02/18] [ciscolive] Add extractor --- youtube_dl/extractor/ciscolive.py | 136 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 137 insertions(+) create mode 100644 youtube_dl/extractor/ciscolive.py diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py new file mode 100644 index 000000000..2db7aad2c --- /dev/null +++ 
b/youtube_dl/extractor/ciscolive.py @@ -0,0 +1,136 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlparse, + compat_parse_qs +) +from ..utils import ( + clean_html, + int_or_none, + try_get, + urlencode_postdata, +) + + +class CiscoLiveIE(InfoExtractor): + IE_NAME = 'ciscolive' + _VALID_URL = r'(?:https?://)?ciscolive\.cisco\.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$' + _TESTS = [ + { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', + 'md5': 'c98acf395ed9c9f766941c70f5352e22', + 'info_dict': { + 'id': '5803694304001', + 'ext': 'mp4', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', + 'description': 'md5:ec4a436019e09a918dec17714803f7cc', + 'timestamp': 1530305395, + 'uploader_id': '5647924234001', + 'upload_date': '20180629', + 'location': '16B Mezz.', + }, + }, + { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', + 'md5': '993d4cf051f6174059328b1dce8e94bd', + 'info_dict': { + 'upload_date': '20180629', + 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco', + 'timestamp': 1530316421, + 'uploader_id': '5647924234001', + 'id': '5803751616001', + 'description': 'md5:5f144575cd6848117fe2f756855b038b', + 'location': 'WoS, DevNet Theater', + 'ext': 'mp4', + }, + }, + { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', + 'md5': '80e0c3b87e373fe3a3316b934b8915bf', + 'info_dict': { + 'upload_date': '20180629', + 'title': 'Beating the CCIE Routing & Switching', + 'timestamp': 1530311842, + 'uploader_id': '5647924234001', + 'id': '5803735679001', + 'description': 
'md5:e71970799e92d7f5ff57ae23f64b0929', + 'location': 'Tulúm 02', + 'ext': 'mp4', + }, + } + ] + + # These appear to be constant across all Cisco Live presentations + # and are not tied to any user session or event + RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' + RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' + + def _parse_rf_item(self, rf_item): + ''' Parses metadata and passes to Brightcove extractor ''' + event_name = rf_item.get('eventName') + title = rf_item['title'] + description = clean_html(rf_item.get('abstract')) + presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) + bc_id = rf_item['videos'][0]['url'] + bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id + duration = int_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) + location = try_get(rf_item, lambda x: x['times'][0]['room']) + + if duration: + duration = duration * 60 + + return { + '_type': 'url_transparent', + 'creator': presenter_name, + 'description': description, + 'duration': duration, + 'ie_key': 'BrightcoveNew', + 'location': location, + 'series': event_name, + 'title': title, + 'url': bc_url, + } + + def _check_bc_id_exists(self, rf_item): + ''' Checks for the existence of a Brightcove URL in an API result ''' + bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) + if bc_id: + if bc_id.strip().isdigit(): + return rf_item + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + HEADERS = { + 'Origin': 'https://ciscolive.cisco.com', + 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, + 'rfWidgetId': self.RAINFOCUS_WIDGETID, + 'Referer': url, + } + # Single session URL (single video) + if mobj.group('id'): + rf_id = mobj.group('id') + request = self.RAINFOCUS_API_URL % 'session' + data = urlencode_postdata({'id': rf_id}) + rf_result = 
self._download_json(request, rf_id, data=data, headers=HEADERS) + rf_item = self._check_bc_id_exists(rf_result['items'][0]) + return self._parse_rf_item(rf_item) + else: + # Filter query URL (multiple videos) + rf_query = compat_parse_qs((compat_urllib_parse_urlparse(url).query)) + rf_query['type'] = 'session' + rf_query['size'] = 1000 + data = urlencode_postdata(rf_query) + request = self.RAINFOCUS_API_URL % 'search' + rf_results = self._download_json(request, 'Filter query', data=data, headers=HEADERS) + entries = [ + self._parse_rf_item(rf_item) + for rf_item + in rf_results['sectionList'][0]['items'] + if self._check_bc_id_exists(rf_item) + ] + return self.playlist_result(entries, 'Filter query') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 87c7d8b0c..2c5988a14 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -194,6 +194,7 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE +from .ciscolive import CiscoLiveIE from .cjsw import CJSWIE from .cliphunter import CliphunterIE from .clippit import ClippitIE From 6a6d7f064178427d28986884524bd3434f0ca957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 21 Nov 2018 05:25:43 +0700 Subject: [PATCH 03/18] [ciscolive] Fix issues and improve extraction (closes #17984) --- youtube_dl/extractor/ciscolive.py | 176 ++++++++++++++--------------- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 87 insertions(+), 94 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 2db7aad2c..32f645713 100644 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -1,84 +1,49 @@ # coding: utf-8 from __future__ import unicode_literals -import re from .common import InfoExtractor from ..compat import ( + compat_parse_qs, compat_urllib_parse_urlparse, - compat_parse_qs ) from ..utils import ( clean_html, + float_or_none, 
int_or_none, try_get, urlencode_postdata, ) -class CiscoLiveIE(InfoExtractor): - IE_NAME = 'ciscolive' - _VALID_URL = r'(?:https?://)?ciscolive\.cisco\.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$' - _TESTS = [ - { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', - 'md5': 'c98acf395ed9c9f766941c70f5352e22', - 'info_dict': { - 'id': '5803694304001', - 'ext': 'mp4', - 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', - 'description': 'md5:ec4a436019e09a918dec17714803f7cc', - 'timestamp': 1530305395, - 'uploader_id': '5647924234001', - 'upload_date': '20180629', - 'location': '16B Mezz.', - }, - }, - { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', - 'md5': '993d4cf051f6174059328b1dce8e94bd', - 'info_dict': { - 'upload_date': '20180629', - 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco', - 'timestamp': 1530316421, - 'uploader_id': '5647924234001', - 'id': '5803751616001', - 'description': 'md5:5f144575cd6848117fe2f756855b038b', - 'location': 'WoS, DevNet Theater', - 'ext': 'mp4', - }, - }, - { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', - 'md5': '80e0c3b87e373fe3a3316b934b8915bf', - 'info_dict': { - 'upload_date': '20180629', - 'title': 'Beating the CCIE Routing & Switching', - 'timestamp': 1530311842, - 'uploader_id': '5647924234001', - 'id': '5803735679001', - 'description': 'md5:e71970799e92d7f5ff57ae23f64b0929', - 'location': 'Tulúm 02', - 'ext': 'mp4', - }, - } - ] - +class CiscoLiveBaseIE(InfoExtractor): # These appear to be constant across all Cisco Live presentations # and are not tied to any user session or event RAINFOCUS_API_URL = 
'https://events.rainfocus.com/api/%s' - RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' - RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' + HEADERS = { + 'Origin': 'https://ciscolive.cisco.com', + 'rfApiProfileId': RAINFOCUS_API_PROFILE_ID, + 'rfWidgetId': RAINFOCUS_WIDGET_ID, + } + + def _call_api(self, ep, rf_id, query, referrer): + headers = self.HEADERS.copy() + headers['Referer'] = referrer + return self._download_json( + self.RAINFOCUS_API_URL % ep, rf_id, data=urlencode_postdata(query), + headers=headers) + def _parse_rf_item(self, rf_item): - ''' Parses metadata and passes to Brightcove extractor ''' event_name = rf_item.get('eventName') title = rf_item['title'] description = clean_html(rf_item.get('abstract')) presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) bc_id = rf_item['videos'][0]['url'] bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id - duration = int_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) + duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) location = try_get(rf_item, lambda x: x['times'][0]['room']) if duration: @@ -86,51 +51,76 @@ class CiscoLiveIE(InfoExtractor): return { '_type': 'url_transparent', - 'creator': presenter_name, + 'url': bc_url, + 'ie_key': 'BrightcoveNew', + 'title': title, 'description': description, 'duration': duration, - 'ie_key': 'BrightcoveNew', + 'creator': presenter_name, 'location': location, 'series': event_name, - 'title': title, - 'url': bc_url, } - def _check_bc_id_exists(self, rf_item): - ''' Checks for the existence of a Brightcove URL in an API result ''' - bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) - if bc_id: - if bc_id.strip().isdigit(): - return rf_item + +class 
CiscoLiveSessionIE(CiscoLiveBaseIE): + _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P[^/?&]+)' + _TEST = { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', + 'md5': 'c98acf395ed9c9f766941c70f5352e22', + 'info_dict': { + 'id': '5803694304001', + 'ext': 'mp4', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', + 'description': 'md5:ec4a436019e09a918dec17714803f7cc', + 'timestamp': 1530305395, + 'upload_date': '20180629', + 'uploader_id': '5647924234001', + 'location': '16B Mezz.', + }, + 'params': { + 'proxy': '127.0.0.1:8118', + } + } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - HEADERS = { - 'Origin': 'https://ciscolive.cisco.com', - 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, - 'rfWidgetId': self.RAINFOCUS_WIDGETID, - 'Referer': url, + rf_id = self._match_id(url) + rf_result = self._call_api('session', rf_id, {'id': rf_id}, url) + return self._parse_rf_item(rf_result['items'][0]) + + +class CiscoLiveSearchIE(CiscoLiveBaseIE): + _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/' + _TESTS = [{ + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', + 'info_dict': { + 'title': 'Filter query', + }, + 'playlist_count': 5, + 'params': { + 'proxy': '127.0.0.1:8118', } - # Single session URL (single video) - if mobj.group('id'): - rf_id = mobj.group('id') - request = self.RAINFOCUS_API_URL % 'session' - data = urlencode_postdata({'id': rf_id}) - rf_result = self._download_json(request, rf_id, data=data, headers=HEADERS) - rf_item = self._check_bc_id_exists(rf_result['items'][0]) - return self._parse_rf_item(rf_item) - else: - # Filter query URL (multiple videos) - rf_query = compat_parse_qs((compat_urllib_parse_urlparse(url).query)) - rf_query['type'] = 'session' - rf_query['size'] = 1000 - data = 
urlencode_postdata(rf_query) - request = self.RAINFOCUS_API_URL % 'search' - rf_results = self._download_json(request, 'Filter query', data=data, headers=HEADERS) - entries = [ - self._parse_rf_item(rf_item) - for rf_item - in rf_results['sectionList'][0]['items'] - if self._check_bc_id_exists(rf_item) - ] - return self.playlist_result(entries, 'Filter query') + }, { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url) + + @staticmethod + def _check_bc_id_exists(rf_item): + return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None + + def _real_extract(self, url): + rf_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + rf_query['type'] = 'session' + rf_query['size'] = 1000 + rf_results = self._call_api('search', None, rf_query, url) + entries = [ + self._parse_rf_item(rf_item) + for rf_item + in rf_results['sectionList'][0]['items'] + if self._check_bc_id_exists(rf_item) + ] + return self.playlist_result(entries, playlist_title='Filter query') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2c5988a14..60e6175b1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -194,7 +194,10 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE -from .ciscolive import CiscoLiveIE +from .ciscolive import ( + CiscoLiveSessionIE, + CiscoLiveSearchIE, +) from .cjsw import CJSWIE from .cliphunter import CliphunterIE from .clippit import ClippitIE From 183417a50fd68c0c63b1d0621c6a0b44fbf2ac52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 21 Nov 2018 06:04:34 +0700 Subject: [PATCH 
04/18] [ciscolive:search] Add support for pagination --- youtube_dl/extractor/ciscolive.py | 58 ++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 32f645713..c99b6ee58 100644 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools + from .common import InfoExtractor from ..compat import ( compat_parse_qs, @@ -29,12 +31,12 @@ class CiscoLiveBaseIE(InfoExtractor): 'rfWidgetId': RAINFOCUS_WIDGET_ID, } - def _call_api(self, ep, rf_id, query, referrer): + def _call_api(self, ep, rf_id, query, referrer, note=None): headers = self.HEADERS.copy() headers['Referer'] = referrer return self._download_json( - self.RAINFOCUS_API_URL % ep, rf_id, data=urlencode_postdata(query), - headers=headers) + self.RAINFOCUS_API_URL % ep, rf_id, note=note, + data=urlencode_postdata(query), headers=headers) def _parse_rf_item(self, rf_item): event_name = rf_item.get('eventName') @@ -77,9 +79,6 @@ class CiscoLiveSessionIE(CiscoLiveBaseIE): 'uploader_id': '5647924234001', 'location': '16B Mezz.', }, - 'params': { - 'proxy': '127.0.0.1:8118', - } } def _real_extract(self, url): @@ -93,12 +92,9 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): _TESTS = [{ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', 'info_dict': { - 'title': 'Filter query', + 'title': 'Search query', }, 'playlist_count': 5, - 'params': { - 'proxy': '127.0.0.1:8118', - } }, { 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', 'only_matching': True, @@ -112,15 +108,35 @@ class 
CiscoLiveSearchIE(CiscoLiveBaseIE): def _check_bc_id_exists(rf_item): return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None + def _entries(self, query, url): + query['size'] = 50 + query['from'] = 0 + for page_num in itertools.count(1): + results = self._call_api( + 'search', None, query, url, + 'Downloading search JSON page %d' % page_num) + sl = try_get(results, lambda x: x['sectionList'][0], dict) + if sl: + results = sl + items = results.get('items') + if not items or not isinstance(items, list): + break + for item in items: + if not isinstance(item, dict): + continue + if not self._check_bc_id_exists(item): + continue + yield self._parse_rf_item(item) + size = int_or_none(results.get('size')) + if size is not None: + query['size'] = size + total = int_or_none(results.get('total')) + if total is not None and query['from'] + query['size'] > total: + break + query['from'] += query['size'] + def _real_extract(self, url): - rf_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - rf_query['type'] = 'session' - rf_query['size'] = 1000 - rf_results = self._call_api('search', None, rf_query, url) - entries = [ - self._parse_rf_item(rf_item) - for rf_item - in rf_results['sectionList'][0]['items'] - if self._check_bc_id_exists(rf_item) - ] - return self.playlist_result(entries, playlist_title='Filter query') + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + query['type'] = 'session' + return self.playlist_result( + self._entries(query, url), playlist_title='Search query') From 6c882aa8991383e1c39a6457cbde5dcab260bff5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Nov 2018 09:44:10 +0100 Subject: [PATCH 05/18] [loc] relax _VALID_URL regex and improve formats extraction --- youtube_dl/extractor/libraryofcongress.py | 37 +++++++++++++++-------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 
40295a30b..1e5c82c66 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -16,7 +16,7 @@ from ..utils import ( class LibraryOfCongressIE(InfoExtractor): IE_NAME = 'loc' IE_DESC = 'Library of Congress' - _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P[0-9a-z_.]+)' _TESTS = [{ # embedded via
.+?)\1', r']+id=(["\'])uuid-(?P.+?)\1', r']+data-uuid=(["\'])(?P.+?)\1', - r'mediaObjectId\s*:\s*(["\'])(?P.+?)\1'), + r'mediaObjectId\s*:\s*(["\'])(?P.+?)\1', + r'data-tab="share-media-(?P[0-9A-F]{32})"'), webpage, 'media id', group='id') data = self._download_json( 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, - video_id)['mediaObject'] + media_id)['mediaObject'] derivative = data['derivatives'][0] media_url = derivative['derivativeUrl'] @@ -89,25 +96,29 @@ class LibraryOfCongressIE(InfoExtractor): if ext not in ('mp4', 'mp3'): media_url += '.mp4' if is_video else '.mp3' - if 'vod/mp4:' in media_url: - formats = [{ - 'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8', + formats = [] + if '/vod/mp4:' in media_url: + formats.append({ + 'url': media_url.replace('/vod/mp4:', '/hls-vod/media/') + '.m3u8', 'format_id': 'hls', 'ext': 'mp4', 'protocol': 'm3u8_native', 'quality': 1, - }] - elif 'vod/mp3:' in media_url: - formats = [{ - 'url': media_url.replace('vod/mp3:', ''), - 'vcodec': 'none', - }] + }) + http_format = { + 'url': re.sub(r'(://[^/]+/)(?:[^/]+/)*(?:mp4|mp3):', r'\1', media_url), + 'format_id': 'http', + 'quality': 1, + } + if not is_video: + http_format['vcodec'] = 'none' + formats.append(http_format) download_urls = set() for m in re.finditer( r']+value=(["\'])(?P.+?)\1[^>]+data-file-download=[^>]+>\s*(?P.+?)(?:(?: |\s+)\((?P.+?)\))?\s*<', webpage): format_id = m.group('id').lower() - if format_id == 'gif': + if format_id in ('gif', 'jpeg'): continue download_url = m.group('url') if download_url in download_urls: From 35328915b5fe5c8915b924cfbc54bbdd6d6d1430 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Nov 2018 09:46:13 +0100 Subject: [PATCH 06/18] [foxsports] fix extraction(closes #17543) --- youtube_dl/extractor/foxsports.py | 17 +++-------------- youtube_dl/extractor/theplatform.py | 5 +++-- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/foxsports.py 
b/youtube_dl/extractor/foxsports.py index 985542727..596fded20 100644 --- a/youtube_dl/extractor/foxsports.py +++ b/youtube_dl/extractor/foxsports.py @@ -8,7 +8,7 @@ from ..utils import ( class FoxSportsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P\d+)' _TEST = { 'url': 'http://www.foxsports.com/tennessee/video/432609859715', @@ -28,16 +28,5 @@ class FoxSportsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - config = self._parse_json( - self._html_search_regex( - r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""", - webpage, 'data player config'), - video_id) - - return self.url_result(smuggle_url(update_url_query( - config['releaseURL'], { - 'mbr': 'true', - 'switch': 'http', - }), {'force_smil_url': True})) + return self.url_result( + 'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed') diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 181620615..90b351cbb 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -343,7 +343,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None): real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query) entry = self._download_json(real_url, video_id)['entries'][0] - main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None + main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl') formats = [] subtitles = {} @@ -356,7 +356,8 @@ class 
ThePlatformFeedIE(ThePlatformBaseIE): if first_video_id is None: first_video_id = cur_video_id duration = float_or_none(item.get('plfile$duration')) - for asset_type in item['plfile$assetTypes']: + file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes'] + for asset_type in file_asset_types: if asset_type in asset_types: continue asset_types.append(asset_type) From 4e33e0792a3e134b494bd71f257a674294cca8d9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Nov 2018 12:00:50 +0100 Subject: [PATCH 07/18] [loc] update test --- youtube_dl/extractor/libraryofcongress.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 1e5c82c66..03f205144 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -20,12 +20,11 @@ class LibraryOfCongressIE(InfoExtractor): _TESTS = [{ # embedded via
Date: Wed, 21 Nov 2018 12:08:46 +0100 Subject: [PATCH 08/18] [foxsports] update test --- youtube_dl/extractor/foxsports.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py index 596fded20..2b2cb6c6f 100644 --- a/youtube_dl/extractor/foxsports.py +++ b/youtube_dl/extractor/foxsports.py @@ -1,10 +1,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - smuggle_url, - update_url_query, -) class FoxSportsIE(InfoExtractor): @@ -14,14 +10,19 @@ class FoxSportsIE(InfoExtractor): 'url': 'http://www.foxsports.com/tennessee/video/432609859715', 'md5': 'b49050e955bebe32c301972e4012ac17', 'info_dict': { - 'id': 'bwduI3X_TgUB', + 'id': '432609859715', 'ext': 'mp4', 'title': 'Courtney Lee on going up 2-0 in series vs. Blazers', 'description': 'Courtney Lee talks about Memphis being focused.', - 'upload_date': '20150423', - 'timestamp': 1429761109, + # TODO: fix timestamp + 'upload_date': '19700101', # '20150423', + # 'timestamp': 1429761109, 'uploader': 'NEWA-FNG-FOXSPORTS', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['ThePlatform'], } From a843464a7e0608b679651f913cbd9447a7b928c0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Nov 2018 12:10:06 +0100 Subject: [PATCH 09/18] [nbc] fix NBCNews article extraction(closes #16194) --- youtube_dl/extractor/nbc.py | 91 ++++++++----------------------------- 1 file changed, 19 insertions(+), 72 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 765c46fd2..3282f84ee 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -9,10 +9,8 @@ from .theplatform import ThePlatformIE from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..utils import ( - find_xpath_attr, smuggle_url, try_get, - unescapeHTML, update_url_query, int_or_none, ) @@ -269,27 +267,14 @@ 
class CSNNEIE(InfoExtractor): class NBCNewsIE(ThePlatformIE): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/ - (?:video/.+?/(?P<id>\d+)| - ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+)) - ''' + _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)' _TESTS = [ - { - 'url': 'http://www.nbcnews.com/video/nbc-news/52753292', - 'md5': '47abaac93c6eaf9ad37ee6c4463a5179', - 'info_dict': { - 'id': '52753292', - 'ext': 'flv', - 'title': 'Crew emerges after four-month Mars food study', - 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', - }, - }, { 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', 'md5': 'af1adfa51312291a017720403826bb64', 'info_dict': { - 'id': 'p_tweet_snow_140529', + 'id': '269389891880', 'ext': 'mp4', 'title': 'How Twitter Reacted To The Snowden Interview', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', @@ -313,7 +298,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', 'md5': '73135a2e0ef819107bbb55a5a9b2a802', 'info_dict': { - 'id': 'nn_netcast_150204', + 'id': '394064451844', 'ext': 'mp4', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', @@ -326,7 +311,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', 'md5': 'a49e173825e5fcd15c13fc297fced39d', 'info_dict': { - 'id': 'x_lon_vwhorn_150922', + 'id': '529953347624', 'ext': 'mp4', 'title': 'Volkswagen U.S. 
Chief:\xa0 We Have Totally Screwed Up', 'description': 'md5:c8be487b2d80ff0594c005add88d8351', @@ -339,7 +324,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', 'md5': '118d7ca3f0bea6534f119c68ef539f71', 'info_dict': { - 'id': 'tdy_al_space_160420', + 'id': '669831235788', 'ext': 'mp4', 'title': 'See the aurora borealis from space in stunning new NASA video', 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', @@ -352,7 +337,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', 'md5': '6d236bf4f3dddc226633ce6e2c3f814d', 'info_dict': { - 'id': 'n_hayes_Aimm_140801_272214', + 'id': '314487875924', 'ext': 'mp4', 'title': 'The chaotic GOP immigration vote', 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', @@ -374,60 +359,22 @@ class NBCNewsIE(ThePlatformIE): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - if video_id is not None: - all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) - info = all_info.find('video') - - return { - 'id': video_id, - 'title': info.find('headline').text, - 'ext': 'flv', - 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, - 'description': info.find('caption').text, - 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, - } - else: - # "feature" and "nightly-news" pages use theplatform.com - video_id = mobj.group('mpx_id') + video_id = self._match_id(url) + if not video_id.isdigit(): webpage = self._download_webpage(url, video_id) - filter_param = 'byId' - bootstrap_json = self._search_regex( - [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', - 
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"', - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'], - webpage, 'bootstrap json', default=None) - if bootstrap_json: - bootstrap = self._parse_json( - bootstrap_json, video_id, transform_source=unescapeHTML) + data = self._parse_json(self._search_regex( + r'window\.__data\s*=\s*({.+});', webpage, + 'bootstrap json'), video_id) + video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id'] - info = None - if 'results' in bootstrap: - info = bootstrap['results'][0]['video'] - elif 'video' in bootstrap: - info = bootstrap['video'] - elif 'msnbcVideoInfo' in bootstrap: - info = bootstrap['msnbcVideoInfo']['meta'] - elif 'msnbcThePlatform' in bootstrap: - info = bootstrap['msnbcThePlatform']['videoPlayer']['video'] - else: - info = bootstrap - - if 'guid' in info: - video_id = info['guid'] - filter_param = 'byGuid' - elif 'mpxId' in info: - video_id = info['mpxId'] - - return { - '_type': 'url_transparent', - 'id': video_id, - # http://feed.theplatform.com/f/2E2eJC/nbcnews also works - 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}), - 'ie_key': 'ThePlatformFeed', - } + return { + '_type': 'url_transparent', + 'id': video_id, + # http://feed.theplatform.com/f/2E2eJC/nbcnews also works + 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}), + 'ie_key': 'ThePlatformFeed', + } class NBCOlympicsIE(InfoExtractor): From af60e81e3c557ace943aab35c1364d3d03d5a3bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 22 Nov 2018 02:00:38 +0700 Subject: [PATCH 10/18] [setup.py] Add more relevant classifiers --- setup.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/setup.py b/setup.py index 7dbb5805f..a1a08f1e2 100644 --- a/setup.py +++ b/setup.py @@ -124,6 +124,8 @@ setup( 'Development Status :: 5 - Production/Stable', 'Environment :: Console', 'License :: Public Domain', + 
'Programming Language :: Python', + 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', @@ -132,6 +134,12 @@ setup( 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: Implementation', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: IronPython', + 'Programming Language :: Python :: Implementation :: Jython', + 'Programming Language :: Python :: Implementation :: PyPy', ], cmdclass={'build_lazy_extractors': build_lazy_extractors}, From bd2d553c7b1529f793c2b7343c514a558543fc0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 22 Nov 2018 02:01:22 +0700 Subject: [PATCH 11/18] [travis] Add python 3.7 build --- .travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.travis.yml b/.travis.yml index 92f326860..1ea640071 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,12 @@ env: - YTDL_TEST_SET=download matrix: include: + - python: 3.7 + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.7 + dist: xenial + env: YTDL_TEST_SET=download - env: JYTHON=true; YTDL_TEST_SET=core - env: JYTHON=true; YTDL_TEST_SET=download fast_finish: true From 157eef3e635230cbba0dd0c74f7115029867533e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 22 Nov 2018 02:08:41 +0700 Subject: [PATCH 12/18] [setup.py] Add python 3.8 classifier --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index a1a08f1e2..dfb669ad2 100644 --- a/setup.py +++ b/setup.py @@ -135,6 +135,7 @@ setup( 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 
Implementation', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: IronPython', From 305ce767d586e8796d873270abf771e69ff5586c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 22 Nov 2018 02:34:35 +0700 Subject: [PATCH 13/18] [travis] Add python 3.8-dev build --- .travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.travis.yml b/.travis.yml index 1ea640071..79287ccf6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,12 @@ matrix: - python: 3.7 dist: xenial env: YTDL_TEST_SET=download + - python: 3.8-dev + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.8-dev + dist: xenial + env: YTDL_TEST_SET=download - env: JYTHON=true; YTDL_TEST_SET=core - env: JYTHON=true; YTDL_TEST_SET=download fast_finish: true From 560020da3049bec19e5714e9e24fc90fadd06582 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Nov 2018 23:19:46 +0100 Subject: [PATCH 14/18] [mixcloud] fallback to hardcoded decryption key(closes #18016) --- youtube_dl/extractor/mixcloud.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index b7bccb504..a2d19d3ef 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -161,11 +161,17 @@ class MixcloudIE(InfoExtractor): stream_info = info_json['streamInfo'] formats = [] + def decrypt_url(f_url): + for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'): + decrypted_url = self._decrypt_xor_cipher(k, compat_b64decode(f_url)) + if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): + return decrypted_url + for url_key in ('url', 'hlsUrl', 'dashUrl'): format_url = stream_info.get(url_key) if not format_url: continue - decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url)) + decrypted = decrypt_url(format_url) if not decrypted: continue if url_key == 'hlsUrl': From 
6f2883a2df45ca89d272bc8a0975f09758af5eb3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Nov 2018 23:25:38 +0100 Subject: [PATCH 15/18] [mixcloud] base64 decode before decryption --- youtube_dl/extractor/mixcloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index a2d19d3ef..bcac13ec5 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -163,7 +163,7 @@ class MixcloudIE(InfoExtractor): def decrypt_url(f_url): for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'): - decrypted_url = self._decrypt_xor_cipher(k, compat_b64decode(f_url)) + decrypted_url = self._decrypt_xor_cipher(k, f_url) if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): return decrypted_url @@ -171,7 +171,7 @@ class MixcloudIE(InfoExtractor): format_url = stream_info.get(url_key) if not format_url: continue - decrypted = decrypt_url(format_url) + decrypted = decrypt_url(compat_b64decode(format_url)) if not decrypted: continue if url_key == 'hlsUrl': From 66173211c4177d36612486acfd99fc4634b8004e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 23 Nov 2018 00:14:43 +0700 Subject: [PATCH 16/18] [ChangeLog] Actualize [ci skip] --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index 0083c4631..beb002041 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version + +Core ++ [setup.py] Add more relevant classifiers + +Extractors +* [mixcloud] Fallback to hardcoded decryption key (#18016) +* [nbc:news] Fix article extraction (#16194) +* [foxsports] Fix extraction (#17543) +* [loc] Relax regular expression and improve formats extraction ++ [ciscolive] Add support for ciscolive.cisco.com (#17984) +* [nzz] Relax kaltura regex (#18228) +* [sixplay] Fix formats extraction +* [bitchute] Improve title extraction +* [kaltura] Limit requested 
MediaEntry fields ++ [americastestkitchen] Add support for zype embeds (#18225) ++ [pornhub] Add pornhub.net alias +* [nova:embed] Fix extraction (#18222) + + version 2018.11.18 Extractors From d861a9d5814408973e0715bb9160fb7db34fbcd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 23 Nov 2018 00:16:45 +0700 Subject: [PATCH 17/18] release 2018.11.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 905576364..35cc8d6d0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.11.18 +[debug] youtube-dl version 2018.11.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index beb002041..f82c7ea35 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.11.23 Core + [setup.py] Add more relevant classifiers diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9009f7e9e..7d72ad82d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -163,6 +163,8 @@ - **chirbit** - **chirbit:profile** - **Cinchcast** + - **CiscoLiveSearch** + - **CiscoLiveSession** - **CJSW** - **cliphunter** - **Clippit** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7f5ad7bf4..4956365d0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.11.18' +__version__ = '2018.11.23' From 6864855eb111dbf6e0efe9ed086f48efa1d9f209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 23 Nov 2018 00:43:42 +0700 Subject: [PATCH 18/18] [tests] Fix invalid escape sequences --- test/test_compat.py | 2 +- test/test_postprocessors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/test/test_compat.py b/test/test_compat.py index d6c54e135..51fe6aa0b 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase): def test_compat_expanduser(self): old_home = os.environ.get('HOME') - test_str = 'C:\Documents and Settings\тест\Application Data' + test_str = r'C:\Documents and Settings\тест\Application Data' compat_setenv('HOME', test_str) self.assertEqual(compat_expanduser('~'), test_str) compat_setenv('HOME', old_home or '') diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index addb69d6f..4209d1d9a 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP class TestMetadataFromTitle(unittest.TestCase): def test_format_to_regex(self): pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s') - self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)') + self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')