From 3d08f63dc57f000384703f26b7dcb4b683e18c05 Mon Sep 17 00:00:00 2001 From: Mohammed Yaseen Mowzer Date: Thu, 14 Jun 2018 17:12:33 +0200 Subject: [PATCH 0001/1201] [generic] Skip unsuccessful jwplayer extraction (closes #16735) --- youtube_dl/extractor/generic.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 229dfda1b..1db154c4f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3150,9 +3150,13 @@ class GenericIE(InfoExtractor): jwplayer_data = self._find_jwplayer_data( webpage, video_id, transform_source=js_to_json) if jwplayer_data: - info = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, base_url=url) - return merge_dicts(info, info_dict) + try: + info = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, base_url=url) + return merge_dicts(info, info_dict) + except ExtractorError: + # See https://github.com/rg3/youtube-dl/pull/16735 + pass # Video.js embed mobj = re.search( From 0a9a8118ce834c206434df04c18187934acbf608 Mon Sep 17 00:00:00 2001 From: Hormoz K Date: Sat, 4 Aug 2018 09:47:58 -0400 Subject: [PATCH 0002/1201] [radiojavan] Fix extraction --- youtube_dl/extractor/radiojavan.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index a53ad97a5..4124bcd45 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -6,11 +6,13 @@ from .common import InfoExtractor from ..utils import ( unified_strdate, str_to_int, + urlencode_postdata, ) class RadioJavanIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P[^/]+)/?' + _HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host' _TEST = { 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', @@ -31,8 +33,18 @@ class RadioJavanIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + download_host = self._download_json( + self._HOST_TRACKER_URL, + video_id, + data=urlencode_postdata({'id': video_id}), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': url, + } + )['host'] + formats = [{ - 'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path, + 'url': '%s/%s' % (download_host, video_path), 'format_id': '%sp' % height, 'height': int(height), } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] From 93284ff2ea4eb84c25b8d496012398a056ba89ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 3 Sep 2018 02:53:26 +0700 Subject: [PATCH 0003/1201] [radiojavan] Improve extraction (closes #17151) --- youtube_dl/extractor/radiojavan.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index 4124bcd45..3f74f0c01 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -4,15 +4,16 @@ import re from .common import InfoExtractor from ..utils import ( - unified_strdate, + parse_resolution, str_to_int, + unified_strdate, urlencode_postdata, + urljoin, ) class RadioJavanIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P[^/]+)/?' - _HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host' _TEST = { 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', @@ -31,23 +32,26 @@ class RadioJavanIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - download_host = self._download_json( - self._HOST_TRACKER_URL, - video_id, + 'https://www.radiojavan.com/videos/video_host', video_id, data=urlencode_postdata({'id': video_id}), headers={ 'Content-Type': 'application/x-www-form-urlencoded', 'Referer': url, - } - )['host'] + }).get('host', 'https://host1.rjmusicmedia.com') - formats = [{ - 'url': '%s/%s' % (download_host, video_path), - 'format_id': '%sp' % height, - 'height': int(height), - } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] + webpage = self._download_webpage(url, video_id) + + formats = [] + for format_id, _, video_path in re.findall( + r'RJ\.video(?P\d+[pPkK])\s*=\s*(["\'])(?P(?:(?!\2).)+)\2', + webpage): + f = parse_resolution(format_id) + f.update({ + 'url': urljoin(download_host, video_path), + 'format_id': format_id, + }) + formats.append(f) self._sort_formats(formats) title = self._og_search_title(webpage) From aa1d5eb9053b4c39661415ef411fb3746ee11f41 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 4 Sep 2018 10:37:11 +0100 Subject: [PATCH 0004/1201] [slideslive] make the check for video_service_name case-insensitive(closes #17429) --- youtube_dl/extractor/slideslive.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py index 104576033..ed84322c5 100644 --- a/youtube_dl/extractor/slideslive.py +++ b/youtube_dl/extractor/slideslive.py @@ -8,6 +8,7 @@ from ..utils import ExtractorError class SlidesLiveIE(InfoExtractor): _VALID_URL = r'https?://slideslive\.com/(?P[0-9]+)' _TESTS = [{ + # video_service_name = YOUTUBE 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f', 'info_dict': { @@ -19,14 +20,18 @@ class SlidesLiveIE(InfoExtractor): 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'upload_date': '20170925', } + }, { + # video_service_name = youtube + 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( url, video_id, headers={'Accept': 'application/json'}) - service_name = video_data['video_service_name'] - if service_name == 'YOUTUBE': + service_name = video_data['video_service_name'].lower() + if service_name == 'youtube': yt_video_id = video_data['video_service_id'] return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) else: From 09322cccdb655954c63724459556bd3ebb585d48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Sep 2018 00:22:30 +0700 Subject: [PATCH 0005/1201] [iprima] Confirm adult check (closes #17437) --- youtube_dl/extractor/iprima.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index a29e6a5ba..3c4b7e48b 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -38,6 +38,8 @@ class IPrimaIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1') + webpage = self._download_webpage(url, video_id) video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id') From 2d4fe594c64249c263d739c8094b445c67fa09f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Sep 2018 00:51:20 +0700 Subject: [PATCH 0006/1201] [pornhub:uservideos] Add support for new URLs (closes #17388) --- youtube_dl/extractor/pornhub.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index ffc4405a8..6782848d9 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -254,7 +254,7 @@ class PornHubIE(InfoExtractor): self._sort_formats(formats) video_uploader = self._html_search_regex( - r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', + r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', webpage, 'uploader', fatal=False) view_count = self._extract_count( @@ -346,7 +346,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): class PornHubUserVideosIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P[^/]+)/videos' + _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' _TESTS = [{ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'info_dict': { @@ -378,6 +378,12 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): }, { 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/model/jayndrea/videos/upload', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', + 'only_matching': True, }] def _real_extract(self, url): From 9a47fa35dd9dd2d53f4d3f088811ea29295991e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 03:36:10 +0700 Subject: [PATCH 0007/1201] [youtube] Fix extraction (closes #17457, closes #17464) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0442906df..27047425d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1178,7 +1178,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( (r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\.sig\|\|(?P[a-zA-Z0-9$]+)\('), + r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) From a41a506077936b2574cbb34afa83f8215accc3a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 03:40:06 +0700 Subject: [PATCH 0008/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4632133a3..b378f0a0e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +version + +Extractors +* [youtube] Fix extraction (#17457, #17464) ++ [pornhub:uservideos] Add support for new URLs (#17388) +* [iprima] Confirm adult check (#17437) +* [slideslive] Make check for video service name case-insensitive (#17429) +* [radiojavan] Fix extraction (#17151) +* [generic] Skip unsuccessful jwplayer extraction (#16735) + + version 2018.09.01 Core From ad98d2eb748b593dc044296493904607c3058e8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 03:42:28 +0700 Subject: [PATCH 0009/1201] release 2018.09.08 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 16d8f36d7..2d67247e6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.08** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.01 +[debug] youtube-dl version 2018.09.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index b378f0a0e..ac95c9b71 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.09.08 Extractors * [youtube] Fix extraction (#17457, #17464) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e655e0050..716d9ffe0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.01' +__version__ = '2018.09.08' From d0c5fabc1215fddd354fd91cd65082e45df1165e Mon Sep 17 00:00:00 2001 From: Timendum Date: Sat, 8 Sep 2018 09:44:06 +0200 Subject: [PATCH 0010/1201] [nbc] Fix extraction of percent encoded URLs (closes #17374) --- youtube_dl/extractor/nbc.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index c843f8649..765c46fd2 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -7,6 +7,7 @@ import re from .common import InfoExtractor from .theplatform import ThePlatformIE from .adobepass import AdobePassIE +from ..compat import compat_urllib_parse_unquote from ..utils import ( find_xpath_attr, smuggle_url, @@ -75,11 +76,16 @@ class NBCIE(AdobePassIE): 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310', 'only_matching': True, }, + { + # Percent escaped url + 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189', + 'only_matching': True, + } ] def _real_extract(self, url): permalink, video_id = re.match(self._VALID_URL, url).groups() - permalink = 'http' + permalink + permalink = 'http' + compat_urllib_parse_unquote(permalink) response = self._download_json( 'https://api.nbc.com/v3/videos', video_id, query={ 'filter[permalink]': permalink, From d0de6a287ac64b52630e5b21c3db52a4312675fd Mon Sep 17 00:00:00 2001 From: Jens Rutschmann Date: Sat, 1 Sep 2018 19:43:34 +0200 Subject: [PATCH 0011/1201] [tele5] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tele5.py | 39 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 youtube_dl/extractor/tele5.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 995af9988..7dc569724 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1086,6 +1086,7 @@ from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .tele5 import Tele5IE from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py new file mode 100644 index 000000000..cfa8d2475 --- /dev/null +++ b/youtube_dl/extractor/tele5.py @@ -0,0 +1,39 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .nexx import NexxIE + + +class Tele5IE(InfoExtractor): + _VALID_URL = r'https://www\.tele5\.de/(?:mediathek/filme-online/videos\?vid=|tv/)(?P[\w-]+)' + + _TESTS = [{ + 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1550589', + 'info_dict': { + 'id': '1550589', + 'ext': 'mp4', + 'upload_date': '20180822', + 'timestamp': 1534927316, + 'title': 'SchleFaZ: Atomic Shark', + } + }, { + 'url': 'https://www.tele5.de/tv/dark-matter/videos', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'id\s*=\s*["\']video-player["\']\s*data-id\s*=\s*["\']([0-9]+)["\']', + webpage, 'video_id') + + return self.url_result( + 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id, + ie=NexxIE.ie_key(), video_id=video_id) From ae2384ff5f9d6a18209139a4b2e2cd33921be63a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 16:04:39 +0700 Subject: [PATCH 0012/1201] [tele5] Improve extraction (closes #7805, closes #7922, closes #17331, closes #17414) --- youtube_dl/extractor/tele5.py | 39 ++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index cfa8d2475..25573e49f 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -1,38 +1,43 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from .nexx import NexxIE +from ..compat import compat_urlparse class Tele5IE(InfoExtractor): - _VALID_URL = r'https://www\.tele5\.de/(?:mediathek/filme-online/videos\?vid=|tv/)(?P[\w-]+)' - + _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P[^?#&]+)' _TESTS = [{ - 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1550589', + 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416', 'info_dict': { - 'id': '1550589', + 'id': '1549416', 'ext': 'mp4', - 'upload_date': '20180822', - 'timestamp': 1534927316, - 'title': 'SchleFaZ: Atomic Shark', - } + 'upload_date': '20180814', + 'timestamp': 1534290623, + 'title': 'Pandorum', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', + 'only_matching': True, }, { 'url': 'https://www.tele5.de/tv/dark-matter/videos', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0] - webpage = self._download_webpage(url, display_id) - - video_id = self._html_search_regex( - r'id\s*=\s*["\']video-player["\']\s*data-id\s*=\s*["\']([0-9]+)["\']', - webpage, 'video_id') + if not video_id: + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._html_search_regex( + r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)', + webpage, 'video id') return self.url_result( 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id, From 6f9f3340bb4c837d86067856e12232fc9695e698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 17:24:09 +0700 Subject: [PATCH 0013/1201] [dtube] PEP 8 (#17455) --- youtube_dl/extractor/dtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py index 4ca97f860..5887887e1 100644 --- a/youtube_dl/extractor/dtube.py +++ b/youtube_dl/extractor/dtube.py @@ -59,7 +59,7 @@ class DTubeIE(InfoExtractor): try: self.to_screen('%s: Checking %s video format URL' % (video_id, format_id)) self._downloader._opener.open(video_url, timeout=5).close() - except timeout as e: + except timeout: self.to_screen( '%s: %s URL is invalid, skipping' % (video_id, format_id)) continue From 13ef64fd9340794e214ba64c258fa0999fbd0a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 17:24:34 +0700 Subject: [PATCH 0014/1201] [motherless] PEP 8 (#17455) --- youtube_dl/extractor/motherless.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index bed5645f2..d4bd273b6 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -167,9 +167,9 @@ class MotherlessGroupIE(InfoExtractor): if not entries: entries = [ self.url_result( - compat_urlparse.urljoin(base, '/' + video_id), - ie=MotherlessIE.ie_key(), video_id=video_id) - for video_id in orderedSet(re.findall( + compat_urlparse.urljoin(base, '/' + entry_id), + ie=MotherlessIE.ie_key(), video_id=entry_id) + for entry_id in orderedSet(re.findall( r'data-codename=["\']([A-Z0-9]+)', webpage))] return entries From 2c9d3b9962e28b61615e382bfcaf6db6d6987c09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 17:24:48 +0700 Subject: [PATCH 0015/1201] [seznamzpravy] PEP 8 (#17455) --- youtube_dl/extractor/seznamzpravy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/seznamzpravy.py b/youtube_dl/extractor/seznamzpravy.py index 6d4e3b76d..7a1c7e38b 100644 --- a/youtube_dl/extractor/seznamzpravy.py +++ b/youtube_dl/extractor/seznamzpravy.py @@ -164,6 +164,6 @@ class SeznamZpravyArticleIE(InfoExtractor): description = info.get('description') or self._og_search_description(webpage) return self.playlist_result([ - self.url_result(url, ie=SeznamZpravyIE.ie_key()) - for url in SeznamZpravyIE._extract_urls(webpage)], + self.url_result(entry_url, ie=SeznamZpravyIE.ie_key()) + for entry_url in SeznamZpravyIE._extract_urls(webpage)], article_id, title, description) From 2e4350eec6981ffb12617abda7623e4fb565ce8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Sep 2018 17:24:59 +0700 Subject: [PATCH 0016/1201] [generic] PEP 8 (#17455) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1db154c4f..76ef01332 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3112,7 +3112,7 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key()) - sharevideos_urls = [mobj.group('url') for mobj in re.finditer( + sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer( r']+?\bsrc\s*=\s*(["\'])(?P(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1', webpage)] if sharevideos_urls: From a2637a2dda5e90b83bd2f403d06ad1ce21fc8c5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Sep 2018 01:34:49 +0700 Subject: [PATCH 0017/1201] [iprima] Add support for prima.iprima.cz (closes #17514) --- youtube_dl/extractor/iprima.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 3c4b7e48b..1d58d6e85 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -12,7 +12,7 @@ from ..utils import ( class IPrimaIE(InfoExtractor): - _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P[^?#]+)' + _VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P[^?#]+)' _GEO_BYPASS = False _TESTS = [{ @@ -33,6 +33,14 @@ class IPrimaIE(InfoExtractor): # geo restricted 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1', 'only_matching': True, + }, { + # iframe api.play-backend.iprima.cz + 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2', + 'only_matching': True, + }, { + # iframe prima.iprima.cz + 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha', + 'only_matching': True, }] def _real_extract(self, url): @@ -42,7 +50,10 @@ class IPrimaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id') + video_id = self._search_regex( + (r']+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)', + r'data-product="([^"]+)">'), + webpage, 'real id') playerpage = self._download_webpage( 'http://play.iprima.cz/prehravac/init', From 25d110be30b92f785617140b0617a73d8eec5f7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Sep 2018 02:37:22 +0700 Subject: [PATCH 0018/1201] [utils] Properly recognize AV1 codec (closes #17506) --- test/test_utils.py | 4 ++++ youtube_dl/utils.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8da5ccc56..9e28e008f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -785,6 +785,10 @@ class TestUtil(unittest.TestCase): 'vcodec': 'h264', 'acodec': 'aac', }) + self.assertEqual(parse_codecs('av01.0.05M.08'), { + 'vcodec': 'av01.0.05M.08', + 'acodec': 'none', + }) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bcfb72d43..e84d35d4d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2477,7 +2477,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in splited_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): From 14f577e31c3d11e84bda5048d11bc7c31a830e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Sep 2018 02:45:44 +0700 Subject: [PATCH 0019/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index ac95c9b71..c02450bf4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +version + +Core ++ [utils] Properly recognize AV1 codec (#17506) + +Extractors ++ [iprima] Add support for prima.iprima.cz (#17514) ++ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414) +* [nbc] Fix extraction of percent encoded URLs (#17374) + + version 2018.09.08 Extractors From 8476b4fd91ce71c052a3bdea9e010e5196331606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Sep 2018 02:48:37 +0700 Subject: [PATCH 0020/1201] release 2018.09.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2d67247e6..f41266f32 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.08 +[debug] youtube-dl version 2018.09.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c02450bf4..d184f69ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.09.10 Core + [utils] Properly recognize AV1 codec (#17506) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f0d6be901..9b8601751 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -847,6 +847,7 @@ - **techtv.mit.edu** - **ted** - **Tele13** + - **Tele5** - **TeleBruxelles** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 716d9ffe0..b078c4993 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.08' +__version__ = '2018.09.10' From 96dbf70de63c08fc27e9bd7bec97670325225758 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Sep 2018 02:24:32 +0700 Subject: [PATCH 0021/1201] [eporner] Extract JSON-LD (closes #17519) --- youtube_dl/extractor/eporner.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 6d03d7095..c050bf9df 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -9,6 +9,7 @@ from ..utils import ( encode_base_n, ExtractorError, int_or_none, + merge_dicts, parse_duration, str_to_int, url_or_none, @@ -25,10 +26,16 @@ class EpornerIE(InfoExtractor): 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', 'ext': 'mp4', 'title': 'Infamous Tiffany Teen Strip Tease Video', + 'description': 'md5:764f39abf932daafa37485eb46efa152', + 'timestamp': 1232520922, + 'upload_date': '20090121', 'duration': 1838, 'view_count': int, 'age_limit': 18, }, + 'params': { + 'proxy': '127.0.0.1:8118' + } }, { # New (May 2016) URL layout 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', @@ -104,12 +111,15 @@ class EpornerIE(InfoExtractor): }) self._sort_formats(formats) - duration = parse_duration(self._html_search_meta('duration', webpage)) + json_ld = self._search_json_ld(webpage, display_id, default={}) + + duration = parse_duration(self._html_search_meta( + 'duration', webpage, default=None)) view_count = str_to_int(self._search_regex( r'id="cinemaviews">\s*([0-9,]+)\s*views', webpage, 'view count', fatal=False)) - return { + return merge_dicts(json_ld, { 'id': video_id, 'display_id': display_id, 'title': title, @@ -117,4 +127,4 @@ class EpornerIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, - } + }) From 79facb27735baf27d65baf050121d68e26654a02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Sep 2018 02:29:45 +0700 Subject: [PATCH 0022/1201] [tube8] Fix metadata extraction (closes #17520) --- youtube_dl/extractor/tube8.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 368c45729..db93b0182 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -45,7 +45,7 @@ class Tube8IE(KeezMoviesIE): r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') description = self._html_search_regex( - r'>Description:\s*(.+?)\s*<', webpage, 'description', fatal=False) + r'(?s)Description:\s*
(.+?)
', webpage, 'description', fatal=False) uploader = self._html_search_regex( r'\s*(.+?)\s*<', webpage, 'uploader', fatal=False) @@ -55,19 +55,19 @@ class Tube8IE(KeezMoviesIE): dislike_count = int_or_none(self._search_regex( r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False)) view_count = str_to_int(self._search_regex( - r'Views: ([\d,\.]+)\s*', + r'Views:\s*\s*
([\d,\.]+)', webpage, 'view count', fatal=False)) comment_count = str_to_int(self._search_regex( r'(\d+)', webpage, 'comment count', fatal=False)) category = self._search_regex( - r'Category:\s*\s*]+href=[^>]+>([^<]+)', + r'Category:\s*\s*
\s*]+href=[^>]+>([^<]+)', webpage, 'category', fatal=False) categories = [category] if category else None tags_str = self._search_regex( - r'(?s)Tags:\s*(.+?)\s*
(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None From f5b0175349aa85a1b366d12a487ebd66aec5e1b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Sep 2018 02:41:05 +0700 Subject: [PATCH 0023/1201] [vzaar] Add support for HLS --- youtube_dl/extractor/vzaar.py | 60 +++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index 02fcd52c7..6000671c3 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -4,15 +4,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, float_or_none, + unified_timestamp, + url_or_none, ) class VzaarIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P\d+)' _TESTS = [{ + # HTTP and HLS 'url': 'https://vzaar.com/videos/1152805', 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf', 'info_dict': { @@ -40,24 +44,48 @@ class VzaarIE(InfoExtractor): video_id = self._match_id(url) video_data = self._download_json( 'http://view.vzaar.com/v2/%s/video' % video_id, video_id) - source_url = video_data['sourceUrl'] - info = { + title = video_data['videoTitle'] + + formats = [] + + source_url = url_or_none(video_data.get('sourceUrl')) + if source_url: + f = { + 'url': source_url, + 'format_id': 'http', + } + if 'audio' in source_url: + f.update({ + 'vcodec': 'none', + 'ext': 'mp3', + }) + else: + f.update({ + 'width': int_or_none(video_data.get('width')), + 'height': int_or_none(video_data.get('height')), + 'ext': 'mp4', + 'fps': float_or_none(video_data.get('fps')), + }) + formats.append(f) + + video_guid = video_data.get('guid') + usp = video_data.get('usp') + if isinstance(video_guid, compat_str) and isinstance(usp, dict): + m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?' + % (video_guid, video_id)) + '&'.join( + '%s=%s' % (k, v) for k, v in usp.items()) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + + return { 'id': video_id, - 'title': video_data['videoTitle'], - 'url': source_url, + 'title': title, 'thumbnail': self._proto_relative_url(video_data.get('poster')), 'duration': float_or_none(video_data.get('videoDuration')), + 'timestamp': unified_timestamp(video_data.get('ts')), + 'formats': formats, } - if 'audio' in source_url: - info.update({ - 'vcodec': 'none', - 'ext': 'mp3', - }) - else: - info.update({ - 'width': int_or_none(video_data.get('width')), - 'height': int_or_none(video_data.get('height')), - 'ext': 'mp4', - }) - return info From db348e88495bd87adcf058bb6c0a26c1fc591f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Sep 2018 23:26:38 +0700 Subject: [PATCH 0024/1201] [twitch:clips] Extend _VALID_URL (closes #17559) --- youtube_dl/extractor/twitch.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index b39972b1e..26661fdf6 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -559,7 +559,8 @@ class TwitchStreamIE(TwitchBaseIE): TwitchAllVideosIE, TwitchUploadsIE, TwitchPastBroadcastsIE, - TwitchHighlightsIE)) + TwitchHighlightsIE, + TwitchClipsIE)) else super(TwitchStreamIE, cls).suitable(url)) def _real_extract(self, url): @@ -633,7 +634,7 @@ class TwitchStreamIE(TwitchBaseIE): class TwitchClipsIE(TwitchBaseIE): IE_NAME = 'twitch:clips' - _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', @@ -653,6 +654,9 @@ class TwitchClipsIE(TwitchBaseIE): # multiple formats 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 'only_matching': True, + }, { + 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan', + 'only_matching': True, }] def _real_extract(self, url): From 0f2aa0dcaa5ebffbca781f8ecd15c4d52a80f011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Sep 2018 23:56:03 +0700 Subject: [PATCH 0025/1201] [asiancrush] Fix extraction (closes #15630) --- youtube_dl/extractor/asiancrush.py | 42 ++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py index 594c88c9c..6d71c5ad5 100644 --- a/youtube_dl/extractor/asiancrush.py +++ b/youtube_dl/extractor/asiancrush.py @@ -8,7 +8,6 @@ from .kaltura import KalturaIE from ..utils import ( extract_attributes, remove_end, - urlencode_postdata, ) @@ -34,19 +33,40 @@ class AsianCrushIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - data = self._download_json( - 'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id, - data=urlencode_postdata({ - 'postid': video_id, - 'action': 'get_channel_kaltura_vars', - })) + webpage = self._download_webpage(url, video_id) - entry_id = data['entry_id'] + entry_id, partner_id, title = [None] * 3 + + vars = self._parse_json( + self._search_regex( + r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars', + default='{}'), video_id, fatal=False) + if vars: + entry_id = vars.get('entry_id') + partner_id = vars.get('partner_id') + title = vars.get('vid_label') + + if not entry_id: + entry_id = self._search_regex( + r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id') + + player = self._download_webpage( + 'https://api.asiancrush.com/embeddedVideoPlayer', video_id, + query={'id': entry_id}) + + kaltura_id = self._search_regex( + r'entry_id["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', player, + 'kaltura id', group='id') + + if not partner_id: + partner_id = self._search_regex( + r'/p(?:artner_id)?/(\d+)', player, 'partner id', + default='513551') return self.url_result( - 'kaltura:%s:%s' % (data['partner_id'], entry_id), - ie=KalturaIE.ie_key(), video_id=entry_id, - video_title=data.get('vid_label')) + 'kaltura:%s:%s' % (partner_id, kaltura_id), + ie=KalturaIE.ie_key(), video_id=kaltura_id, + video_title=title) class AsianCrushPlaylistIE(InfoExtractor): From 15bf2ca0da0ae94dbc6c3b76c89910ceaea88bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 00:42:27 +0700 Subject: [PATCH 0026/1201] [porntube] Fix extraction (closes #17541) --- youtube_dl/extractor/fourtube.py | 94 ++++++++++++++++++++++++++------ 1 file changed, 76 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index ad273a0e7..20391b2bb 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,15 +3,44 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_b64decode, + compat_str, + compat_urllib_parse_unquote, + compat_urlparse, +) from ..utils import ( + int_or_none, parse_duration, parse_iso8601, str_to_int, + try_get, + unified_timestamp, + url_or_none, ) class FourTubeBaseIE(InfoExtractor): + _TKN_HOST = 'tkn.kodicdn.com' + + def _extract_formats(self, url, video_id, media_id, sources): + token_url = 'https://%s/%s/desktop/%s' % ( + self._TKN_HOST, media_id, '+'.join(sources)) + + parsed_url = compat_urlparse.urlparse(url) + tokens = self._download_json(token_url, video_id, data=b'', headers={ + 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), + 'Referer': url, + }) + formats = [{ + 'url': tokens[format]['token'], + 'format_id': format + 'p', + 'resolution': format + 'p', + 'quality': int(format), + } for format in sources] + self._sort_formats(formats) + return formats + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') @@ -68,21 +97,7 @@ class FourTubeBaseIE(InfoExtractor): media_id = params[0] sources = ['%s' % p for p in params[2]] - token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format( - media_id, '+'.join(sources)) - - parsed_url = compat_urlparse.urlparse(url) - tokens = self._download_json(token_url, video_id, data=b'', headers={ - 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), - 'Referer': url, - }) - formats = [{ - 'url': tokens[format]['token'], - 'format_id': format + 'p', - 'resolution': format + 'p', - 'quality': int(format), - } for format in sources] - self._sort_formats(formats) + formats = self._extract_formats(url, video_id, media_id, sources) return { 'id': video_id, @@ -164,6 +179,7 @@ class FuxIE(FourTubeBaseIE): class PornTubeIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?porntube\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s' + _TKN_HOST = 'tkn.porntube.com' _TESTS = [{ 'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759', 'info_dict': { @@ -171,13 +187,12 @@ class PornTubeIE(FourTubeBaseIE): 'ext': 'mp4', 'title': 'Teen couple doing anal', 'uploader': 'Alexy', - 'uploader_id': 'Alexy', + 'uploader_id': '91488', 'upload_date': '20150606', 'timestamp': 1433595647, 'duration': 5052, 'view_count': int, 'like_count': int, - 'categories': list, 'age_limit': 18, }, 'params': { @@ -191,6 +206,49 @@ class PornTubeIE(FourTubeBaseIE): 'only_matching': True, }] + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + video = self._parse_json( + self._search_regex( + r'INITIALSTATE\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'data', group='value'), video_id, + transform_source=lambda x: compat_urllib_parse_unquote( + compat_b64decode(x).decode('utf-8')))['page']['video'] + + title = video['title'] + media_id = video['mediaId'] + sources = [compat_str(e['height']) + for e in video['encodings'] if e.get('height')] + formats = self._extract_formats(url, video_id, media_id, sources) + + thumbnail = url_or_none(video.get('masterThumb')) + uploader = try_get(video, lambda x: x['user']['username'], compat_str) + uploader_id = compat_str(try_get(video, lambda x: x['user']['id'], int)) + like_count = int_or_none(video.get('likes')) + dislike_count = int_or_none(video.get('dislikes')) + view_count = int_or_none(video.get('playsQty')) + duration = int_or_none(video.get('durationInSeconds')) + timestamp = unified_timestamp(video.get('publishedAt')) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'timestamp': timestamp, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'view_count': view_count, + 'duration': duration, + 'age_limit': 18, + } + class PornerBrosIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?pornerbros\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' From 6f1f59f39cf934d5ccae3ca2e6fd4eaab726137f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:23:36 +0700 Subject: [PATCH 0027/1201] [extractor/common] Introduce channel meta fields --- youtube_dl/extractor/common.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b8bbaf81a..8eab5947f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -211,6 +211,11 @@ class InfoExtractor(object): If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. uploader_url: Full URL to a personal webpage of the video uploader. + channel: Full name of the channel the video is uploaded on. + Note that channel fields may or may noy repeat uploader + fields. This depends on a particular extractor. + channel_id: Id of the channel. + channel_url: Full URL to a channel webpage. location: Physical location where the video was filmed. subtitles: The available subtitles as a dictionary in the format {tag: subformats}. "tag" is usually a language code, and From dd4c449219976ad6e80fef74a3431c5553cf5031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:24:26 +0700 Subject: [PATCH 0028/1201] [youtube] Extract channel meta fields (closes #9676, closes #12939) --- youtube_dl/extractor/youtube.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 27047425d..2fe074cb4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -490,6 +490,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', + 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', + 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 'upload_date': '20121002', 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', @@ -1907,6 +1909,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: self._downloader.report_warning('unable to extract uploader nickname') + channel_id = self._html_search_meta( + 'channelId', video_webpage, 'channel id') + channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None + # thumbnail image # We try first to get a high quality image: m_thumb = re.search(r'', @@ -2078,6 +2084,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': video_uploader, 'uploader_id': video_uploader_id, 'uploader_url': video_uploader_url, + 'channel_id': channel_id, + 'channel_url': channel_url, 'upload_date': upload_date, 'license': video_license, 'creator': video_creator or artist, From d03beddf0f0b460d1b58c98173e2d4620d1d11b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:24:48 +0700 Subject: [PATCH 0029/1201] [vimeo] Extract channel meta fields --- youtube_dl/extractor/vimeo.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index e49b233f2..95d368cc1 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -299,10 +299,13 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', + 'channel_id': 'keypeele', + 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele', 'timestamp': 1380339469, 'upload_date': '20130928', 'duration': 187, }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 'http://vimeo.com/76979871', @@ -355,11 +358,13 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/channels/tributes/6213729', 'info_dict': { 'id': '6213729', - 'ext': 'mov', + 'ext': 'mp4', 'title': 'Vimeo Tribute: The Shining', 'uploader': 'Casey Donahue', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', 'uploader_id': 'caseydonahue', + 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes', + 'channel_id': 'tributes', 'timestamp': 1250886430, 'upload_date': '20090821', 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', @@ -465,6 +470,9 @@ class VimeoIE(VimeoBaseInfoExtractor): if 'Referer' not in headers: headers['Referer'] = url + channel_id = self._search_regex( + r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) + # Extract ID from URL mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -652,6 +660,8 @@ class VimeoIE(VimeoBaseInfoExtractor): r']+rel=["\']license["\'][^>]+href=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'license', default=None, group='license') + channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None + info_dict = { 'id': video_id, 'formats': formats, @@ -662,6 +672,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'like_count': like_count, 'comment_count': comment_count, 'license': cc_license, + 'channel_id': channel_id, + 'channel_url': channel_url, } info_dict = merge_dicts(info_dict, info_dict_config, json_ld) From 127103b643c5105bd361c5e3be267ac40843ccdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:25:32 +0700 Subject: [PATCH 0030/1201] [porntube] Extract channel meta fields --- youtube_dl/extractor/fourtube.py | 33 +++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index 20391b2bb..a9a1f911e 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -13,6 +13,7 @@ from ..utils import ( int_or_none, parse_duration, parse_iso8601, + str_or_none, str_to_int, try_get, unified_timestamp, @@ -198,6 +199,26 @@ class PornTubeIE(FourTubeBaseIE): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406', + 'info_dict': { + 'id': '1331406', + 'ext': 'mp4', + 'title': 'Squirting Teen Ballerina on ECG', + 'uploader': 'Exploited College Girls', + 'uploader_id': '665', + 'channel': 'Exploited College Girls', + 'channel_id': '665', + 'upload_date': '20130920', + 'timestamp': 1379685485, + 'duration': 851, + 'view_count': int, + 'like_count': int, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://www.porntube.com/embed/7089759', 'only_matching': True, @@ -227,7 +248,11 @@ class PornTubeIE(FourTubeBaseIE): thumbnail = url_or_none(video.get('masterThumb')) uploader = try_get(video, lambda x: x['user']['username'], compat_str) - uploader_id = compat_str(try_get(video, lambda x: x['user']['id'], int)) + uploader_id = str_or_none(try_get( + video, lambda x: x['user']['id'], int)) + channel = try_get(video, lambda x: x['channel']['name'], compat_str) + channel_id = str_or_none(try_get( + video, lambda x: x['channel']['id'], int)) like_count = int_or_none(video.get('likes')) dislike_count = int_or_none(video.get('dislikes')) view_count = int_or_none(video.get('playsQty')) @@ -239,8 +264,10 @@ class PornTubeIE(FourTubeBaseIE): 'title': title, 'formats': formats, 'thumbnail': thumbnail, - 'uploader': uploader, - 'uploader_id': uploader_id, + 'uploader': uploader or channel, + 'uploader_id': uploader_id or channel_id, + 'channel': channel, + 'channel_id': channel_id, 'timestamp': timestamp, 'like_count': like_count, 'dislike_count': dislike_count, From 0e7b8d3eaca710fbae8aa5531e28a6b6f05b387a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 01:53:01 +0700 Subject: [PATCH 0031/1201] [extractor/common] Fix typos --- youtube_dl/extractor/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8eab5947f..2dbf81e6e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -212,7 +212,7 @@ class InfoExtractor(object): uploader_id: Nickname or id of the video uploader. uploader_url: Full URL to a personal webpage of the video uploader. channel: Full name of the channel the video is uploaded on. - Note that channel fields may or may noy repeat uploader + Note that channel fields may or may not repeat uploader fields. This depends on a particular extractor. channel_id: Id of the channel. channel_url: Full URL to a channel webpage. @@ -1706,9 +1706,9 @@ class InfoExtractor(object): # However, this is not always respected, for example, [2] # contains EXT-X-STREAM-INF tag which references AUDIO # rendition group but does not have CODECS and despite - # referencing audio group an audio group, it represents - # a complete (with audio and video) format. So, for such cases - # we will ignore references to rendition groups and treat them + # referencing an audio group it represents a complete + # (with audio and video) format. So, for such cases we will + # ignore references to rendition groups and treat them # as complete formats. if audio_group_id and codecs and f.get('vcodec') != 'none': audio_group = groups.get(audio_group_id) From 3661ebf2b6e0cb951dca5b4498a1d6885cbf7b56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Sep 2018 02:04:43 +0700 Subject: [PATCH 0032/1201] [pornhub] Extract upload date (closes #17574) --- youtube_dl/extractor/pornhub.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 6782848d9..19eaf389f 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -40,6 +40,7 @@ class PornHubIE(InfoExtractor): 'ext': 'mp4', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'uploader': 'Babes', + 'upload_date': '20130628', 'duration': 361, 'view_count': int, 'like_count': int, @@ -57,6 +58,7 @@ class PornHubIE(InfoExtractor): 'ext': 'mp4', 'title': '重庆婷婷女王足交', 'uploader': 'Unknown', + 'upload_date': '20150213', 'duration': 1753, 'view_count': int, 'like_count': int, @@ -237,8 +239,14 @@ class PornHubIE(InfoExtractor): video_urls.append((video_url, None)) video_urls_set.add(video_url) + upload_date = None formats = [] for video_url, height in video_urls: + if not upload_date: + upload_date = self._search_regex( + r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) + if upload_date: + upload_date = upload_date.replace('/', '') tbr = None mobj = re.search(r'(?P\d+)[pP]?_(?P\d+)[kK]', video_url) if mobj: @@ -278,6 +286,7 @@ class PornHubIE(InfoExtractor): return { 'id': video_id, 'uploader': video_uploader, + 'upload_date': upload_date, 'title': title, 'thumbnail': thumbnail, 'duration': duration, From 8b40c92724453995701b75559b190075d6dbfd7d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 15 Sep 2018 06:30:57 +0100 Subject: [PATCH 0033/1201] [vimeo] redirect to feature url only in the case of a trailer(closes #14591) --- youtube_dl/extractor/vimeo.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 95d368cc1..0a9239b62 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -571,19 +571,23 @@ class VimeoIE(VimeoBaseInfoExtractor): if config.get('view') == 4: config = self._verify_player_video_password(redirect_url, video_id) + vod = config.get('video', {}).get('vod', {}) + def is_rented(): if '>You rented this title.<' in webpage: return True if config.get('user', {}).get('purchased'): return True - label = try_get( - config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str) - if label and label.startswith('You rented this'): - return True + for purchase_option in vod.get('purchase_options', []): + if purchase_option.get('purchased'): + return True + label = purchase_option.get('label_string') + if label and (label.startswith('You rented this') or label.endswith(' remaining')): + return True return False - if is_rented(): - feature_id = config.get('video', {}).get('vod', {}).get('feature_id') + if is_rented() and vod.get('is_trailer'): + feature_id = vod.get('feature_id') if feature_id and not data.get('force_feature_id', False): return self.url_result(smuggle_url( 'https://player.vimeo.com/player/%s' % feature_id, From d9b0d118adfa365e8ed67de277f5e83e29c4635c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 16 Sep 2018 23:52:27 +0700 Subject: [PATCH 0034/1201] [vrv] Make format ids deterministic --- youtube_dl/extractor/vrv.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 921e9e172..ac0819c7c 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -90,7 +90,13 @@ class VRVIE(VRVBaseIE): def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): if not url or stream_format not in ('hls', 'dash'): return [] - stream_id = hardsub_lang or audio_lang + assert audio_lang or hardsub_lang + stream_id_list = [] + if audio_lang: + stream_id_list.append('audio-%s' % audio_lang) + if hardsub_lang: + stream_id_list.append('hardsub-%s' % hardsub_lang) + stream_id = '-'.join(stream_id_list) format_id = '%s-%s' % (stream_format, stream_id) if stream_format == 'hls': adaptive_formats = self._extract_m3u8_formats( From 1084563eaab3a039ea8ccb65f2507e919d8e11f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 16 Sep 2018 23:54:25 +0700 Subject: [PATCH 0035/1201] [crunchyroll] Prefer hardsubless formats and formats in locale language --- youtube_dl/extractor/crunchyroll.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index ba8b9fa7e..af786d096 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -445,6 +445,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text webpage, 'vilos media', default='{}'), video_id) media_metadata = media.get('metadata') or {} + language = self._search_regex( + r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'language', default=None, group='lang') + video_title = self._html_search_regex( r'(?s)]*>((?:(?!]+itemprop=["\']title["\'][^>]*>(?:(?!', webpage, 'video_title') @@ -466,9 +470,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text formats = [] for stream in media.get('streams', []): - formats.extend(self._extract_vrv_formats( + audio_lang = stream.get('audio_lang') + hardsub_lang = stream.get('hardsub_lang') + vrv_formats = self._extract_vrv_formats( stream.get('url'), video_id, stream.get('format'), - stream.get('audio_lang'), stream.get('hardsub_lang'))) + audio_lang, hardsub_lang) + for f in vrv_formats: + if not hardsub_lang: + f['preference'] = 1 + language_preference = 0 + if audio_lang == language: + language_preference += 1 + if hardsub_lang == language: + language_preference += 1 + if language_preference: + f['language_preference'] = language_preference + formats.extend(vrv_formats) if not formats: available_fmts = [] for a, fmt in re.findall(r'(]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): @@ -557,7 +574,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'ext': 'flv', }) formats.append(format_info) - self._sort_formats(formats, ('height', 'width', 'tbr', 'fps')) + self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps')) metadata = self._call_rpc_api( 'VideoPlayer_GetMediaMetadata', video_id, From c11485162bcbf6f517fd9225850a048fc5f7cec1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Sep 2018 22:13:39 +0700 Subject: [PATCH 0036/1201] [youtube] Don't pollute default query dict (closes #17593) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2fe074cb4..e80e36f98 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -259,7 +259,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return True def _download_webpage_handle(self, *args, **kwargs): - kwargs.setdefault('query', {})['disable_polymer'] = 'true' + query = kwargs.get('query', {}).copy() + query['disable_polymer'] = 'true' + kwargs['query'] = query return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) From e2f61598be49b75f16b501f7a4d24f4c8c230c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Sep 2018 22:14:28 +0700 Subject: [PATCH 0037/1201] [twitch] Don't pollute default headers dict --- youtube_dl/extractor/twitch.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 26661fdf6..401615683 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -51,7 +51,9 @@ class TwitchBaseIE(InfoExtractor): expected=True) def _call_api(self, path, item_id, *args, **kwargs): - kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID + headers = kwargs.get('headers', {}).copy() + headers['Client-ID'] = self._CLIENT_ID + kwargs['headers'] = headers response = self._download_json( '%s/%s' % (self._API_BASE, path), item_id, *args, **compat_kwargs(kwargs)) From c8f6ab8c38f8397b06028cdea2839d6e25e2c809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Sep 2018 22:14:53 +0700 Subject: [PATCH 0038/1201] [udemy] Don't pollute default headers dict --- youtube_dl/extractor/udemy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 79c45f80e..105826e9b 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -122,7 +122,9 @@ class UdemyIE(InfoExtractor): raise ExtractorError(error_str, expected=True) def _download_webpage_handle(self, *args, **kwargs): - kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + headers = kwargs.get('headers', {}).copy() + headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + kwargs['headers'] = headers return super(UdemyIE, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) From e504b0907082512d5252643d7fd8c5ca67225e3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Sep 2018 22:15:27 +0700 Subject: [PATCH 0039/1201] [adobepass] Don't pollute default headers dict --- youtube_dl/extractor/adobepass.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index b83b51efb..1cf2dcbf3 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -1325,8 +1325,8 @@ class AdobePassIE(InfoExtractor): _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' def _download_webpage_handle(self, *args, **kwargs): - headers = kwargs.get('headers', {}) - headers.update(self.geo_verification_headers()) + headers = self.geo_verification_headers() + headers.update(kwargs.get('headers', {})) kwargs['headers'] = headers return super(AdobePassIE, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) From d9b1cec1712c39d7e98a4ca44051643eebef8b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 18 Sep 2018 01:44:55 +0700 Subject: [PATCH 0040/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index d184f69ee..b2b2d90a1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +version + +Core ++ [extractor/common] Introduce channel meta fields + +Extractors +* [adobepass] Don't pollute default headers dict +* [udemy] Don't pollute default headers dict +* [twitch] Don't pollute default headers dict +* [youtube] Don't pollute default query dict (#17593) +* [crunchyroll] Prefer hardsubless formats and formats in locale language +* [vrv] Make format ids deterministic +* [vimeo] Fix ondemand playlist extraction (#14591) ++ [pornhub] Extract upload date (#17574) ++ [porntube] Extract channel meta fields ++ [vimeo] Extract channel meta fields ++ [youtube] Extract channel meta fields (#9676, #12939) +* [porntube] Fix extraction (#17541) +* [asiancrush] Fix extraction (#15630) ++ [twitch:clips] Extend URL regular expression (closes #17559) ++ [vzaar] Add support for HLS +* [tube8] Fix metadata extraction (#17520) +* [eporner] Extract JSON-LD (#17519) + + version 2018.09.10 Core From 3a9c928426258e51c74515b09f7c6f565d60efe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 18 Sep 2018 01:46:36 +0700 Subject: [PATCH 0041/1201] release 2018.09.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f41266f32..a4602287a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.10 +[debug] youtube-dl version 2018.09.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index b2b2d90a1..800ece790 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.09.18 Core + [extractor/common] Introduce channel meta fields diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b078c4993..2b3b584a4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.10' +__version__ = '2018.09.18' From 28fcb7b06178cebfc361f0d1c491ea9507b0d75e Mon Sep 17 00:00:00 2001 From: Leonardo Taccari Date: Wed, 19 Sep 2018 04:48:39 +0200 Subject: [PATCH 0042/1201] [raiplay:playlist] Remove a debug leftover print() --- youtube_dl/extractor/rai.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index f916b2619..548a6553b 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -274,7 +274,6 @@ class RaiPlayPlaylistIE(InfoExtractor): ('programma', 'nomeProgramma'), webpage, 'title') description = unescapeHTML(self._html_search_meta( ('description', 'og:description'), webpage, 'description')) - print(description) entries = [] for mobj in re.finditer( From 4ac73fc170f41ccb2d468a0b8563b2ec39d9e9b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 19 Sep 2018 22:16:43 +0700 Subject: [PATCH 0043/1201] [popcorntv] Remove debug output --- youtube_dl/extractor/popcorntv.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/popcorntv.py b/youtube_dl/extractor/popcorntv.py index ac901f426..9f834fb6c 100644 --- a/youtube_dl/extractor/popcorntv.py +++ b/youtube_dl/extractor/popcorntv.py @@ -58,8 +58,6 @@ class PopcornTVIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) timestamp = unified_timestamp(self._html_search_meta( 'uploadDate', webpage, 'timestamp')) - print(self._html_search_meta( - 'duration', webpage)) duration = int_or_none(self._html_search_meta( 'duration', webpage), invscale=60) view_count = int_or_none(self._html_search_meta( From 21160a179256cfbb859948138905ffe67cb970a8 Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Sun, 23 Sep 2018 16:34:47 +0200 Subject: [PATCH 0044/1201] [zattoo] Fix extraction (closes #17175) --- youtube_dl/extractor/zattoo.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index fb167c198..9c9024799 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -93,28 +93,30 @@ class ZattooBaseIE(InfoExtractor): def _extract_cid_and_video_info(self, video_id): data = self._download_json( - '%s/zapi/program/details' % self._HOST_URL, + '%s/zapi/v2/cached/program/power_details/%s' % ( + self._HOST_URL, self._power_guide_hash), video_id, 'Downloading video information', query={ - 'program_id': video_id, - 'complete': True + 'program_ids': video_id, + 'complete': True, }) - p = data['program'] + p = data['programs'][0] cid = p['cid'] info_dict = { 'id': video_id, - 'title': p.get('title') or p['episode_title'], - 'description': p.get('description'), - 'thumbnail': p.get('image_url'), + 'title': p.get('t') or p['et'], + 'description': p.get('d'), + 'thumbnail': p.get('i_url'), 'creator': p.get('channel_name'), - 'episode': p.get('episode_title'), - 'episode_number': int_or_none(p.get('episode_number')), - 'season_number': int_or_none(p.get('season_number')), + 'episode': p.get('et'), + 'episode_number': int_or_none(p.get('e_no')), + 'season_number': int_or_none(p.get('s_no')), 'release_year': int_or_none(p.get('year')), - 'categories': try_get(p, lambda x: x['categories'], list), + 'categories': try_get(p, lambda x: x['c'], list), + 'tags': try_get(p, lambda x: x['g'], list) } return cid, info_dict From f6d7f7b474075955afb02c373b8d2a442faaa0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Sep 2018 23:30:18 +0700 Subject: [PATCH 0045/1201] [zattoo] Add support for more zattoo platform sites --- youtube_dl/extractor/extractors.py | 12 ++ youtube_dl/extractor/zattoo.py | 178 ++++++++++++++++++++++++++--- 2 files changed, 172 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7dc569724..464c8d690 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1455,8 +1455,20 @@ from .youtube import ( from .zapiks import ZapiksIE from .zaq1 import Zaq1IE from .zattoo import ( + BBVTVIE, + EinsUndEinsTVIE, + EWETVIE, + GlattvisionTVIE, + MNetTVIE, + MyVisionTVIE, + NetPlusIE, + OsnatelTVIE, + QuantumTVIE, QuicklineIE, QuicklineLiveIE, + SAKTVIE, + VTXTVIE, + WalyTVIE, ZattooIE, ZattooLiveIE, ) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index 9c9024799..bbe0aecb6 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -18,12 +18,12 @@ from ..utils import ( ) -class ZattooBaseIE(InfoExtractor): - _NETRC_MACHINE = 'zattoo' - _HOST_URL = 'https://zattoo.com' - +class ZattooPlatformBaseIE(InfoExtractor): _power_guide_hash = None + def _host_url(self): + return 'https://%s' % self._HOST + def _login(self): username, password = self._get_login_info() if not username or not password: @@ -33,13 +33,13 @@ class ZattooBaseIE(InfoExtractor): try: data = self._download_json( - '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in', + '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in', data=urlencode_postdata({ 'login': username, 'password': password, 'remember': 'true', }), headers={ - 'Referer': '%s/login' % self._HOST_URL, + 'Referer': '%s/login' % self._host_url(), 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', }) except ExtractorError as e: @@ -53,7 +53,7 @@ class ZattooBaseIE(InfoExtractor): def _real_initialize(self): webpage = self._download_webpage( - self._HOST_URL, None, 'Downloading app token') + self._host_url(), None, 'Downloading app token') app_token = self._html_search_regex( r'appToken\s*=\s*(["\'])(?P(?:(?!\1).)+?)\1', webpage, 'app token', group='token') @@ -62,7 +62,7 @@ class ZattooBaseIE(InfoExtractor): # Will setup appropriate cookies self._request_webpage( - '%s/zapi/v2/session/hello' % self._HOST_URL, None, + '%s/zapi/v2/session/hello' % self._host_url(), None, 'Opening session', data=urlencode_postdata({ 'client_app_token': app_token, 'uuid': compat_str(uuid4()), @@ -75,7 +75,7 @@ class ZattooBaseIE(InfoExtractor): def _extract_cid(self, video_id, channel_name): channel_groups = self._download_json( - '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL, + '%s/zapi/v2/cached/channels/%s' % (self._host_url(), self._power_guide_hash), video_id, 'Downloading channel list', query={'details': False})['channel_groups'] @@ -94,7 +94,7 @@ class ZattooBaseIE(InfoExtractor): def _extract_cid_and_video_info(self, video_id): data = self._download_json( '%s/zapi/v2/cached/program/power_details/%s' % ( - self._HOST_URL, self._power_guide_hash), + self._host_url(), self._power_guide_hash), video_id, 'Downloading video information', query={ @@ -128,11 +128,11 @@ class ZattooBaseIE(InfoExtractor): if is_live: postdata_common.update({'timeshift': 10800}) - url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid) + url = '%s/zapi/watch/live/%s' % (self._host_url(), cid) elif record_id: - url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id) + url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id) else: - url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id) + url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id) formats = [] for stream_type in ('dash', 'hls', 'hls5', 'hds'): @@ -203,13 +203,13 @@ class ZattooBaseIE(InfoExtractor): return info_dict -class QuicklineBaseIE(ZattooBaseIE): +class QuicklineBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'quickline' - _HOST_URL = 'https://mobiltv.quickline.com' + _HOST = 'mobiltv.quickline.com' class QuicklineIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P[^/]+)/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P[^/]+)/(?P[0-9]+)' % re.escape(QuicklineBaseIE._HOST) _TEST = { 'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste', @@ -222,7 +222,7 @@ class QuicklineIE(QuicklineBaseIE): class QuicklineLiveIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P[^/]+)' % re.escape(QuicklineBaseIE._HOST) _TEST = { 'url': 'https://mobiltv.quickline.com/watch/srf1', @@ -238,8 +238,18 @@ class QuicklineLiveIE(QuicklineBaseIE): return self._extract_video(channel_name, video_id, is_live=True) +class ZattooBaseIE(ZattooPlatformBaseIE): + _NETRC_MACHINE = 'zattoo' + _HOST = 'zattoo.com' + + +def _make_valid_url(tmpl, host): + return tmpl % re.escape(host) + + class ZattooIE(ZattooBaseIE): - _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P[^/]+?)/(?P[0-9]+)[^/]+(?:/(?P[0-9]+))?' + _VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P[^/]+?)/(?P[0-9]+)[^/]+(?:/(?P[0-9]+))?' + _VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST) # Since regular videos are only available for 7 days and recorded videos # are only available for a specific user, we cannot have detailed tests. @@ -271,3 +281,135 @@ class ZattooLiveIE(ZattooBaseIE): def _real_extract(self, url): channel_name = video_id = self._match_id(url) return self._extract_video(channel_name, video_id, is_live=True) + + +class NetPlusIE(ZattooIE): + _NETRC_MACHINE = 'netplus' + _HOST = 'netplus.tv' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.netplus.tv/watch/abc/123-abc', + 'only_matching': True, + }] + + +class MNetTVIE(ZattooIE): + _NETRC_MACHINE = 'mnettv' + _HOST = 'tvplus.m-net.de' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc', + 'only_matching': True, + }] + + +class WalyTVIE(ZattooIE): + _NETRC_MACHINE = 'walytv' + _HOST = 'player.waly.tv' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.player.waly.tv/watch/abc/123-abc', + 'only_matching': True, + }] + + +class BBVTVIE(ZattooIE): + _NETRC_MACHINE = 'bbvtv' + _HOST = 'bbv-tv.net' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.bbv-tv.net/watch/abc/123-abc', + 'only_matching': True, + }] + + +class VTXTVIE(ZattooIE): + _NETRC_MACHINE = 'vtxtv' + _HOST = 'vtxtv.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.vtxtv.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class MyVisionTVIE(ZattooIE): + _NETRC_MACHINE = 'myvisiontv' + _HOST = 'myvisiontv.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.myvisiontv.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class GlattvisionTVIE(ZattooIE): + _NETRC_MACHINE = 'glattvisiontv' + _HOST = 'iptv.glattvision.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class SAKTVIE(ZattooIE): + _NETRC_MACHINE = 'saktv' + _HOST = 'saktv.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.saktv.ch/watch/abc/123-abc', + 'only_matching': True, + }] + + +class EWETVIE(ZattooIE): + _NETRC_MACHINE = 'ewetv' + _HOST = 'tvonline.ewe.de' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc', + 'only_matching': True, + }] + + +class QuantumTVIE(ZattooIE): + _NETRC_MACHINE = 'quantumtv' + _HOST = 'quantum-tv.com' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.quantum-tv.com/watch/abc/123-abc', + 'only_matching': True, + }] + + +class OsnatelTVIE(ZattooIE): + _NETRC_MACHINE = 'osnateltv' + _HOST = 'onlinetv.osnatel.de' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc', + 'only_matching': True, + }] + + +class EinsUndEinsTVIE(ZattooIE): + _NETRC_MACHINE = '1und1tv' + _HOST = '1und1.tv' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://www.1und1.tv/watch/abc/123-abc', + 'only_matching': True, + }] From cd5a74a28e7102e877d00b1e3cffba82d28b2f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Sep 2018 00:14:49 +0700 Subject: [PATCH 0046/1201] [youtube] Add support for invidio.us (closes #17613) --- youtube_dl/extractor/youtube.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e80e36f98..78203ef84 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -349,6 +349,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| + (?:www\.)?invidio\.us/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -1068,6 +1069,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', 'only_matching': True, }, + { + 'url': 'https://invidio.us/watch?v=BaW_jenozKc', + 'only_matching': True, + }, ] def __init__(self, *args, **kwargs): @@ -2419,7 +2424,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com channels' - _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P[0-9A-Za-z_-]+)' + _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P[0-9A-Za-z_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos' _VIDEO_RE = r'(?:title="(?P[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?' IE_NAME = 'youtube:channel' @@ -2440,6 +2445,9 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', }, + }, { + 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', + 'only_matching': True, }] @classmethod From 60ce0c67fd1ef71463af2c036bbabf06ec26bd98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 25 Sep 2018 23:43:41 +0700 Subject: [PATCH 0047/1201] [README.md] Document channel meta fields for output template --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index dd068a462..fdd115c9b 100644 --- a/README.md +++ b/README.md @@ -511,6 +511,8 @@ The basic usage is not to set any template arguments when downloading a single f - `timestamp` (numeric): UNIX timestamp of the moment the video became available - `upload_date` (string): Video upload date (YYYYMMDD) - `uploader_id` (string): Nickname or id of the video uploader + - `channel` (string): Full name of the channel the video is uploaded on + - `channel_id` (string): Id of the channel - `location` (string): Physical location where the video was filmed - `duration` (numeric): Length of the video in seconds - `view_count` (numeric): How many users have watched the video on the platform From 8fd12a083131550476fb771c180a0734794d0b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 05:38:41 +0700 Subject: [PATCH 0048/1201] [mediaset] Improve embed support (closes #17668) --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/mediaset.py | 38 +++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 76ef01332..2a48667f0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3023,7 +3023,7 @@ class GenericIE(InfoExtractor): wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key()) # Look for Mediaset embeds - mediaset_urls = MediasetIE._extract_urls(webpage) + mediaset_urls = MediasetIE._extract_urls(self, webpage) if mediaset_urls: return self.playlist_from_matches( mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index 57f97409d..df3748798 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -4,6 +4,11 @@ from __future__ import unicode_literals import re from .theplatform import ThePlatformBaseIE +from ..compat import ( + compat_parse_qs, + compat_str, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, int_or_none, @@ -76,12 +81,33 @@ class MediasetIE(ThePlatformBaseIE): }] @staticmethod - def _extract_urls(webpage): - return [ - mobj.group('url') - for mobj in re.finditer( - r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1', - webpage)] + def _extract_urls(ie, webpage): + def _qs(url): + return compat_parse_qs(compat_urllib_parse_urlparse(url).query) + + def _program_guid(qs): + return qs.get('programGuid', [None])[0] + + entries = [] + for mobj in re.finditer( + r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1', + webpage): + embed_url = mobj.group('url') + embed_qs = _qs(embed_url) + program_guid = _program_guid(embed_qs) + if program_guid: + entries.append(embed_url) + continue + video_id = embed_qs.get('id', [None])[0] + if not video_id: + continue + urlh = ie._request_webpage( + embed_url, video_id, note='Following embed URL redirect') + embed_url = compat_str(urlh.geturl()) + program_guid = _program_guid(_qs(embed_url)) + if program_guid: + entries.append(embed_url) + return entries def _real_extract(self, url): guid = self._match_id(url) From c17e100b96e3a1c1d2115226c9a2f166c8338031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 09:27:40 +0700 Subject: [PATCH 0049/1201] [pluralsight] Fix subtitles extraction (closes #17671) --- youtube_dl/extractor/pluralsight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 1257841e4..ec67381bb 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -213,7 +213,7 @@ query viewClip { def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id): captions_post = { 'a': author, - 'cn': clip_idx, + 'cn': int(clip_idx), 'lc': lang, 'm': name, } From 3d3499742c88ce89de5b49dff4e96483b089fcaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 11:56:15 +0700 Subject: [PATCH 0050/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 800ece790..15b875297 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +version <unreleased> + +Extractors +* [pluralsight] Fix subtitles extraction (#17671) +* [mediaset] Improve embed support (#17668) ++ [youtube] Add support for invidio.us (#17613) ++ [zattoo] Add support for more zattoo platform sites +* [zattoo] Fix extraction (#17175, #17542) + + version 2018.09.18 Core From 4c89a675dd50429a1a5f5903e624db0204d2435a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Sep 2018 11:58:25 +0700 Subject: [PATCH 0051/1201] release 2018.09.26 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 12 ++++++++++++ youtube_dl/version.py | 2 +- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a4602287a..ed3e0a157 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.18 +[debug] youtube-dl version 2018.09.26 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 15b875297..241712037 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.09.26 Extractors * [pluralsight] Fix subtitles extraction (#17671) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9b8601751..736ab6da7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -98,6 +98,7 @@ - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:iplayer:playlist** - **bbc.co.uk:playlist** + - **BBVTV** - **Beatport** - **Beeg** - **BehindKink** @@ -251,6 +252,7 @@ - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson - **eHow** + - **EinsUndEinsTV** - **Einthusan** - **eitb.tv** - **EllenTube** @@ -268,6 +270,7 @@ - **EsriVideo** - **Europa** - **EveryonesMixtape** + - **EWETV** - **ExpoTV** - **Expressen** - **ExtremeTube** @@ -327,6 +330,7 @@ - **Gfycat** - **GiantBomb** - **Giga** + - **GlattvisionTV** - **Glide**: Glide mobile video messages (glide.me) - **Globo** - **GloboArticle** @@ -494,6 +498,7 @@ - **Mixer:vod** - **MLB** - **Mnet** + - **MNetTV** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** - **Mojvideo** @@ -525,6 +530,7 @@ - **Myvi** - **MyVidster** - **MyviEmbed** + - **MyVisionTV** - **n-tv.de** - **natgeo** - **natgeo:episodeguide** @@ -550,6 +556,7 @@ - **netease:program**: 网易云音乐 - 电台节目 - **netease:singer**: 网易云音乐 - 歌手 - **netease:song**: 网易云音乐 + - **NetPlus** - **Netzkino** - **Newgrounds** - **NewgroundsPlaylist** @@ -626,6 +633,7 @@ - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek + - **OsnatelTV** - **PacktPub** - **PacktPubCourse** - **PandaTV**: 熊猫TV @@ -686,6 +694,7 @@ - **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 + - **QuantumTV** - **Quickline** - **QuicklineLive** - **R7** @@ -753,6 +762,7 @@ - **safari**: safaribooksonline.com online video - **safari:api** - **safari:course**: safaribooksonline.com online courses + - **SAKTV** - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au @@ -1035,12 +1045,14 @@ - **vrv** - **vrv:series** - **VShare** + - **VTXTV** - **vube**: Vube.com - **VuClip** - **VVVVID** - **VyboryMos** - **Vzaar** - **Walla** + - **WalyTV** - **washingtonpost** - **washingtonpost:article** - **wat.tv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2b3b584a4..6f2cc31df 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.18' +__version__ = '2018.09.26' From 85cd69adcb41454f30d4db85eeb4d5cc26b6bb5d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 26 Sep 2018 08:13:16 +0100 Subject: [PATCH 0052/1201] [hotstar] fix extraction(closes #14694)(closes #14931)(closes #17637) --- youtube_dl/extractor/hotstar.py | 159 +++++++++++++++----------------- 1 file changed, 73 insertions(+), 86 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index d28af36ec..354ac00dc 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -1,10 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import hashlib +import hmac +import time from .common import InfoExtractor -from ..compat import compat_str +from ..compat import compat_HTTPError from ..utils import ( determine_ext, ExtractorError, @@ -13,37 +15,40 @@ from ..utils import ( class HotStarBaseIE(InfoExtractor): - _GEO_COUNTRIES = ['IN'] + _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' - def _download_json(self, *args, **kwargs): - response = super(HotStarBaseIE, self)._download_json(*args, **kwargs) - if response['resultCode'] != 'OK': - if kwargs.get('fatal'): - raise ExtractorError( - response['errorDescription'], expected=True) - return None - return response['resultObj'] - - def _download_content_info(self, content_id): - return self._download_json( - 'https://account.hotstar.com/AVS/besc', content_id, query={ - 'action': 'GetAggregatedContentDetails', - 'appVersion': '5.0.40', - 'channel': 'PCTV', - 'contentId': content_id, - })['contentInfo'][0] + def _call_api(self, path, video_id, query_name='contentId'): + st = int(time.time()) + exp = st + 6000 + auth = 'st=%d~exp=%d~acl=/*' % (st, exp) + auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() + response = self._download_json( + 'https://api.hotstar.com/' + path, + video_id, headers={ + 'hotstarauth': auth, + 'x-country-code': 'IN', + 'x-platform-code': 'JIO', + }, query={ + query_name: video_id, + 'tas': 10000, + }) + if response['statusCode'] != 'OK': + raise ExtractorError( + response['body']['message'], expected=True) + return response['body']['results'] class HotStarIE(HotStarBaseIE): + IE_NAME = 'hotstar' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' _TESTS = [{ - 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', + 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', 'info_dict': { 'id': '1000076273', 'ext': 'mp4', - 'title': 'On Air With AIB', + 'title': 'Can You Not Spread Rumours?', 'description': 'md5:c957d8868e9bc793ccb813691cc4c434', - 'timestamp': 1447227000, + 'timestamp': 1447248600, 'upload_date': '20151111', 'duration': 381, }, @@ -58,47 +63,43 @@ class HotStarIE(HotStarBaseIE): 'url': 'http://www.hotstar.com/1000000515', 'only_matching': True, }] + _GEO_BYPASS = False def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_content_info(video_id) + webpage = self._download_webpage(url, video_id) + app_state = self._parse_json(self._search_regex( + r'<script>window\.APP_STATE\s*=\s*({.+?})</script>', + webpage, 'app state'), video_id) + video_data = list(app_state.values())[0]['initialState']['contentData']['content'] - title = video_data['episodeTitle'] + title = video_data['title'] - if video_data.get('encrypted') == 'Y': + if video_data.get('drmProtected'): raise ExtractorError('This video is DRM protected.', expected=True) formats = [] - for f in ('JIO',): - format_data = self._download_json( - 'http://getcdn.hotstar.com/AVS/besc', - video_id, 'Downloading %s JSON metadata' % f, - fatal=False, query={ - 'action': 'GetCDN', - 'asJson': 'Y', - 'channel': f, - 'id': video_id, - 'type': 'VOD', - }) - if format_data: - format_url = format_data.get('src') - if not format_url: - continue - ext = determine_ext(format_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - m3u8_id='hls', fatal=False)) - elif ext == 'f4m': - # produce broken files - continue - else: - formats.append({ - 'url': format_url, - 'width': int_or_none(format_data.get('width')), - 'height': int_or_none(format_data.get('height')), - }) + format_data = self._call_api('h/v1/play', video_id)['item'] + format_url = format_data['playbackUrl'] + ext = determine_ext(format_url) + if ext == 'm3u8': + try: + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id='hls')) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self.raise_geo_restricted(countries=['IN']) + raise + elif ext == 'f4m': + # produce broken files + pass + else: + formats.append({ + 'url': format_url, + 'width': int_or_none(format_data.get('width')), + 'height': int_or_none(format_data.get('height')), + }) self._sort_formats(formats) return { @@ -106,57 +107,43 @@ class HotStarIE(HotStarBaseIE): 'title': title, 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), - 'timestamp': int_or_none(video_data.get('broadcastDate')), + 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), 'formats': formats, + 'channel': video_data.get('channelName'), + 'channel_id': video_data.get('channelId'), + 'series': video_data.get('showName'), + 'season': video_data.get('seasonName'), + 'season_number': int_or_none(video_data.get('seasonNo')), + 'season_id': video_data.get('seasonId'), 'episode': title, - 'episode_number': int_or_none(video_data.get('episodeNumber')), - 'series': video_data.get('contentTitle'), + 'episode_number': int_or_none(video_data.get('episodeNo')), } class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' - _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' _TESTS = [{ - 'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993', + 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', 'info_dict': { - 'id': '14812', + 'id': '3_2_26', }, - 'playlist_mincount': 75, + 'playlist_mincount': 20, }, { - 'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998', + 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', 'only_matching': True, }] - _ITEM_TYPES = { - 'episodes': 'EPISODE', - 'popular-clips': 'CLIPS', - } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - base_url = mobj.group('url') - content_id = mobj.group('content_id') - playlist_type = mobj.group('type') + playlist_id = self._match_id(url) - content_info = self._download_content_info(content_id) - playlist_id = compat_str(content_info['categoryId']) - - collection = self._download_json( - 'https://search.hotstar.com/AVS/besc', playlist_id, query={ - 'action': 'SearchContents', - 'appVersion': '5.0.40', - 'channel': 'PCTV', - 'moreFilters': 'series:%s;' % playlist_id, - 'query': '*', - 'searchOrder': 'last_broadcast_date desc,year desc,title asc', - 'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'), - }) + collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId') entries = [ self.url_result( - '%s/_/%s' % (base_url, video['contentId']), + 'https://www.hotstar.com/%s' % video['contentId'], ie=HotStarIE.ie_key(), video_id=video['contentId']) - for video in collection['response']['docs'] + for video in collection['assets']['items'] if video.get('contentId')] return self.playlist_result(entries, playlist_id) From 245cbb33bcb7d519c897277e65acdcbc45335233 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 28 Sep 2018 15:13:25 +0100 Subject: [PATCH 0053/1201] [spike] fix Paramount Network extraction(closes #17677) --- youtube_dl/extractor/spike.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index e76522b45..6090e0066 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -44,3 +44,10 @@ class ParamountNetworkIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/' _GEO_COUNTRIES = ['US'] + + def _extract_mgid(self, webpage): + cs = self._parse_json(self._search_regex( + r'window\.__DATA__\s*=\s*({.+})', + webpage, 'data'), None)['children'] + c = next(c for c in cs if c.get('type') == 'VideoPlayer') + return c['props']['media']['video']['config']['uri'] From 85fa80d5f9d867bab706b92d15a6c9ad5b862b47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 21:13:43 +0700 Subject: [PATCH 0054/1201] [vimeo] Add another config regex (closes #17690) --- youtube_dl/extractor/vimeo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 0a9239b62..88f4d9979 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -551,6 +551,7 @@ class VimeoIE(VimeoBaseInfoExtractor): else: config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;') + config_re.append(r'\bconfig\s*=\s*({.+?})\s*;') config = self._search_regex(config_re, webpage, 'info section', flags=re.DOTALL) config = json.loads(config) From 365343131d752bece96d2279a3e0bcd7e9f0000f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 21:45:24 +0700 Subject: [PATCH 0055/1201] [pluralsight] Fix subtitles extraction (closes #17726, closes #17728) --- youtube_dl/extractor/pluralsight.py | 34 ++++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index ec67381bb..daf172570 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -210,18 +210,26 @@ query viewClip { raise ExtractorError('Unable to log in') - def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id): - captions_post = { - 'a': author, - 'cn': int(clip_idx), - 'lc': lang, - 'm': name, - } - captions = self._download_json( - '%s/player/retrieve-captions' % self._API_BASE, video_id, - 'Downloading captions JSON', 'Unable to download captions JSON', - fatal=False, data=json.dumps(captions_post).encode('utf-8'), - headers={'Content-Type': 'application/json;charset=utf-8'}) + def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id): + captions = None + if clip_id: + captions = self._download_json( + '%s/transcript/api/v1/caption/json/%s/%s' + % (self._API_BASE, clip_id, lang), video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False) + if not captions: + captions_post = { + 'a': author, + 'cn': int(clip_idx), + 'lc': lang, + 'm': name, + } + captions = self._download_json( + '%s/player/retrieve-captions' % self._API_BASE, video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False, data=json.dumps(captions_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) if captions: return { lang: [{ @@ -413,7 +421,7 @@ query viewClip { # TODO: other languages? subtitles = self.extract_subtitles( - author, clip_idx, 'en', name, duration, display_id) + author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id) return { 'id': clip_id, From 9795d93316cb63e0e2a73ee840600be7f13e19f5 Mon Sep 17 00:00:00 2001 From: Enes <enessolak99@gmail.com> Date: Mon, 1 Oct 2018 17:48:59 +0300 Subject: [PATCH 0056/1201] [openload] Add support for oload.cloud (closes #17710) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index d264fe206..dc01b6346 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -307,6 +307,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.download/f/kUEfGclsU9o', 'only_matching': True, + }, { + 'url': 'https://oload.cloud/f/4ZDnBXRWiB8', + 'only_matching': True, }, { # Its title has not got its extension but url has it 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', From 3c7da54c92b7560c3ceeb8b58a67d4759ed843d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 22:05:18 +0700 Subject: [PATCH 0057/1201] [jamendo] Add support for licensing.jamendo.com (closes #17724) --- youtube_dl/extractor/jamendo.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 595d7a5b7..c21827618 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -26,8 +26,15 @@ class JamendoBaseIE(InfoExtractor): class JamendoIE(JamendoBaseIE): - _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + licensing\.jamendo\.com/[^/]+| + (?:www\.)?jamendo\.com + ) + /track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+) + ''' + _TESTS = [{ 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', 'md5': '6e9e82ed6db98678f171c25a8ed09ffd', 'info_dict': { @@ -40,14 +47,19 @@ class JamendoIE(JamendoBaseIE): 'duration': 210, 'thumbnail': r're:^https?://.*\.jpg' } - } + }, { + 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock', + 'only_matching': True, + }] def _real_extract(self, url): mobj = self._VALID_URL_RE.match(url) track_id = mobj.group('id') display_id = mobj.group('display_id') - webpage = self._download_webpage(url, display_id) + webpage = self._download_webpage( + 'https://www.jamendo.com/track/%s/%s' % (track_id, display_id), + display_id) title, artist, track = self._extract_meta(webpage) From 66d106f2705d565675b6a94fb0e7e0bdfe132065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Oct 2018 23:29:24 +0700 Subject: [PATCH 0058/1201] [philharmoniedeparis] Fix extraction and add support for pad.philharmoniedeparis.fr (closes #17705) --- youtube_dl/extractor/philharmoniedeparis.py | 118 ++++++++++++-------- 1 file changed, 70 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index f1008ae51..f723a2b3b 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -2,31 +2,38 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - float_or_none, - int_or_none, - parse_iso8601, - xpath_text, + try_get, + urljoin, ) class PhilharmonieDeParisIE(InfoExtractor): IE_DESC = 'Philharmonie de Paris' - _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' + _VALID_URL = r'''(?x) + https?:// + (?: + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)| + pad\.philharmoniedeparis\.fr/doc/CIMU/ + ) + (?P<id>\d+) + ''' _TESTS = [{ + 'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower', + 'md5': 'a0a4b195f544645073631cbec166a2c2', + 'info_dict': { + 'id': '1086697', + 'ext': 'mp4', + 'title': 'Jazz à la Villette : Knower', + }, + }, { 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', 'info_dict': { 'id': '1032066', - 'ext': 'flv', - 'title': 'md5:d1f5585d87d041d07ce9434804bc8425', - 'timestamp': 1428179400, - 'upload_date': '20150404', - 'duration': 6592.278, + 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0', }, - 'params': { - # rtmp download - 'skip_download': True, - } + 'playlist_mincount': 2, }, { 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', 'only_matching': True, @@ -34,45 +41,60 @@ class PhilharmonieDeParisIE(InfoExtractor): 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', 'only_matching': True, }] + _LIVE_URL = 'https://live.philharmoniedeparis.fr' def _real_extract(self, url): video_id = self._match_id(url) - concert = self._download_xml( - 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id, - video_id).find('./concert') + config = self._download_json( + '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={ + 'id': video_id, + 'lang': 'fr-FR', + }) - formats = [] - info_dict = { - 'id': video_id, - 'title': xpath_text(concert, './titre', 'title', fatal=True), - 'formats': formats, - } - - fichiers = concert.find('./fichiers') - stream = fichiers.attrib['serveurstream'] - for fichier in fichiers.findall('./fichier'): - info_dict['duration'] = float_or_none(fichier.get('timecodefin')) - for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]): - format_url = fichier.get('url%s' % suffix) - if not format_url: + def extract_entry(source): + if not isinstance(source, dict): + return + title = source.get('title') + if not title: + return + files = source.get('files') + if not isinstance(files, dict): + return + format_urls = set() + formats = [] + for format_id in ('mobile', 'desktop'): + format_url = try_get( + files, lambda x: x[format_id]['file'], compat_str) + if not format_url or format_url in format_urls: continue - formats.append({ - 'url': stream, - 'play_path': format_url, - 'ext': 'flv', - 'format_id': format_id, - 'width': int_or_none(concert.get('largeur%s' % suffix)), - 'height': int_or_none(concert.get('hauteur%s' % suffix)), - 'quality': quality, - }) - self._sort_formats(formats) + format_urls.add(format_url) + m3u8_url = urljoin(self._LIVE_URL, format_url) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + if not formats: + return + self._sort_formats(formats) + return { + 'title': title, + 'formats': formats, + } - date, hour = concert.get('date'), concert.get('heure') - if date and hour: - info_dict['timestamp'] = parse_iso8601( - '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour)) - elif date: - info_dict['upload_date'] = date + thumbnail = urljoin(self._LIVE_URL, config.get('image')) - return info_dict + info = extract_entry(config) + if info: + info.update({ + 'id': video_id, + 'thumbnail': thumbnail, + }) + return info + + entries = [] + for num, chapter in enumerate(config['chapters'], start=1): + entry = extract_entry(chapter) + entry['id'] = '%s-%d' % (video_id, num) + entries.append(entry) + + return self.playlist_result(entries, video_id, config.get('title')) From 05e7c184da85f83b254bc3d138f89b11da802bdb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 2 Oct 2018 06:07:06 +0100 Subject: [PATCH 0059/1201] [hotstar] fix extraction in python 2(closes #17696) --- youtube_dl/extractor/hotstar.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 354ac00dc..bf5717f1b 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -11,6 +11,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + try_get, ) @@ -72,7 +73,11 @@ class HotStarIE(HotStarBaseIE): app_state = self._parse_json(self._search_regex( r'<script>window\.APP_STATE\s*=\s*({.+?})</script>', webpage, 'app state'), video_id) - video_data = list(app_state.values())[0]['initialState']['contentData']['content'] + video_data = {} + for v in app_state.values(): + content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict) + if content and content.get('contentId') == video_id: + video_data = content title = video_data['title'] From d98cb62e552ee74079fda2e4173a40b14faac3fe Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 2 Oct 2018 19:43:06 +0100 Subject: [PATCH 0060/1201] [crunchyroll] switch to HTTPS for RpcApi(closes #17749) --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index af786d096..045be0ab5 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -45,7 +45,7 @@ class CrunchyrollBaseIE(InfoExtractor): data['req'] = 'RpcApi' + method data = compat_urllib_parse_urlencode(data).encode('utf-8') return self._download_xml( - 'http://www.crunchyroll.com/xml/', + 'https://www.crunchyroll.com/xml/', video_id, note, fatal=False, data=data, headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) From f60b9803a473da8e324313d01af91e5676792c77 Mon Sep 17 00:00:00 2001 From: Enes <enessolak99@gmail.com> Date: Sat, 29 Sep 2018 13:28:56 +0300 Subject: [PATCH 0061/1201] [dailymotion] Fix extraction (closes #17699) --- youtube_dl/extractor/dailymotion.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 040f0bd02..842d9a259 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -24,6 +24,7 @@ from ..utils import ( str_to_int, unescapeHTML, urlencode_postdata, + try_get, ) @@ -172,7 +173,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor): webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) - metadata = player['metadata'] + metadata = try_get( + player, lambda x: x['metadata'], dict) or self._download_json( + 'http://www.dailymotion.com/player/metadata/video/%s' % video_id, video_id, query={ + 'integration': 'inline', + 'GK_PV5_NEON': '1', + }) if metadata.get('error', {}).get('type') == 'password_protected': password = self._downloader.params.get('videopassword') From 0082f44a08e33712fcd33ceabab15215c962eaac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:02:58 +0700 Subject: [PATCH 0062/1201] [dailymotion] Improve metadata extraction (closes #17706) --- youtube_dl/extractor/dailymotion.py | 32 ++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 842d9a259..1816c559e 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -22,9 +22,11 @@ from ..utils import ( parse_iso8601, sanitized_Request, str_to_int, - unescapeHTML, - urlencode_postdata, try_get, + unescapeHTML, + update_url_query, + url_or_none, + urlencode_postdata, ) @@ -172,15 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor): r'__PLAYER_CONFIG__\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: - player = self._parse_json(player_v5, video_id) - metadata = try_get( - player, lambda x: x['metadata'], dict) or self._download_json( - 'http://www.dailymotion.com/player/metadata/video/%s' % video_id, video_id, query={ - 'integration': 'inline', - 'GK_PV5_NEON': '1', - }) + player = self._parse_json(player_v5, video_id, fatal=False) or {} + metadata = try_get(player, lambda x: x['metadata'], dict) + if not metadata: + metadata_url = url_or_none(try_get( + player, lambda x: x['context']['metadata_template_url1'])) + if metadata_url: + metadata_url = metadata_url.replace(':videoId', video_id) + else: + metadata_url = update_url_query( + 'https://www.dailymotion.com/player/metadata/video/%s' + % video_id, { + 'embedder': url, + 'integration': 'inline', + 'GK_PV5_NEON': '1', + }) + metadata = self._download_json( + metadata_url, video_id, 'Downloading metadata JSON') - if metadata.get('error', {}).get('type') == 'password_protected': + if try_get(metadata, lambda x: x['error']['type']) == 'password_protected': password = self._downloader.params.get('videopassword') if password: r = int(metadata['id'][1:], 36) From 21c1a00dd7dbb9f7551ca9809a194f6380dee7a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:27:14 +0700 Subject: [PATCH 0063/1201] [pluralsight] Improve authentication (closes #17762) --- youtube_dl/extractor/pluralsight.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index daf172570..eafe56897 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -4,6 +4,7 @@ import collections import json import os import random +import re from .common import InfoExtractor from ..compat import ( @@ -196,7 +197,10 @@ query viewClip { if error: raise ExtractorError('Unable to login: %s' % error, expected=True) - if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')): + if all(not re.search(p, response) for p in ( + r'__INITIAL_STATE__', r'["\']currentUser["\']', + # new layout? + r'>\s*Sign out\s*<')): BLOCKED = 'Your account has been blocked due to suspicious activity' if BLOCKED in response: raise ExtractorError( From 2e7ed29e3429c20e735f3f7dfceb1b13cf757037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:29:52 +0700 Subject: [PATCH 0064/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index 241712037..e2757f891 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version <unreleased> + +Extractors +* [pluralsight] Improve authentication (#17762) +* [dailymotion] Fix extraction (#17699) +* [crunchyroll] Switch to HTTPS for RpcApi (#17749) ++ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705) +* [philharmoniedeparis] Fix extraction (#17705) ++ [jamendo] Add support for licensing.jamendo.com (#17724) ++ [openload] Add support for oload.cloud (#17710) +* [pluralsight] Fix subtitles extraction (#17726, #17728) ++ [vimeo] Add another config regular expression (#17690) +* [spike] Fix Paramount Network extraction (#17677) +* [hotstar] Fix extraction (#14694, #14931, #17637) + + version 2018.09.26 Extractors From d96f976b0c36f65894380d3d831b0520d6260c20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 5 Oct 2018 02:31:30 +0700 Subject: [PATCH 0065/1201] release 2018.10.05 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ed3e0a157..058eb4321 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.26 +[debug] youtube-dl version 2018.10.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e2757f891..86cf489b1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.10.05 Extractors * [pluralsight] Improve authentication (#17762) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 736ab6da7..f167a6ddc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -360,7 +360,7 @@ - **HitRecord** - **HornBunny** - **HotNewHipHop** - - **HotStar** + - **hotstar** - **hotstar:playlist** - **Howcast** - **HowStuffWorks** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6f2cc31df..7d3f25019 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.26' +__version__ = '2018.10.05' From c9d891f19a923f53132b49a1f5b97f344d92503c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 5 Oct 2018 20:11:01 +0100 Subject: [PATCH 0066/1201] [patreon] fix extraction(closes #14502)(closes #10471) --- youtube_dl/extractor/patreon.py | 160 ++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 72 deletions(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 9eb027679..6f73ed68d 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -2,52 +2,63 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + clean_html, + determine_ext, + int_or_none, + parse_iso8601, +) class PatreonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)' - _TESTS = [ - { - 'url': 'http://www.patreon.com/creation?hid=743933', - 'md5': 'e25505eec1053a6e6813b8ed369875cc', - 'info_dict': { - 'id': '743933', - 'ext': 'mp3', - 'title': 'Episode 166: David Smalley of Dogma Debate', - 'uploader': 'Cognitive Dissonance Podcast', - 'thumbnail': 're:^https?://.*$', - }, + _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.patreon.com/creation?hid=743933', + 'md5': 'e25505eec1053a6e6813b8ed369875cc', + 'info_dict': { + 'id': '743933', + 'ext': 'mp3', + 'title': 'Episode 166: David Smalley of Dogma Debate', + 'description': 'md5:713b08b772cd6271b9f3906683cfacdf', + 'uploader': 'Cognitive Dissonance Podcast', + 'thumbnail': 're:^https?://.*$', + 'timestamp': 1406473987, + 'upload_date': '20140727', }, - { - 'url': 'http://www.patreon.com/creation?hid=754133', - 'md5': '3eb09345bf44bf60451b8b0b81759d0a', - 'info_dict': { - 'id': '754133', - 'ext': 'mp3', - 'title': 'CD 167 Extra', - 'uploader': 'Cognitive Dissonance Podcast', - 'thumbnail': 're:^https?://.*$', - }, + }, { + 'url': 'http://www.patreon.com/creation?hid=754133', + 'md5': '3eb09345bf44bf60451b8b0b81759d0a', + 'info_dict': { + 'id': '754133', + 'ext': 'mp3', + 'title': 'CD 167 Extra', + 'uploader': 'Cognitive Dissonance Podcast', + 'thumbnail': 're:^https?://.*$', }, - { - 'url': 'https://www.patreon.com/creation?hid=1682498', - 'info_dict': { - 'id': 'SU4fj_aEMVw', - 'ext': 'mp4', - 'title': 'I\'m on Patreon!', - 'uploader': 'TraciJHines', - 'thumbnail': 're:^https?://.*$', - 'upload_date': '20150211', - 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', - 'uploader_id': 'TraciJHines', - }, - 'params': { - 'noplaylist': True, - 'skip_download': True, - } + 'skip': 'Patron-only content', + }, { + 'url': 'https://www.patreon.com/creation?hid=1682498', + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'uploader': 'TraciJHines', + 'thumbnail': 're:^https?://.*$', + 'upload_date': '20150211', + 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', + 'uploader_id': 'TraciJHines', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, } - ] + }, { + 'url': 'https://www.patreon.com/posts/episode-166-of-743933', + 'only_matching': True, + }, { + 'url': 'https://www.patreon.com/posts/743933', + 'only_matching': True, + }] # Currently Patreon exposes download URL via hidden CSS, so login is not # needed. Keeping this commented for when this inevitably changes. @@ -78,38 +89,43 @@ class PatreonIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage).strip() - - attach_fn = self._html_search_regex( - r'<div class="attach"><a target="_blank" href="([^"]+)">', - webpage, 'attachment URL', default=None) - embed = self._html_search_regex( - r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"', - webpage, 'embedded URL', default=None) - - if attach_fn is not None: - video_url = 'http://www.patreon.com' + attach_fn - thumbnail = self._og_search_thumbnail(webpage) - uploader = self._html_search_regex( - r'<strong>(.*?)</strong> is creating', webpage, 'uploader') - elif embed is not None: - return self.url_result(embed) - else: - playlist = self._parse_json(self._search_regex( - r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', - webpage, 'playlist JSON'), - video_id, transform_source=js_to_json) - data = playlist[0] - video_url = self._proto_relative_url(data['mp3']) - thumbnail = self._proto_relative_url(data.get('cover')) - uploader = data.get('artist') - - return { + post = self._download_json( + 'https://www.patreon.com/api/posts/' + video_id, video_id) + attributes = post['data']['attributes'] + title = attributes['title'].strip() + image = attributes.get('image') or {} + info = { 'id': video_id, - 'url': video_url, - 'ext': 'mp3', 'title': title, - 'uploader': uploader, - 'thumbnail': thumbnail, + 'description': clean_html(attributes.get('content')), + 'thumbnail': image.get('large_url') or image.get('url'), + 'timestamp': parse_iso8601(attributes.get('published_at')), + 'like_count': int_or_none(attributes.get('like_count')), + 'comment_count': int_or_none(attributes.get('comment_count')), } + + for i in post.get('included', []): + i_type = i.get('type') + if i_type == 'attachment': + attachment_attributes = i.get('attributes') or {} + attachment_url = attachment_attributes.get('url') + if attachment_url: + info.update({ + 'url': attachment_url, + 'ext': determine_ext(attachment_attributes.get('name'), 'mp3'), + }) + elif i_type == 'user': + user_attributes = i.get('attributes') + if user_attributes: + info.update({ + 'uploader': user_attributes.get('full_name'), + 'uploader_url': user_attributes.get('url'), + }) + + if not info.get('url'): + info.update({ + '_type': 'url', + 'url': attributes['embed']['url'], + }) + + return info From 19a352854f5143b7cd120e990433d0fd40f617b0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 5 Oct 2018 22:45:04 +0100 Subject: [PATCH 0067/1201] [patreon] extract post_file url(#17792) --- youtube_dl/extractor/patreon.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 6f73ed68d..426dd8121 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -104,16 +104,18 @@ class PatreonIE(InfoExtractor): 'comment_count': int_or_none(attributes.get('comment_count')), } + def add_file(file_data): + file_url = file_data.get('url') + if file_url: + info.update({ + 'url': file_url, + 'ext': determine_ext(file_data.get('name'), 'mp3'), + }) + for i in post.get('included', []): i_type = i.get('type') if i_type == 'attachment': - attachment_attributes = i.get('attributes') or {} - attachment_url = attachment_attributes.get('url') - if attachment_url: - info.update({ - 'url': attachment_url, - 'ext': determine_ext(attachment_attributes.get('name'), 'mp3'), - }) + add_file(i.get('attributes') or {}) elif i_type == 'user': user_attributes = i.get('attributes') if user_attributes: @@ -122,6 +124,9 @@ class PatreonIE(InfoExtractor): 'uploader_url': user_attributes.get('url'), }) + if not info.get('url'): + add_file(attributes.get('post_file') or {}) + if not info.get('url'): info.update({ '_type': 'url', From 5d90a8a5f3fef73bb4ccbecd8c61583522b88d79 Mon Sep 17 00:00:00 2001 From: yonaikerlol <39972049+yonaikerlol@users.noreply.github.com> Date: Sun, 7 Oct 2018 09:05:45 -0400 Subject: [PATCH 0068/1201] [openload] Add support for oload.cc --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index dc01b6346..c652603a5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -314,6 +314,9 @@ class OpenloadIE(InfoExtractor): # Its title has not got its extension but url has it 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', 'only_matching': True, + }, { + 'url': 'https://oload.cc/embed/5NEAbI2BDSk', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From a94e7c195e261137461b546c6446033b371dfbbe Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 11:51:40 +0100 Subject: [PATCH 0069/1201] [ted] fix extraction for http and rtmp formats(closes #5941)(closes #17572)(closes #17894) --- youtube_dl/extractor/ted.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 212ac80ab..f9b6aa48f 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -212,8 +212,6 @@ class TEDIE(InfoExtractor): http_url = None for format_id, resources in resources_.items(): - if not isinstance(resources, dict): - continue if format_id == 'h264': for resource in resources: h264_url = resource.get('file') @@ -242,6 +240,8 @@ class TEDIE(InfoExtractor): 'tbr': int_or_none(resource.get('bitrate')), }) elif format_id == 'hls': + if not isinstance(resources, dict): + continue stream_url = url_or_none(resources.get('stream')) if not stream_url: continue From f0ee386851bb0d53801a27dafbe4e8fee5b43d88 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 16:26:29 +0100 Subject: [PATCH 0070/1201] [tv3] remove extractor(closes #10461)(closes #15339) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/tv3.py | 34 ------------------------------ 2 files changed, 35 deletions(-) delete mode 100644 youtube_dl/extractor/tv3.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 464c8d690..17b576df3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1153,7 +1153,6 @@ from .tv2 import ( TV2ArticleIE, ) from .tv2hu import TV2HuIE -from .tv3 import TV3IE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tva import TVAIE diff --git a/youtube_dl/extractor/tv3.py b/youtube_dl/extractor/tv3.py deleted file mode 100644 index 3867ec90d..000000000 --- a/youtube_dl/extractor/tv3.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class TV3IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx' - _TEST = { - 'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx', - 'info_dict': { - 'id': '4659127992001', - 'ext': 'mp4', - 'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3', - 'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.', - 'uploader_id': '3812193411001', - 'upload_date': '20151213', - 'timestamp': 1449975272, - }, - 'expected_warnings': [ - 'Failed to download MPD manifest' - ], - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id') - return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From ee5fe42e442f04e1c9a388a46e0b4552be4a56d7 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 17:54:38 +0100 Subject: [PATCH 0071/1201] [brightcove:legacy] fall back to brightcove:new(#13912) --- youtube_dl/extractor/brightcove.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 14f9a14ed..5dbd71e12 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -356,7 +356,9 @@ class BrightcoveLegacyIE(InfoExtractor): def _extract_video_info(self, video_info): video_id = compat_str(video_info['id']) + publisher_id = video_info.get('publisherId') + info = { 'id': video_id, 'title': video_info['displayName'].strip(), @@ -444,8 +446,16 @@ class BrightcoveLegacyIE(InfoExtractor): else: return ad_info - if 'url' not in info and not info.get('formats'): - raise ExtractorError('Unable to extract video url for %s' % video_id) + if not info.get('url') and not info.get('formats'): + uploader_id = info.get('uploader_id') + if uploader_id: + info.update({ + '_type': 'url', + 'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (uploader_id, video_id), + 'ie_key': BrightcoveNewIE.ie_key(), + }) + else: + raise ExtractorError('Unable to extract video url for %s' % video_id) return info From 160c2773f63c72686635533bc2553634b22e7e2e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 18:41:57 +0100 Subject: [PATCH 0072/1201] [brightcove:legacy] add another fall back to brightcove:new --- youtube_dl/extractor/brightcove.py | 39 ++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 5dbd71e12..40c3959fd 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,8 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import base64 import json +import re +import struct from .common import InfoExtractor from .adobepass import AdobePassIE @@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor): 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?', expected=True) + def _brightcove_new_url_result(self, publisher_id, video_id): + brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) + def _get_video_info(self, video_id, query, referer=None): headers = {} linkBase = query.get('linkBaseURL') @@ -323,6 +329,29 @@ class BrightcoveLegacyIE(InfoExtractor): r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage, 'error message', default=None) if error_msg is not None: + publisher_id = query.get('publisherId') + if publisher_id and publisher_id[0].isdigit(): + publisher_id = publisher_id[0] + if not publisher_id: + valid_key = lambda key: key and ',' in key + player_key = query.get('playerKey') + if player_key and ',' in player_key[0]: + player_key = player_key[0] + else: + player_id = query.get('playerID') + if player_id and player_id[0].isdigit(): + player_page = self._download_webpage( + 'http://link.brightcove.com/services/player/bcpid' + player_id[0], + video_id, headers=headers, fatal=False) + if player_page: + player_key = self._search_regex( + r'<param\s+name="playerKey"\s+value="([\w~,-]+)"', + player_page, 'player key', fatal=False) + if player_key: + enc_pub_id = player_key.split(',')[1].replace('~', '=') + publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] + if publisher_id: + return self._brightcove_new_url_result(publisher_id, video_id) raise ExtractorError( 'brightcove said: %s' % error_msg, expected=True) @@ -356,9 +385,7 @@ class BrightcoveLegacyIE(InfoExtractor): def _extract_video_info(self, video_info): video_id = compat_str(video_info['id']) - publisher_id = video_info.get('publisherId') - info = { 'id': video_id, 'title': video_info['displayName'].strip(), @@ -449,11 +476,7 @@ class BrightcoveLegacyIE(InfoExtractor): if not info.get('url') and not info.get('formats'): uploader_id = info.get('uploader_id') if uploader_id: - info.update({ - '_type': 'url', - 'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (uploader_id, video_id), - 'ie_key': BrightcoveNewIE.ie_key(), - }) + info.update(self._brightcove_new_url_result(uploader_id, video_id)) else: raise ExtractorError('Unable to extract video url for %s' % video_id) return info From 582797d780b6f4857a5b8b6ca8c63915242c0ab9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 15 Oct 2018 20:47:12 +0100 Subject: [PATCH 0073/1201] [brightcove] remove unused variable --- youtube_dl/extractor/brightcove.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 40c3959fd..465ae396e 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -333,7 +333,6 @@ class BrightcoveLegacyIE(InfoExtractor): if publisher_id and publisher_id[0].isdigit(): publisher_id = publisher_id[0] if not publisher_id: - valid_key = lambda key: key and ',' in key player_key = query.get('playerKey') if player_key and ',' in player_key[0]: player_key = player_key[0] From baeabf77428ad1a6bd5a910e7be07100fcb1eadd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 16 Oct 2018 23:19:44 +0700 Subject: [PATCH 0074/1201] [rutube] Use geo verification headers (closes #17897) --- youtube_dl/extractor/rutube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 261bcbb83..10ac8ed1f 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -103,7 +103,8 @@ class RutubeIE(RutubeBaseIE): options = self._download_json( 'http://rutube.ru/api/play/options/%s/?format=json' % video_id, - video_id, 'Downloading options JSON') + video_id, 'Downloading options JSON', + headers=self.geo_verification_headers()) formats = [] for format_id, format_url in options['video_balancer'].items(): From b99b0bcfa079a15a988cf931a3ce44bb480dfbdb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 17 Oct 2018 06:22:07 +0100 Subject: [PATCH 0075/1201] [cwtv] handle api errors(closes #17905) --- youtube_dl/extractor/cwtv.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index 224a1fb5d..f9bd535f6 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_iso8601, @@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( + data = self._download_json( 'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id, - video_id)['video'] + video_id) + if data.get('result') != 'ok': + raise ExtractorError(data['msg'], expected=True) + video_data = data['video'] title = video_data['title'] mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id From 7d9e858132cd28b975d2174f5836d3de03f741d1 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 26 Oct 2018 05:40:49 +0100 Subject: [PATCH 0076/1201] [viewster] reduce format requests --- youtube_dl/extractor/viewster.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index d5d5b4c69..6e318479c 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -130,16 +130,16 @@ class ViewsterIE(InfoExtractor): def concat(suffix, sep='-'): return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix - for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): - media = self._download_json( - 'https://public-api.viewster.com/movies/%s/video' % entry_id, - video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={ - 'mediaType': media_type, - 'language': audio, - 'subtitle': subtitle, - }) - if not media: - continue + medias = self._download_json( + 'https://public-api.viewster.com/movies/%s/videos' % entry_id, + video_id, fatal=False, query={ + 'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'], + 'language': audio, + 'subtitle': subtitle, + }) + if not medias: + continue + for media in medias: video_url = media.get('Uri') if not video_url: continue From 5e733b066a5fca7fe91ad5800a3e83f0a49c8fbd Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 26 Oct 2018 05:41:57 +0100 Subject: [PATCH 0077/1201] [dailymail] fix format extraction(closes #17976) --- youtube_dl/extractor/dailymail.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py index af3978035..4f75a2a30 100644 --- a/youtube_dl/extractor/dailymail.py +++ b/youtube_dl/extractor/dailymail.py @@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor): 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id) video_sources = self._download_json(sources_url, video_id) + body = video_sources.get('body') + if body: + video_sources = body formats = [] for rendition in video_sources['renditions']: From 08c7d3dadec053ff5535ab2dc91f550ef4788297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Oct 2018 22:12:54 +0700 Subject: [PATCH 0078/1201] [crunchyroll] Improve extraction failsafeness (closes #17991) --- youtube_dl/extractor/crunchyroll.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 045be0ab5..4a68d092b 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re import json +import xml.etree.ElementTree as etree import zlib from hashlib import sha1 @@ -398,7 +399,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if sub_doc is None: + if not isinstance(sub_doc, etree.Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -515,7 +516,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if streamdata is not None: + if isinstance(streamdata, etree.Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -526,7 +527,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if stream_info is not None: + if isinstance(stream_info, etree.Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -598,10 +599,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text series = self._html_search_regex( r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d', webpage, 'series', fatal=False) - season = xpath_text(metadata, 'series_title') - episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title') - episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number')) + season = episode = episode_number = duration = thumbnail = None + + if isinstance(metadata, etree.Element): + season = xpath_text(metadata, 'series_title') + episode = xpath_text(metadata, 'episode_title') + episode_number = int_or_none(xpath_text(metadata, 'episode_number')) + duration = float_or_none(media_metadata.get('duration'), 1000) + thumbnail = xpath_text(metadata, 'episode_image_url') + + if not episode: + episode = media_metadata.get('title') + if not episode_number: + episode_number = int_or_none(media_metadata.get('episode_number')) + if not thumbnail: + thumbnail = media_metadata.get('thumbnail', {}).get('url') season_number = int_or_none(self._search_regex( r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', @@ -611,8 +624,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'id': video_id, 'title': video_title, 'description': video_description, - 'duration': float_or_none(media_metadata.get('duration'), 1000), - 'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'), + 'duration': duration, + 'thumbnail': thumbnail, 'uploader': video_uploader, 'upload_date': video_upload_date, 'series': series, From 022218f2f0dce112c0e2b15923c3a368bdfe4d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Oct 2018 22:49:10 +0700 Subject: [PATCH 0079/1201] [ivi] Add support for ivi.tv --- youtube_dl/extractor/ivi.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index cb51cef2d..86c014b07 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -15,7 +15,7 @@ from ..utils import ( class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' _GEO_BYPASS = False _GEO_COUNTRIES = ['RU'] @@ -65,7 +65,11 @@ class IviIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', - } + }, + { + 'url': 'https://www.ivi.tv/watch/33560/', + 'only_matching': True, + }, ] # Sorted by quality From c901cc38e50a47c9659db534e637ac4f6a54c450 Mon Sep 17 00:00:00 2001 From: yonaikerlol <lawlietrs7@gmail.com> Date: Sun, 28 Oct 2018 11:51:29 -0400 Subject: [PATCH 0080/1201] [openload] Add support for oload.icu --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c652603a5..a91f29f5c 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -317,6 +317,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.cc/embed/5NEAbI2BDSk', 'only_matching': True, + }, { + 'url': 'https://oload.icu/f/-_i4y_F_Hs8', + 'only_matching': True }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 1fafb329849e3f07e6a6e4141bcd4547e141745c Mon Sep 17 00:00:00 2001 From: sichuan-pepper <huajiao.sichuan.pepper@gmail.com> Date: Sun, 28 Oct 2018 01:46:32 +0900 Subject: [PATCH 0081/1201] [screencast] Fix extraction (closes #14590) --- youtube_dl/extractor/screencast.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index 62a6a8337..c6554c905 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -90,6 +90,14 @@ class ScreencastIE(InfoExtractor): r'src=(.*?)(?:$|&)', video_meta, 'meta tag video URL', default=None) + if video_url is None: + video_url = self._html_search_regex( + r'"MediaContentUrl":"([^"]+)"', webpage, 'media content url', default=None) + + if video_url is None: + video_url = self._html_search_meta( + 'og:video', webpage, default=None) + if video_url is None: raise ExtractorError('Cannot find video') From a1d1c63678dcb075a8e741947c41abfee6c790a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Oct 2018 23:23:32 +0700 Subject: [PATCH 0082/1201] [screencast] Improve extraction (closes #14617, closes #17990) --- youtube_dl/extractor/screencast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index c6554c905..69a0d01f3 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -92,7 +92,8 @@ class ScreencastIE(InfoExtractor): if video_url is None: video_url = self._html_search_regex( - r'"MediaContentUrl":"([^"]+)"', webpage, 'media content url', default=None) + r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video url', default=None, group='url') if video_url is None: video_url = self._html_search_meta( From 4c237ab78768972e4d61d0b97fe9078d95dc4433 Mon Sep 17 00:00:00 2001 From: Alexey Trofimov <dmzkrsk@gmail.com> Date: Fri, 26 Oct 2018 15:00:55 +0700 Subject: [PATCH 0083/1201] [sportbox] Fix extraction --- youtube_dl/extractor/sportbox.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 54497c880..9413cf27a 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -18,7 +18,7 @@ class SportBoxEmbedIE(InfoExtractor): 'info_dict': { 'id': '211355', 'ext': 'mp4', - 'title': '211355', + 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 292, 'view_count': int, @@ -48,9 +48,18 @@ class SportBoxEmbedIE(InfoExtractor): wjplayer_data = self._parse_json( self._search_regex( - r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'), + r'(?s)var\s+playerOptions\s*=\s*({.+?});', webpage, 'wjplayer settings'), video_id, transform_source=js_to_json) + wjplayer_data['sources'] = self._parse_json( + self._search_regex( + r'(?s)playerOptions\.sources\s*=\s*(\[.+?\]);', webpage, 'wjplayer sources'), + video_id, transform_source=js_to_json) + + title = self._html_search_meta( + ['og:title', 'twitter:title'], webpage) or self._html_search_regex( + r'<title>(.+?)', webpage, 'title', fatal=False) or video_id + formats = [] for source in wjplayer_data['sources']: src = source.get('src') @@ -71,7 +80,7 @@ class SportBoxEmbedIE(InfoExtractor): return { 'id': video_id, - 'title': video_id, + 'title': title, 'thumbnail': wjplayer_data.get('poster'), 'duration': int_or_none(wjplayer_data.get('duration')), 'view_count': view_count, From bebef109092ba2ad1b08619661aa1b51e65be1bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 00:19:08 +0700 Subject: [PATCH 0084/1201] [extractor/common] Add validation for JSON-LD URLs --- youtube_dl/extractor/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2dbf81e6e..8452125c8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -69,6 +69,7 @@ from ..utils import ( update_url_query, urljoin, url_basename, + url_or_none, xpath_element, xpath_text, xpath_with_ns, @@ -1213,10 +1214,10 @@ class InfoExtractor(object): def extract_video_object(e): assert e['@type'] == 'VideoObject' info.update({ - 'url': e.get('contentUrl'), + 'url': url_or_none(e.get('contentUrl')), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), + 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), 'filesize': float_or_none(e.get('contentSize')), From 476cf548e1c6aa83686150db7abf625c6237a67f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 00:20:29 +0700 Subject: [PATCH 0085/1201] [sportbox] Improve extraction, add support for matchtv.ru and fix video id (closes #17978) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/generic.py | 6 ++-- youtube_dl/extractor/sportbox.py | 55 ++++++++++++++++++------------ 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 17b576df3..f013d13c3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1043,7 +1043,7 @@ from .spike import ( ) from .stitcher import StitcherIE from .sport5 import Sport5IE -from .sportbox import SportBoxEmbedIE +from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2a48667f0..545e03371 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE from .tvc import TVCIE -from .sportbox import SportBoxEmbedIE +from .sportbox import SportBoxIE from .smotri import SmotriIE from .myvi import MyviIE from .condenast import CondeNastIE @@ -2636,9 +2636,9 @@ class GenericIE(InfoExtractor): return self.url_result(tvc_url, 'TVC') # Look for embedded SportBox player - sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) + sportbox_urls = SportBoxIE._extract_urls(webpage) if sportbox_urls: - return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed') + return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key()) # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 9413cf27a..b9017fd2a 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -8,20 +8,24 @@ from ..utils import ( determine_ext, int_or_none, js_to_json, + merge_dicts, ) -class SportBoxEmbedIE(InfoExtractor): - _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P\d+)' +class SportBoxIE(InfoExtractor): + _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P\d+)' _TESTS = [{ 'url': 'http://news.sportbox.ru/vdl/player/ci/211355', 'info_dict': { - 'id': '211355', + 'id': '109158', 'ext': 'mp4', 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 292, 'view_count': int, + 'timestamp': 1426237001, + 'upload_date': '20150313', }, 'params': { # m3u8 download @@ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor): }, { 'url': 'https://news.sportbox.ru/vdl/player/media/193095', 'only_matching': True, + }, { + 'url': 'https://news.sportbox.ru/vdl/player/media/109158', + 'only_matching': True, + }, { + 'url': 'https://matchtv.ru/vdl/player/media/109158', + 'only_matching': True, }] @staticmethod def _extract_urls(webpage): return re.findall( - r']+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"', + r']+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"', webpage) def _real_extract(self, url): @@ -46,22 +56,14 @@ class SportBoxEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - wjplayer_data = self._parse_json( + sources = self._parse_json( self._search_regex( - r'(?s)var\s+playerOptions\s*=\s*({.+?});', webpage, 'wjplayer settings'), + r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n', + webpage, 'sources'), video_id, transform_source=js_to_json) - wjplayer_data['sources'] = self._parse_json( - self._search_regex( - r'(?s)playerOptions\.sources\s*=\s*(\[.+?\]);', webpage, 'wjplayer sources'), - video_id, transform_source=js_to_json) - - title = self._html_search_meta( - ['og:title', 'twitter:title'], webpage) or self._html_search_regex( - r'(.+?)', webpage, 'title', fatal=False) or video_id - formats = [] - for source in wjplayer_data['sources']: + for source in sources: src = source.get('src') if not src: continue @@ -75,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor): }) self._sort_formats(formats) + player = self._parse_json( + self._search_regex( + r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage, + 'player options', default='{}'), + video_id, transform_source=js_to_json) + media_id = player['mediaId'] + + info = self._search_json_ld(webpage, media_id, default={}) + view_count = int_or_none(self._search_regex( r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) - return { - 'id': video_id, - 'title': title, - 'thumbnail': wjplayer_data.get('poster'), - 'duration': int_or_none(wjplayer_data.get('duration')), + return merge_dicts(info, { + 'id': media_id, + 'title': self._og_search_title(webpage, default=None) or media_id, + 'thumbnail': player.get('poster'), + 'duration': int_or_none(player.get('duration')), 'view_count': view_count, 'formats': formats, - } + }) From c2fe21efaaf5be47da9d88bb2a490c688bc920f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 00:38:06 +0700 Subject: [PATCH 0086/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 86cf489b1..a21177dac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +version + +Core ++ [extractor/common] Add validation for JSON-LD URLs + +Extractors ++ [sportbox] Add support for matchtv.ru +* [sportbox] Fix extraction (#17978) +* [screencast] Fix extraction (#14590, #14617, #17990) ++ [openload] Add support for oload.icu ++ [ivi] Add support for ivi.tv +* [crunchyroll] Improve extraction failsafeness (#17991) +* [dailymail] Fix formats extraction (#17976) +* [viewster] Reduce format requests +* [cwtv] Handle API errors (#17905) ++ [rutube] Use geo verification headers (#17897) ++ [brightcove:legacy] Add fallbacks to brightcove:new (#13912) +- [tv3] Remove extractor (#10461, #15339) +* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894) ++ [openload] Add support for oload.cc (#17823) ++ [patreon] Extract post_file URL (#17792) +* [patreon] Fix extraction (#14502, #10471) + + version 2018.10.05 Extractors From 9ff558f67f2285a17d2a4214b5f74aeb6ce4d9b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 00:39:29 +0700 Subject: [PATCH 0087/1201] release 2018.10.29 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +-- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 058eb4321..aefed163a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.29** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.10.05 +[debug] youtube-dl version 2018.10.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a21177dac..57dbde12d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.10.29 Core + [extractor/common] Add validation for JSON-LD URLs diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f167a6ddc..e5a6879bc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -818,7 +818,7 @@ - **Spiegeltv** - **sport.francetvinfo.fr** - **Sport5** - - **SportBoxEmbed** + - **SportBox** - **SportDeutschland** - **SpringboardPlatform** - **Sprout** @@ -909,7 +909,6 @@ - **TV2** - **tv2.hu** - **TV2Article** - - **TV3** - **TV4**: tv4.se and tv4play.se - **TV5MondePlus**: TV5MONDE+ - **TVA** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7d3f25019..ae9a77966 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.10.05' +__version__ = '2018.10.29' From 9c4a83a1bec9d7abd066a89be40e62dd36ffa67d Mon Sep 17 00:00:00 2001 From: Ali Irani Date: Thu, 6 Sep 2018 02:08:38 +0430 Subject: [PATCH 0088/1201] [aparat] Fix extraction --- youtube_dl/extractor/aparat.py | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 6eb8bbb6e..780439e17 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -34,32 +34,32 @@ class AparatIE(InfoExtractor): 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, video_id) - title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') - file_list = self._parse_json( self._search_regex( - r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, + r'var options\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id) + title = file_list['plugins']['sabaPlayerPlugin']['title'] + formats = [] - for item in file_list[0]: - file_url = url_or_none(item.get('file')) - if not file_url: - continue - ext = mimetype2ext(item.get('type')) - label = item.get('label') - formats.append({ - 'url': file_url, - 'ext': ext, - 'format_id': label or ext, - 'height': int_or_none(self._search_regex( - r'(\d+)[pP]', label or '', 'height', default=None)), - }) + for list in file_list['plugins']['sabaPlayerPlugin']['multiSRC']: + for item in list: + file_url = url_or_none(item.get('src')) + if not file_url: + continue + ext = mimetype2ext(item.get('type')) + label = item.get('label') + formats.append({ + 'url': file_url, + 'ext': ext, + 'format_id': label or ext, + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', label or '', 'height', default=None)), + }) self._sort_formats(formats) - thumbnail = self._search_regex( - r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) + thumbnail = file_list['poster'] return { 'id': video_id, From 2943397e8701d3dcd28433e485e50459fdbda62a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 23:29:05 +0700 Subject: [PATCH 0089/1201] [aparat] Improve extraction and extract more metadata (closes #17445, closes #18008) --- youtube_dl/extractor/aparat.py | 89 ++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 780439e17..883dcee7a 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + merge_dicts, mimetype2ext, url_or_none, ) @@ -12,59 +13,83 @@ from ..utils import ( class AparatIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.aparat.com/v/wP8On', 'md5': '131aca2e14fe7c4dcb3c4877ba300c89', 'info_dict': { 'id': 'wP8On', 'ext': 'mp4', 'title': 'تیم گلکسی 11 - زومیت', - 'age_limit': 0, + 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028', + 'duration': 231, + 'timestamp': 1387394859, + 'upload_date': '20131218', + 'view_count': int, }, - # 'skip': 'Extremely unreliable', - } + }, { + # multiple formats + 'url': 'https://www.aparat.com/v/8dflw/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - # Note: There is an easier-to-parse configuration at - # http://www.aparat.com/video/video/config/videohash/%video_id - # but the URL in there does not work - webpage = self._download_webpage( - 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, - video_id) + # Provides more metadata + webpage = self._download_webpage(url, video_id, fatal=False) - file_list = self._parse_json( + if not webpage: + # Note: There is an easier-to-parse configuration at + # http://www.aparat.com/video/video/config/videohash/%video_id + # but the URL in there does not work + webpage = self._download_webpage( + 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, + video_id) + + options = self._parse_json( self._search_regex( - r'var options\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, - 'file list'), + r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P(?:(?!\1).)+)\1\s*\)', + webpage, 'options', group='value'), video_id) - title = file_list['plugins']['sabaPlayerPlugin']['title'] + player = options['plugins']['sabaPlayerPlugin'] formats = [] - for list in file_list['plugins']['sabaPlayerPlugin']['multiSRC']: - for item in list: + for sources in player['multiSRC']: + for item in sources: + if not isinstance(item, dict): + continue file_url = url_or_none(item.get('src')) if not file_url: continue - ext = mimetype2ext(item.get('type')) - label = item.get('label') - formats.append({ - 'url': file_url, - 'ext': ext, - 'format_id': label or ext, - 'height': int_or_none(self._search_regex( - r'(\d+)[pP]', label or '', 'height', default=None)), - }) - self._sort_formats(formats) + item_type = item.get('type') + if item_type == 'application/vnd.apple.mpegurl': + formats.extend(self._extract_m3u8_formats( + file_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + ext = mimetype2ext(item.get('type')) + label = item.get('label') + formats.append({ + 'url': file_url, + 'ext': ext, + 'format_id': 'http-%s' % (label or ext), + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', label or '', 'height', + default=None)), + }) + self._sort_formats( + formats, field_preference=('height', 'width', 'tbr', 'format_id')) - thumbnail = file_list['poster'] + info = self._search_json_ld(webpage, video_id, default={}) - return { + if not info.get('title'): + info['title'] = player['title'] + + return merge_dicts(info, { 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'age_limit': self._family_friendly_search(webpage), + 'thumbnail': url_or_none(options.get('poster')), + 'duration': int_or_none(player.get('duration')), 'formats': formats, - } + }) From ffa7b2bfee7b94191ffc20ef00c22f708c97cddf Mon Sep 17 00:00:00 2001 From: gfabiano Date: Mon, 30 Jul 2018 18:15:20 +0200 Subject: [PATCH 0090/1201] [cbnc] Add support for new URL schema (closes #14193) --- youtube_dl/extractor/cnbc.py | 41 +++++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 5 +++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index d354d9f95..35c0b6124 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -1,8 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals + from .common import InfoExtractor -from ..utils import smuggle_url +from ..utils import ( + js_to_json, + smuggle_url, +) class CNBCIE(InfoExtractor): @@ -34,3 +38,38 @@ class CNBCIE(InfoExtractor): {'force_smil_url': True}), 'id': video_id, } + + +class CNBCNewIE(InfoExtractor): + IE_NAME = 'CNBC:new' + _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P[^.]+)' + _TEST = { + 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', + 'info_dict': { + 'id': '7000031301', + 'ext': 'mp4', + 'title': 'Trump: I don\'t necessarily agree with raising rates', + 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', + 'timestamp': 1531958400, + 'upload_date': '20180719', + 'uploader': 'NBCU-CNBC', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s' + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._parse_json( + self._search_regex( + r'(?s).*]*>.*?({.+?content_id.+?}).*?', + webpage, display_id), + display_id, transform_source=js_to_json + )['content_id'] + + return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f013d13c3..93574907b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -209,7 +209,10 @@ from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE -from .cnbc import CNBCIE +from .cnbc import ( + CNBCIE, + CNBCNewIE, +) from .cnn import ( CNNIE, CNNBlogsIE, From 94db1f7f3b7269d5843b815ef2aa5b71d0361e6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 23:53:39 +0700 Subject: [PATCH 0091/1201] [cnbc] Simplify extraction (closes #14280, closes #17110) --- youtube_dl/extractor/cnbc.py | 29 ++++++++++------------------- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 35c0b6124..81b0c9fc4 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -3,10 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - js_to_json, - smuggle_url, -) +from ..utils import smuggle_url class CNBCIE(InfoExtractor): @@ -40,36 +37,30 @@ class CNBCIE(InfoExtractor): } -class CNBCNewIE(InfoExtractor): - IE_NAME = 'CNBC:new' - _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P[^.]+)' +class CNBCVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video/(?:[^/]+/)+(?P[^./?#&]+)' _TEST = { 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', 'info_dict': { 'id': '7000031301', 'ext': 'mp4', - 'title': 'Trump: I don\'t necessarily agree with raising rates', + 'title': "Trump: I don't necessarily agree with raising rates", 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', 'timestamp': 1531958400, 'upload_date': '20180719', 'uploader': 'NBCU-CNBC', }, 'params': { - # m3u8 download 'skip_download': True, }, } - CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s' - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._parse_json( - self._search_regex( - r'(?s).*]*>.*?({.+?content_id.+?}).*?', - webpage, display_id), - display_id, transform_source=js_to_json - )['content_id'] - - return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC') + video_id = self._search_regex( + r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id, + 'video id') + return self.url_result( + 'http://video.cnbc.com/gallery/?video=%s' % video_id, + CNBCIE.ie_key()) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 93574907b..d96e23905 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -211,7 +211,7 @@ from .clyp import ClypIE from .cmt import CMTIE from .cnbc import ( CNBCIE, - CNBCNewIE, + CNBCVideoIE, ) from .cnn import ( CNNIE, From 9aac22c195cf41ff46a644bd027481629d0e6d06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Oct 2018 00:22:18 +0700 Subject: [PATCH 0092/1201] [theplatform] Improve error detection (#13222) --- youtube_dl/extractor/theplatform.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index ffef5bf06..181620615 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -39,9 +39,17 @@ class ThePlatformBaseIE(OnceIE): smil_url, video_id, note=note, query={'format': 'SMIL'}, headers=self.geo_verification_headers()) error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src') - if error_element is not None and error_element.attrib['src'].startswith( - 'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD): - raise ExtractorError(error_element.attrib['abstract'], expected=True) + if error_element is not None: + exception = find_xpath_attr( + error_element, _x('.//smil:param'), 'name', 'exception') + if exception is not None: + if exception.get('value') == 'GeoLocationBlocked': + self.raise_geo_restricted(error_element.attrib['abstract']) + elif error_element.attrib['src'].startswith( + 'http://link.theplatform.%s/s/errorFiles/Unavailable.' + % self._TP_TLD): + raise ExtractorError( + error_element.attrib['abstract'], expected=True) smil_formats = self._parse_smil_formats( meta, smil_url, video_id, namespace=default_ns, From aa7e974a2a61a20e017b52a3a9ab1fb43cf8cd13 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 29 Oct 2018 19:28:09 +0100 Subject: [PATCH 0093/1201] [linkedin:learning] Add new extractor(closes #13545) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/linkedin.py | 175 +++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 youtube_dl/extractor/linkedin.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d96e23905..8879f5d90 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -572,6 +572,10 @@ from .limelight import ( LimelightChannelListIE, ) from .line import LineTVIE +from .linkedin import ( + LinkedInLearningIE, + LinkedInLearningCourseIE, +) from .litv import LiTVIE from .liveleak import ( LiveLeakIE, diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py new file mode 100644 index 000000000..6333a8fd3 --- /dev/null +++ b/youtube_dl/extractor/linkedin.py @@ -0,0 +1,175 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + urlencode_postdata, +) + + +class LinkedInLearningBaseIE(InfoExtractor): + _NETRC_MACHINE = 'linkedin' + + def _call_api(self, course_slug, fields, video_slug=None, resolution=None): + query = { + 'courseSlug': course_slug, + 'fields': fields, + 'q': 'slugs', + } + sub = '' + if video_slug: + query.update({ + 'videoSlug': video_slug, + 'resolution': '_%s' % resolution, + }) + sub = ' %dp' % resolution + api_url = 'https://www.linkedin.com/learning-api/detailedCourses' + return self._download_json( + api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={ + 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, + }, query=query)['elements'][0] + + def _get_video_id(self, urn, course_slug, video_slug): + if urn: + mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn) + if mobj: + return mobj.group(1) + return '%s/%s' % (course_slug, video_slug) + + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + + login_page = self._download_webpage( + 'https://www.linkedin.com/uas/login?trk=learning', + None, 'Downloading login page') + action_url = self._search_regex( + r']+action=(["\'])(?P.+?)\1', login_page, 'post url', + default='https://www.linkedin.com/uas/login-submit', group='url') + data = self._hidden_inputs(login_page) + data.update({ + 'session_key': email, + 'session_password': password, + }) + login_submit_page = self._download_webpage( + action_url, None, 'Logging in', + data=urlencode_postdata(data)) + error = self._search_regex( + r']+class="error"[^>]*>\s*(.+?)\s*', + login_submit_page, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + + +class LinkedInLearningIE(LinkedInLearningBaseIE): + IE_NAME = 'linkedin:learning' + _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P[^/]+)/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true', + 'md5': 'a1d74422ff0d5e66a792deb996693167', + 'info_dict': { + 'id': '90426', + 'ext': 'mp4', + 'title': 'Welcome', + 'timestamp': 1430396150.82, + 'upload_date': '20150430', + }, + } + + def _real_extract(self, url): + course_slug, video_slug = re.match(self._VALID_URL, url).groups() + + video_data = None + formats = [] + for width, height in ((640, 360), (960, 540), (1280, 720)): + video_data = self._call_api( + course_slug, 'selectedVideo', video_slug, height)['selectedVideo'] + + video_url_data = video_data.get('url') or {} + progressive_url = video_url_data.get('progressiveUrl') + if progressive_url: + formats.append({ + 'format_id': 'progressive-%dp' % height, + 'url': progressive_url, + 'height': height, + 'width': width, + 'source_preference': 1, + }) + + title = video_data['title'] + + audio_url = video_data.get('audio', {}).get('progressiveUrl') + if audio_url: + formats.append({ + 'abr': 64, + 'ext': 'm4a', + 'format_id': 'audio', + 'url': audio_url, + 'vcodec': 'none', + }) + + streaming_url = video_url_data.get('streamingUrl') + if streaming_url: + formats.extend(self._extract_m3u8_formats( + streaming_url, video_slug, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + + self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr')) + + return { + 'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug), + 'title': title, + 'formats': formats, + 'thumbnail': video_data.get('defaultThumbnail'), + 'timestamp': float_or_none(video_data.get('publishedOn'), 1000), + 'duration': int_or_none(video_data.get('durationInSeconds')), + } + + +class LinkedInLearningCourseIE(LinkedInLearningBaseIE): + IE_NAME = 'linkedin:learning:course' + _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals', + 'info_dict': { + 'id': 'programming-foundations-fundamentals', + 'title': 'Programming Foundations: Fundamentals', + 'description': 'md5:76e580b017694eb89dc8e8923fff5c86', + }, + 'playlist_mincount': 61, + } + + @classmethod + def suitable(cls, url): + return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_slug = self._match_id(url) + course_data = self._call_api(course_slug, 'chapters,description,title') + + entries = [] + for chapter in course_data.get('chapters', []): + chapter_title = chapter.get('title') + for video in chapter.get('videos', []): + video_slug = video.get('slug') + if not video_slug: + continue + entries.append({ + '_type': 'url', + 'id': self._get_video_id(video.get('urn'), course_slug, video_slug), + 'title': video.get('title'), + 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), + 'chapter': chapter_title, + 'ie_key': LinkedInLearningIE.ie_key(), + }) + + return self.playlist_result( + entries, course_slug, + course_data.get('title'), + course_data.get('description')) From b14475724b78bf3b4f2f448bb2953dfd52d3d425 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 29 Oct 2018 21:49:12 +0100 Subject: [PATCH 0094/1201] [linkedin:learning:course] use url_transparent type for playlist entries --- youtube_dl/extractor/linkedin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py index 6333a8fd3..259fc4c5e 100644 --- a/youtube_dl/extractor/linkedin.py +++ b/youtube_dl/extractor/linkedin.py @@ -161,7 +161,7 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE): if not video_slug: continue entries.append({ - '_type': 'url', + '_type': 'url_transparent', 'id': self._get_video_id(video.get('urn'), course_slug, video_slug), 'title': video.get('title'), 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), From f16679e8436fb0e9d01aca2343ce22a01802667f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Oct 2018 04:57:28 +0700 Subject: [PATCH 0095/1201] [cnbc:video] Fix _VALID_URL (#17110) --- youtube_dl/extractor/cnbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 81b0c9fc4..6889b0f40 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -38,7 +38,7 @@ class CNBCIE(InfoExtractor): class CNBCVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video/(?:[^/]+/)+(?P[^./?#&]+)' + _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P[^./?#&]+)' _TEST = { 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', 'info_dict': { From c70ba664f19f0323d74e4e8ea76249f4c97def06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Nov 2018 01:35:32 +0700 Subject: [PATCH 0096/1201] [njpwworld] Fix authentication (closes #17427) --- youtube_dl/extractor/njpwworld.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/njpwworld.py b/youtube_dl/extractor/njpwworld.py index febef097a..025c5d249 100644 --- a/youtube_dl/extractor/njpwworld.py +++ b/youtube_dl/extractor/njpwworld.py @@ -31,6 +31,8 @@ class NJPWWorldIE(InfoExtractor): 'skip': 'Requires login', } + _LOGIN_URL = 'https://front.njpwworld.com/auth/login' + def _real_initialize(self): self._login() @@ -40,13 +42,17 @@ class NJPWWorldIE(InfoExtractor): if not username: return True + # Setup session (will set necessary cookies) + self._request_webpage( + 'https://njpwworld.com/', None, note='Setting up session') + webpage, urlh = self._download_webpage_handle( - 'https://njpwworld.com/auth/login', None, + self._LOGIN_URL, None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({'login_id': username, 'pw': password}), - headers={'Referer': 'https://njpwworld.com/auth'}) + headers={'Referer': 'https://front.njpwworld.com/auth'}) # /auth/login will return 302 for successful logins - if urlh.geturl() == 'https://njpwworld.com/auth/login': + if urlh.geturl() == self._LOGIN_URL: self.report_warning('unable to login') return False From 061ea3a776830f15dbce899bad8b48232f32aaf0 Mon Sep 17 00:00:00 2001 From: yonaikerlol Date: Fri, 2 Nov 2018 12:08:41 -0400 Subject: [PATCH 0097/1201] [openload] Add support for oload.fun --- youtube_dl/extractor/openload.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index a91f29f5c..2473536fd 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -319,7 +319,10 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }, { 'url': 'https://oload.icu/f/-_i4y_F_Hs8', - 'only_matching': True + 'only_matching': True, + }, { + 'url': 'https://oload.fun/f/gb6G1H4sHXY', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From c620694c97151396055108cd10d2a393036eb334 Mon Sep 17 00:00:00 2001 From: Sebastian Haas Date: Tue, 30 Oct 2018 23:44:50 +0100 Subject: [PATCH 0098/1201] [orf:tvthek] Fix extraction (closes #17737) use _extract_m3u8_formats and _extract_f4m_formats helper functions closes #17737 --- youtube_dl/extractor/orf.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index c1fb580ca..da8031ad2 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -80,14 +80,16 @@ class ORFTVthekIE(InfoExtractor): if not video_id or not title: continue video_id = compat_str(video_id) - formats = [{ - 'preference': -10 if fd['delivery'] == 'hls' else None, - 'format_id': '%s-%s-%s' % ( - fd['delivery'], fd['quality'], fd['quality_string']), - 'url': fd['src'], - 'protocol': fd['protocol'], - 'quality': quality_to_int(fd['quality']), - } for fd in sd['sources']] + formats = [] + for fd in sd['sources']: + format_id = '%s-%s-%s' % ( + fd['delivery'], fd['quality'], fd['quality_string']) + if determine_ext(fd['src']) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + fd['src'], video_id, 'mp4', m3u8_id=format_id)) + elif determine_ext(fd['src']) == 'f4m': + formats.extend(self._extract_f4m_formats( + fd['src'], video_id, f4m_id=format_id)) # Check for geoblocking. # There is a property is_geoprotection, but that's always false From 4b6aca17cc7d4df22e78501b4c00a9281c189ab3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Nov 2018 23:46:56 +0700 Subject: [PATCH 0099/1201] [orf:tvthek] Improve extraction and remove unused code (closes #17956, closes #18024) --- youtube_dl/extractor/orf.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index da8031ad2..d432e3449 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -15,6 +15,7 @@ from ..utils import ( strip_jsonp, unescapeHTML, unified_strdate, + url_or_none, ) @@ -68,12 +69,6 @@ class ORFTVthekIE(InfoExtractor): webpage, 'playlist', group='json'), playlist_id, transform_source=unescapeHTML)['playlist']['videos'] - def quality_to_int(s): - m = re.search('([0-9]+)', s) - if m is None: - return -1 - return int(m.group(1)) - entries = [] for sd in data_jsb: video_id, title = sd.get('id'), sd.get('title') @@ -82,14 +77,27 @@ class ORFTVthekIE(InfoExtractor): video_id = compat_str(video_id) formats = [] for fd in sd['sources']: - format_id = '%s-%s-%s' % ( - fd['delivery'], fd['quality'], fd['quality_string']) + src = url_or_none(fd.get('src')) + if not src: + continue + format_id_list = [] + for key in ('delivery', 'quality', 'quality_string'): + value = fd.get(key) + if value: + format_id_list.append(value) + format_id = '-'.join(format_id_list) if determine_ext(fd['src']) == 'm3u8': formats.extend(self._extract_m3u8_formats( fd['src'], video_id, 'mp4', m3u8_id=format_id)) elif determine_ext(fd['src']) == 'f4m': formats.extend(self._extract_f4m_formats( fd['src'], video_id, f4m_id=format_id)) + else: + formats.append({ + 'format_id': format_id, + 'url': src, + 'protocol': fd.get('protocol'), + }) # Check for geoblocking. # There is a property is_geoprotection, but that's always false From 036f905161b104ef90e75ad42d472b45eeb102ba Mon Sep 17 00:00:00 2001 From: sichuan-pepper Date: Sat, 27 Oct 2018 03:40:44 +0900 Subject: [PATCH 0100/1201] [twitcasting] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/twitcasting.py | 44 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/twitcasting.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8879f5d90..27452d73e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1196,6 +1196,7 @@ from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE +from .twitcasting import TwitcastingIE from .twitch import ( TwitchVideoIE, TwitchChapterIE, diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py new file mode 100644 index 000000000..856df5c0b --- /dev/null +++ b/youtube_dl/extractor/twitcasting.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re + + +class TwitcastingIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|ssl|en|pt|es|ja|ko)\.)?twitcasting\.tv/(?P[^\/]+)/movie/(?P[0-9]+)' + _TEST = { + 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609', + 'md5': '745243cad58c4681dc752490f7540d7f', + 'info_dict': { + 'id': '2357609', + 'ext': 'mp4', + 'title': 'Recorded Live #2357609', + 'uploader_id': 'ivetesangalo', + 'description': "Moi! I'm live on TwitCasting from my iPhone.", + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + uploader_id = mobj.group('uploader_id') + + webpage = self._download_webpage(url, video_id) + + playlist_url = self._html_search_regex(r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage, name='playlist url', group='url') + formats = self._extract_m3u8_formats(playlist_url, video_id, ext='mp4') + thumbnail = self._og_search_thumbnail(webpage) + title = self._html_search_meta('twitter:title', webpage) + description = self._og_search_description(webpage) or self._html_search_meta('twitter:description', webpage) + return{ + 'id': video_id, + 'url': url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader_id': uploader_id, + 'formats': formats, + } From cf0db4d99785532d767d0ca1cc029c73d16bb045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Nov 2018 00:27:36 +0700 Subject: [PATCH 0101/1201] [twitcasting] Improve extraction and fix issues (closes #17981) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/twitcasting.py | 36 +++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 27452d73e..b41cd65d7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1196,7 +1196,7 @@ from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE -from .twitcasting import TwitcastingIE +from .twitcasting import TwitCastingIE from .twitch import ( TwitchVideoIE, TwitchChapterIE, diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py index 856df5c0b..05f8aa9ce 100644 --- a/youtube_dl/extractor/twitcasting.py +++ b/youtube_dl/extractor/twitcasting.py @@ -6,8 +6,8 @@ from .common import InfoExtractor import re -class TwitcastingIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|ssl|en|pt|es|ja|ko)\.)?twitcasting\.tv/(?P[^\/]+)/movie/(?P[0-9]+)' +class TwitCastingIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P[^/]+)/movie/(?P\d+)' _TEST = { 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609', 'md5': '745243cad58c4681dc752490f7540d7f', @@ -18,24 +18,40 @@ class TwitcastingIE(InfoExtractor): 'uploader_id': 'ivetesangalo', 'description': "Moi! I'm live on TwitCasting from my iPhone.", 'thumbnail': r're:^https?://.*\.jpg$', - } + }, + 'params': { + 'skip_download': True, + }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = mobj.group('id') uploader_id = mobj.group('uploader_id') webpage = self._download_webpage(url, video_id) - playlist_url = self._html_search_regex(r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage, name='playlist url', group='url') - formats = self._extract_m3u8_formats(playlist_url, video_id, ext='mp4') + title = self._html_search_regex( + r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)(?:(?!\1).)+)\1', + r'(["\'])(?Phttp.+?\.m3u8.*?)\1'), + webpage, 'm3u8 url', group='url') + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + thumbnail = self._og_search_thumbnail(webpage) - title = self._html_search_meta('twitter:title', webpage) - description = self._og_search_description(webpage) or self._html_search_meta('twitter:description', webpage) - return{ + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'twitter:description', webpage) + + return { 'id': video_id, - 'url': url, 'title': title, 'description': description, 'thumbnail': thumbnail, From 95e42d7336d01f505d6551a21df52f3ae234e96b Mon Sep 17 00:00:00 2001 From: Xiao Di Guan Date: Sat, 3 Nov 2018 05:18:20 +1100 Subject: [PATCH 0102/1201] [extractor/common] Ensure response handle is not prematurely closed before it can be read if it matches expected_status (resolves #17195, closes #17846, resolves #17447) --- test/helper.py | 10 ++++++++ test/test_InfoExtractor.py | 42 ++++++++++++++++++++++++++++++++-- test/test_downloader_http.py | 12 +--------- test/test_http.py | 10 +------- youtube_dl/extractor/common.py | 5 ++++ 5 files changed, 57 insertions(+), 22 deletions(-) diff --git a/test/helper.py b/test/helper.py index dfee217a9..aa9a1c9b2 100644 --- a/test/helper.py +++ b/test/helper.py @@ -7,6 +7,7 @@ import json import os.path import re import types +import ssl import sys import youtube_dl.extractor @@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re): real_warning(w) ydl.report_warning = _report_warning + + +def http_server_port(httpd): + if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): + # In Jython SSLSocket is not a subclass of socket.socket + sock = httpd.socket.sock + else: + sock = httpd.socket + return sock.getsockname()[1] diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 4833396a5..06be72616 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -9,11 +9,30 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict, expect_value -from youtube_dl.compat import compat_etree_fromstring +from test.helper import FakeYDL, expect_dict, expect_value, http_server_port +from youtube_dl.compat import compat_etree_fromstring, compat_http_server from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError +import threading + + +TEAPOT_RESPONSE_STATUS = 418 +TEAPOT_RESPONSE_BODY = "

418 I'm a teapot

" + + +class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_GET(self): + if self.path == '/teapot': + self.send_response(TEAPOT_RESPONSE_STATUS) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + else: + assert False class TestIE(InfoExtractor): @@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ for i in range(len(entries)): expect_dict(self, entries[i], expected_entries[i]) + def test_response_with_expected_status_returns_content(self): + # Checks for mitigations against the effects of + # that affect Python 3.4.1+, which + # manifest as `_download_webpage`, `_download_xml`, `_download_json`, + # or the underlying `_download_webpage_handle` returning no content + # when a response matches `expected_status`. + + httpd = compat_http_server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + port = http_server_port(httpd) + server_thread = threading.Thread(target=httpd.serve_forever) + server_thread.daemon = True + server_thread.start() + + (content, urlh) = self.ie._download_webpage_handle( + 'http://127.0.0.1:%d/teapot' % port, None, + expected_status=TEAPOT_RESPONSE_STATUS) + self.assertEqual(content, TEAPOT_RESPONSE_BODY) + if __name__ == '__main__': unittest.main() diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 5cf2bf1a5..750472281 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -9,26 +9,16 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import try_rm +from test.helper import http_server_port, try_rm from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server from youtube_dl.downloader.http import HttpFD from youtube_dl.utils import encodeFilename -import ssl import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def http_server_port(httpd): - if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): - # In Jython SSLSocket is not a subclass of socket.socket - sock = httpd.socket.sock - else: - sock = httpd.socket - return sock.getsockname()[1] - - TEST_SIZE = 10 * 1024 diff --git a/test/test_http.py b/test/test_http.py index 409fec9c8..3ee0a5dda 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,6 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import http_server_port from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server, compat_urllib_request import ssl @@ -16,15 +17,6 @@ import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def http_server_port(httpd): - if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): - # In Jython SSLSocket is not a subclass of socket.socket - sock = httpd.socket.sock - else: - sock = httpd.socket - return sock.getsockname()[1] - - class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8452125c8..e5f8136fc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -606,6 +606,11 @@ class InfoExtractor(object): except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: if isinstance(err, compat_urllib_error.HTTPError): if self.__can_accept_status_code(err, expected_status): + # Retain reference to error to prevent file object from + # being closed before it can be read. Works around the + # effects of + # introduced in Python 3.4.1. + err.fp._error = err return err.fp if errnote is False: From da56fb631fb6389849f2f021bdd047aa1c55dc0a Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Tue, 2 Oct 2018 14:49:01 +0200 Subject: [PATCH 0103/1201] [azmedien] Adopt to major site redesign (closes #17745) --- youtube_dl/extractor/azmedien.py | 222 +++++++---------------------- youtube_dl/extractor/extractors.py | 6 +- 2 files changed, 53 insertions(+), 175 deletions(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 68f26e2ca..9d606ee67 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -1,19 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor from .kaltura import KalturaIE -from ..utils import ( - get_element_by_class, - get_element_by_id, - strip_or_none, - urljoin, -) class AZMedienBaseIE(InfoExtractor): + _PARTNER_ID = '1719221' + def _kaltura_video(self, partner_id, entry_id): return self.url_result( 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), @@ -25,189 +22,74 @@ class AZMedienIE(AZMedienBaseIE): _VALID_URL = r'''(?x) https?:// (?:www\.)? - (?: + (?P telezueri\.ch| telebaern\.tv| telem1\.ch )/ - [0-9]+-show-[^/\#]+ - (?: - /[0-9]+-episode-[^/\#]+ - (?: - /[0-9]+-segment-(?:[^/\#]+\#)?| - \# - )| - \# + [^/]+/ + (?P + [^/]+-(?P\d+) ) - (?P[^\#]+) + (?: + \#video= + (?P + [_0-9a-z]+ + ) + )? ''' _TESTS = [{ - # URL with 'segment' - 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569', 'info_dict': { - 'id': '1_2444peh4', + 'id': '1_anruz3wy', 'ext': 'mp4', - 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', - 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', - 'uploader_id': 'TeleZ?ri', - 'upload_date': '20161218', - 'timestamp': 1482084490, + 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen', + 'description': 'md5:dd9f96751ec9c35e409a698a328402f3', + 'uploader_id': 'TVOnline', + 'upload_date': '20180930', + 'timestamp': 1538328802, }, 'params': { 'skip_download': True, }, }, { - # URL with 'segment' and fragment: - 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', - 'only_matching': True - }, { - # URL with 'episode' and fragment: - 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', - 'only_matching': True - }, { - # URL with 'show' and fragment: - 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', 'only_matching': True }] def _real_extract(self, url): video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + entry_id = mobj.group('kaltura_id') - webpage = self._download_webpage(url, video_id) + if not entry_id: + webpage = self._download_webpage(url, video_id) + api_path = self._search_regex( + r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']', + webpage, 'api path') + api_url = 'https://www.%s%s' % (mobj.group('host'), api_path) + payload = { + 'query': '''query VideoContext($articleId: ID!) { + article: node(id: $articleId) { + ... on Article { + mainAssetRelation { + asset { + ... on VideoAsset { + kalturaId + } + } + } + } + } + }''', + 'variables': {'articleId': 'Article:%s' % mobj.group('article_id')}, + } + json_data = self._download_json( + api_url, video_id, headers={ + 'Content-Type': 'application/json', + }, + data=json.dumps(payload).encode()) + entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId'] - partner_id = self._search_regex( - r']+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)', - webpage, 'kaltura partner id') - entry_id = self._html_search_regex( - r']+data-id=(["\'])(?P(?:(?!\1).)+)\1[^>]+data-slug=["\']%s' - % re.escape(video_id), webpage, 'kaltura entry id', group='id') - - return self._kaltura_video(partner_id, entry_id) - - -class AZMedienPlaylistIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien playlists' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - telezueri\.ch| - telebaern\.tv| - telem1\.ch - )/ - (?P[0-9]+- - (?: - show| - topic| - themen - )-[^/\#]+ - (?: - /[0-9]+-episode-[^/\#]+ - )? - )$ - ''' - - _TESTS = [{ - # URL with 'episode' - 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'info_dict': { - 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'title': 'News - Donnerstag, 15. Dezember 2016', - }, - 'playlist_count': 9, - }, { - # URL with 'themen' - 'url': 'http://www.telem1.ch/258-themen-tele-m1-classics', - 'info_dict': { - 'id': '258-themen-tele-m1-classics', - 'title': 'Tele M1 Classics', - }, - 'playlist_mincount': 15, - }, { - # URL with 'topic', contains nested playlists - 'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen', - 'only_matching': True, - }, { - # URL with 'show' only - 'url': 'http://www.telezueri.ch/86-show-talktaeglich', - 'only_matching': True - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id) - - entries = [] - - partner_id = self._search_regex( - r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', - webpage, 'kaltura partner id', default=None) - - if partner_id: - entries = [ - self._kaltura_video(partner_id, m.group('id')) - for m in re.finditer( - r'data-id=(["\'])(?P(?:(?!\1).)+)\1', webpage)] - - if not entries: - entries = [ - self.url_result(m.group('url'), ie=AZMedienIE.ie_key()) - for m in re.finditer( - r']+data-real=(["\'])(?Phttp.+?)\1', webpage)] - - if not entries: - entries = [ - # May contain nested playlists (e.g. [1]) thus no explicit - # ie_key - # 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen) - self.url_result(urljoin(url, m.group('url'))) - for m in re.finditer( - r']+name=[^>]+href=(["\'])(?P/.+?)\1', webpage)] - - title = self._search_regex( - r'episodeShareTitle\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'title', - default=strip_or_none(get_element_by_id( - 'video-title', webpage)), group='title') - - return self.playlist_result(entries, show_id, title) - - -class AZMedienShowPlaylistIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien show playlists' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - telezueri\.ch| - telebaern\.tv| - telem1\.ch - )/ - (?: - all-episodes| - alle-episoden - )/ - (?P<id>[^/?#&]+) - ''' - - _TEST = { - 'url': 'http://www.telezueri.ch/all-episodes/astrotalk', - 'info_dict': { - 'id': 'astrotalk', - 'title': 'TeleZüri: AstroTalk - alle episoden', - 'description': 'md5:4c0f7e7d741d906004266e295ceb4a26', - }, - 'playlist_mincount': 13, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - episodes = get_element_by_class('search-mobile-box', webpage) - entries = [self.url_result( - urljoin(url, m.group('url'))) for m in re.finditer( - r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)] - title = self._og_search_title(webpage, fatal=False) - description = self._og_search_description(webpage) - return self.playlist_result(entries, playlist_id, title, description) + return self._kaltura_video(self._PARTNER_ID, entry_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b41cd65d7..9b68c9efe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -88,11 +88,7 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) -from .azmedien import ( - AZMedienIE, - AZMedienPlaylistIE, - AZMedienShowPlaylistIE, -) +from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE From 573531dcfb6869afa143761faf4ebc6ab405f808 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 01:32:29 +0700 Subject: [PATCH 0104/1201] [azmedien] Simplify (closes #17746) --- youtube_dl/extractor/azmedien.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 9d606ee67..a57a5f114 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -8,16 +8,7 @@ from .common import InfoExtractor from .kaltura import KalturaIE -class AZMedienBaseIE(InfoExtractor): - _PARTNER_ID = '1719221' - - def _kaltura_video(self, partner_id, entry_id): - return self.url_result( - 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), - video_id=entry_id) - - -class AZMedienIE(AZMedienBaseIE): +class AZMedienIE(InfoExtractor): IE_DESC = 'AZ Medien videos' _VALID_URL = r'''(?x) https?:// @@ -58,9 +49,11 @@ class AZMedienIE(AZMedienBaseIE): 'only_matching': True }] + _PARTNER_ID = '1719221' + def _real_extract(self, url): - video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') entry_id = mobj.group('kaltura_id') if not entry_id: @@ -92,4 +85,6 @@ class AZMedienIE(AZMedienBaseIE): data=json.dumps(payload).encode()) entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId'] - return self._kaltura_video(self._PARTNER_ID, entry_id) + return self.url_result( + 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id), + ie=KalturaIE.ie_key(), video_id=entry_id) From faac1c1f70425ddd60ff39d3b6a2b34c7941463b Mon Sep 17 00:00:00 2001 From: Alexander Seiler <seileralex@gmail.com> Date: Tue, 1 May 2018 05:36:03 +0200 Subject: [PATCH 0105/1201] [ehftv] Add extractor (closes #15408) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/laola1tv.py | 115 ++++++++++++++++++----------- 2 files changed, 73 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9b68c9efe..e5488cce4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -539,6 +539,7 @@ from .la7 import LA7IE from .laola1tv import ( Laola1TvEmbedIE, Laola1TvIE, + EHFTVIE, ITTFIE, ) from .lci import LCIIE diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index c7f813370..d985bd3ca 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..utils import ( @@ -119,9 +120,59 @@ class Laola1TvEmbedIE(InfoExtractor): } -class Laola1TvIE(Laola1TvEmbedIE): +class Laola1TvBaseIE(Laola1TvEmbedIE): + def _extract_video(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + if 'Dieser Livestream ist bereits beendet.' in webpage: + raise ExtractorError('This live stream has already finished.', expected=True) + + conf = self._parse_json(self._search_regex( + r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), + display_id, + transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s))) + video_id = conf['videoid'] + + config = self._download_json(conf['configUrl'], video_id, query={ + 'videoid': video_id, + 'partnerid': conf['partnerid'], + 'language': conf.get('language', ''), + 'portal': conf.get('portalid', ''), + }) + error = config.get('error') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + video_data = config['video'] + title = video_data['title'] + is_live = video_data.get('isLivestream') and video_data.get('isLive') + meta = video_data.get('metaInformation') + sports = meta.get('sports') + categories = sports.split(',') if sports else [] + + token_url = self._extract_token_url( + video_data['streamAccess'], video_id, + video_data['abo']['required']) + + formats = self._extract_formats(token_url, video_id) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': self._live_title(title) if is_live else title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('image'), + 'categories': categories, + 'formats': formats, + 'is_live': is_live, + } + + +class Laola1TvIE(Laola1TvBaseIE): IE_NAME = 'laola1tv' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)' + _TESTS = [{ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', 'info_dict': { @@ -169,52 +220,30 @@ class Laola1TvIE(Laola1TvEmbedIE): }] def _real_extract(self, url): - display_id = self._match_id(url) + return self._extract_video(url) - webpage = self._download_webpage(url, display_id) - if 'Dieser Livestream ist bereits beendet.' in webpage: - raise ExtractorError('This live stream has already finished.', expected=True) +class EHFTVIE(Laola1TvBaseIE): + IE_NAME = 'ehftv' + _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)' - conf = self._parse_json(self._search_regex( - r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), - display_id, js_to_json) + _TESTS = [{ + 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761', + 'info_dict': { + 'id': '1166761', + 'display_id': 'paris-saint-germain-handball-pge-vive-kielce', + 'ext': 'mp4', + 'title': 'Paris Saint-Germain Handball - PGE Vive Kielce', + 'is_live': False, + 'categories': ['Handball'], + }, + 'params': { + 'skip_download': True, + }, + }] - video_id = conf['videoid'] - - config = self._download_json(conf['configUrl'], video_id, query={ - 'videoid': video_id, - 'partnerid': conf['partnerid'], - 'language': conf.get('language', ''), - 'portal': conf.get('portalid', ''), - }) - error = config.get('error') - if error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) - - video_data = config['video'] - title = video_data['title'] - is_live = video_data.get('isLivestream') and video_data.get('isLive') - meta = video_data.get('metaInformation') - sports = meta.get('sports') - categories = sports.split(',') if sports else [] - - token_url = self._extract_token_url( - video_data['streamAccess'], video_id, - video_data['abo']['required']) - - formats = self._extract_formats(token_url, video_id) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': self._live_title(title) if is_live else title, - 'description': video_data.get('description'), - 'thumbnail': video_data.get('image'), - 'categories': categories, - 'formats': formats, - 'is_live': is_live, - } + def _real_extract(self, url): + return self._extract_video(url) class ITTFIE(InfoExtractor): From 6895ea4d3f9d44048fa59800a06ab8177a24bd1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 02:44:35 +0700 Subject: [PATCH 0106/1201] [laola1tv:embed] Set correct stream access URL scheme (closes #16341) --- youtube_dl/extractor/laola1tv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index d985bd3ca..fa217365a 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -33,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor): def _extract_token_url(self, stream_access_url, video_id, data): return self._download_json( - stream_access_url, video_id, headers={ + self._proto_relative_url(stream_access_url, 'https:'), video_id, + headers={ 'Content-Type': 'application/json', }, data=json.dumps(data).encode())['data']['stream-access'][0] @@ -225,7 +226,7 @@ class Laola1TvIE(Laola1TvBaseIE): class EHFTVIE(Laola1TvBaseIE): IE_NAME = 'ehftv' - _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761', From a085410936a5e0c90a8eb3059a5cd9e0703848bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 02:56:14 +0700 Subject: [PATCH 0107/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ChangeLog b/ChangeLog index 57dbde12d..05857596a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +version <unreleased> + +Core +* [extractor/common] Ensure response handle is not prematurely closed before + it can be read if it matches expected_status (#17195, #17846, #17447) + +Extractors +* [laola1tv:embed] Set correct stream access URL scheme (#16341) ++ [ehftv] Add support for ehftv.com (#15408) +* [azmedien] Adopt to major site redesign (#17745, #17746) ++ [twitcasting] Add support for twitcasting.tv (#17981) +* [orf:tvthek] Fix extraction (#17737, #17956, #18024) ++ [openload] Add support for oload.fun (#18045) +* [njpwworld] Fix authentication (#17427) ++ [linkedin:learning] Add support for linkedin.com/learning (#13545) +* [theplatform] Improve error detection (#13222) +* [cnbc] Simplify extraction (#14280, #17110) ++ [cbnc] Add support for new URL schema (#14193) +* [aparat] Improve extraction and extract more metadata (#17445, #18008) +* [aparat] Fix extraction + + version 2018.10.29 Core From 38c32dbf19d0168295d02b0afaed9227fed46338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 02:57:48 +0700 Subject: [PATCH 0108/1201] release 2018.11.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index aefed163a..eb8cef8ef 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.29** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.10.29 +[debug] youtube-dl version 2018.11.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 05857596a..11e1ba333 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.11.03 Core * [extractor/common] Ensure response handle is not prematurely closed before diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e5a6879bc..24c3254c3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -84,8 +84,6 @@ - **awaan:season** - **awaan:video** - **AZMedien**: AZ Medien videos - - **AZMedienPlaylist**: AZ Medien playlists - - **AZMedienShowPlaylist**: AZ Medien show playlists - **BaiduVideo**: 百度视频 - **bambuser** - **bambuser:channel** @@ -178,6 +176,7 @@ - **Clyp** - **cmt.com** - **CNBC** + - **CNBCVideo** - **CNN** - **CNNArticle** - **CNNBlogs** @@ -251,6 +250,7 @@ - **EchoMsk** - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson + - **ehftv** - **eHow** - **EinsUndEinsTV** - **Einthusan** @@ -445,6 +445,8 @@ - **limelight:channel** - **limelight:channel_list** - **LineTV** + - **linkedin:learning** + - **linkedin:learning:course** - **LiTV** - **LiveLeak** - **LiveLeakEmbed** @@ -930,6 +932,7 @@ - **TVPlayer** - **TVPlayHome** - **Tweakers** + - **TwitCasting** - **twitch:chapter** - **twitch:clips** - **twitch:profile** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ae9a77966..90de01214 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.10.29' +__version__ = '2018.11.03' From dbdaaa231add0a8d1fa8138c448ccb344f585894 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 3 Nov 2018 06:26:16 +0700 Subject: [PATCH 0109/1201] [youtube] Add fallback metadata extraction from videoDetails (closes #18052) --- youtube_dl/extractor/youtube.py | 34 ++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 78203ef84..abadfa545 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -41,6 +41,7 @@ from ..utils import ( remove_quotes, remove_start, smuggle_url, + str_or_none, str_to_int, try_get, unescapeHTML, @@ -501,6 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], 'duration': 10, + 'view_count': int, 'like_count': int, 'dislike_count': int, 'start_time': 1, @@ -583,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], 'duration': 10, + 'view_count': int, 'like_count': int, 'dislike_count': int, }, @@ -1538,6 +1541,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_view_count(v_info): return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) + player_response = {} + # Get video info embed_webpage = None if re.search(r'player-age-gate-content">', video_webpage) is not None: @@ -1580,6 +1585,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if args.get('livestream') == '1' or args.get('live_playback') == 1: is_live = True sts = ytplayer_config.get('sts') + if not player_response: + pl_response = str_or_none(args.get('player_response')) + if pl_response: + pl_response = self._parse_json(pl_response, video_id, fatal=False) + if isinstance(pl_response, dict): + player_response = pl_response if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): # We also try looking in get_video_info since it may contain different dashmpd # URL that points to a DASH manifest with possibly different itag set (some itags @@ -1608,6 +1619,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not video_info_webpage: continue get_video_info = compat_parse_qs(video_info_webpage) + if not player_response: + pl_response = get_video_info.get('player_response', [None])[0] + if isinstance(pl_response, dict): + player_response = pl_response add_dash_mpd(get_video_info) if view_count is None: view_count = extract_view_count(get_video_info) @@ -1653,9 +1668,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '"token" parameter not in video info for unknown reason', video_id=video_id) + video_details = try_get( + player_response, lambda x: x['videoDetails'], dict) or {} + # title if 'title' in video_info: video_title = video_info['title'][0] + elif 'title' in player_response: + video_title = video_details['title'] else: self._downloader.report_warning('Unable to extract video title') video_title = '_' @@ -1718,6 +1738,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if view_count is None: view_count = extract_view_count(video_info) + if view_count is None and video_details: + view_count = int_or_none(video_details.get('viewCount')) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: @@ -1898,7 +1920,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') # uploader - video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str) + video_uploader = try_get( + video_info, lambda x: x['author'][0], + compat_str) or str_or_none(video_details.get('author')) if video_uploader: video_uploader = compat_urllib_parse_unquote_plus(video_uploader) else: @@ -2011,12 +2035,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): like_count = _extract_count('like') dislike_count = _extract_count('dislike') + if view_count is None: + view_count = str_to_int(self._search_regex( + r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage, + 'view count', default=None)) + # subtitles video_subtitles = self.extract_subtitles(video_id, video_webpage) automatic_captions = self.extract_automatic_captions(video_id, video_webpage) video_duration = try_get( video_info, lambda x: int_or_none(x['length_seconds'][0])) + if not video_duration: + video_duration = int_or_none(video_details.get('lengthSeconds')) if not video_duration: video_duration = parse_duration(self._html_search_meta( 'duration', video_webpage, 'video duration')) @@ -2244,6 +2275,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', 'categories': ['People & Blogs'], 'tags': list, + 'view_count': int, 'like_count': int, 'dislike_count': int, }, From 22e07ce502275fbede32d212eacdaeabee22fe4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Nov 2018 00:11:36 +0700 Subject: [PATCH 0110/1201] [README.md] Improve documentation on safe metadata extraction and add more examples --- README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fdd115c9b..35c3de512 100644 --- a/README.md +++ b/README.md @@ -1168,7 +1168,28 @@ title = self._search_regex( ### Use safe conversion functions -Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. + +Use `url_or_none` for safe URL processing. + +Use `try_get` for safe metadata extraction from parsed JSON. + +Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. + +#### More examples + +##### Safely extract optional description from parsed JSON +```python +description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) +``` + +##### Safely extract more optional metadata +```python +video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +description = video.get('summary') +duration = float_or_none(video.get('durationMs'), scale=1000) +view_count = int_or_none(video.get('views')) +``` # EMBEDDING YOUTUBE-DL From 16d896b2a74e2b9989fc0483728f8009876fc4cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Nov 2018 15:52:46 +0700 Subject: [PATCH 0111/1201] [zattoo] Arrange API hosts for derived extractors (closes #18035) --- youtube_dl/extractor/zattoo.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index bbe0aecb6..cb1bac3a3 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -22,7 +22,7 @@ class ZattooPlatformBaseIE(InfoExtractor): _power_guide_hash = None def _host_url(self): - return 'https://%s' % self._HOST + return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST) def _login(self): username, password = self._get_login_info() @@ -286,6 +286,7 @@ class ZattooLiveIE(ZattooBaseIE): class NetPlusIE(ZattooIE): _NETRC_MACHINE = 'netplus' _HOST = 'netplus.tv' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -300,7 +301,7 @@ class MNetTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc', + 'url': 'https://tvplus.m-net.de/watch/abc/123-abc', 'only_matching': True, }] @@ -311,7 +312,7 @@ class WalyTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.player.waly.tv/watch/abc/123-abc', + 'url': 'https://player.waly.tv/watch/abc/123-abc', 'only_matching': True, }] @@ -319,6 +320,7 @@ class WalyTVIE(ZattooIE): class BBVTVIE(ZattooIE): _NETRC_MACHINE = 'bbvtv' _HOST = 'bbv-tv.net' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -330,6 +332,7 @@ class BBVTVIE(ZattooIE): class VTXTVIE(ZattooIE): _NETRC_MACHINE = 'vtxtv' _HOST = 'vtxtv.ch' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -341,6 +344,7 @@ class VTXTVIE(ZattooIE): class MyVisionTVIE(ZattooIE): _NETRC_MACHINE = 'myvisiontv' _HOST = 'myvisiontv.ch' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -355,7 +359,7 @@ class GlattvisionTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc', + 'url': 'https://iptv.glattvision.ch/watch/abc/123-abc', 'only_matching': True, }] @@ -363,6 +367,7 @@ class GlattvisionTVIE(ZattooIE): class SAKTVIE(ZattooIE): _NETRC_MACHINE = 'saktv' _HOST = 'saktv.ch' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -377,7 +382,7 @@ class EWETVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc', + 'url': 'https://tvonline.ewe.de/watch/abc/123-abc', 'only_matching': True, }] @@ -385,6 +390,7 @@ class EWETVIE(ZattooIE): class QuantumTVIE(ZattooIE): _NETRC_MACHINE = 'quantumtv' _HOST = 'quantum-tv.com' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -399,7 +405,7 @@ class OsnatelTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc', + 'url': 'https://tvonline.osnatel.de/watch/abc/123-abc', 'only_matching': True, }] @@ -407,6 +413,7 @@ class OsnatelTVIE(ZattooIE): class EinsUndEinsTVIE(ZattooIE): _NETRC_MACHINE = '1und1tv' _HOST = '1und1.tv' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ From 2004e2210bc74aa950feba0f22df4d5a8980b3e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Nov 2018 17:09:57 +0700 Subject: [PATCH 0112/1201] [osnateltv] Update host --- youtube_dl/extractor/zattoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index cb1bac3a3..896276301 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -401,7 +401,7 @@ class QuantumTVIE(ZattooIE): class OsnatelTVIE(ZattooIE): _NETRC_MACHINE = 'osnateltv' - _HOST = 'onlinetv.osnatel.de' + _HOST = 'tvonline.osnatel.de' _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ From c0345b825f8758571a8de871ab9349c46b062fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Nov 2018 19:08:39 +0700 Subject: [PATCH 0113/1201] [youtube:playlist] Add support for invidio.us (closes #18077) --- youtube_dl/extractor/youtube.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index abadfa545..6ab2db274 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2162,7 +2162,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (?:https?://)? (?:\w+\.)? (?: - youtube\.com/ + (?: + youtube\.com| + invidio\.us + ) + / (?: (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11})) \? (?:.*?[&;])*? (?:p|a|list)= @@ -2314,6 +2318,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # music album playlist 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM', 'only_matching': True, + }, { + 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU', + 'only_matching': True, }] def _real_initialize(self): From 432cd4841023091811db46cd82c188698a386841 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Nov 2018 23:29:42 +0700 Subject: [PATCH 0114/1201] [cliphinter] Fix extraction (closes #18083) --- youtube_dl/extractor/cliphunter.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index ab651d1c8..f2ca7a337 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -1,19 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none - - -_translation_table = { - 'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n', - 'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r', - 'y': 'l', 'z': 'i', - '$': ':', '&': '.', '(': '=', '^': '&', '=': '/', -} - - -def _decode(s): - return ''.join(_translation_table.get(c, c) for c in s) +from ..utils import ( + int_or_none, + url_or_none, +) class CliphunterIE(InfoExtractor): @@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor): formats = [] for format_id, f in gexo_files.items(): - video_url = f.get('url') + video_url = url_or_none(f.get('url')) if not video_url: continue fmt = f.get('fmt') height = f.get('h') format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id formats.append({ - 'url': _decode(video_url), + 'url': video_url, 'format_id': format_id, 'width': int_or_none(f.get('w')), 'height': int_or_none(height), From 0df514f07e23ce70cccec045b4e71bdec151fcc7 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 6 Nov 2018 21:22:00 +0100 Subject: [PATCH 0115/1201] [facebook] fix tahoe request(closes #17171) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 97cfe0fc3..74954049d 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor): _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' - _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true' + _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary' _TESTS = [{ 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', From 2511eee215c2a66020ae927c86face826f48ba8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 7 Nov 2018 09:55:59 +0700 Subject: [PATCH 0116/1201] [youtube] Add another JS signature function name regex (closes #18091, closes #18093, closes #18094) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6ab2db274..3f49f3889 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1192,7 +1192,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('), + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) From f81d44aab6d8ee01024a637cb80374251737872e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 7 Nov 2018 09:58:08 +0700 Subject: [PATCH 0117/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index 11e1ba333..920a4855a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version <unreleased> + +Extractors ++ [youtube] Add another JS signature function name regex (#18091, #18093, + #18094) +* [facebook] Fix tahoe request (#17171) +* [cliphunter] Fix extraction (#18083) ++ [youtube:playlist] Add support for invidio.us (#18077) +* [zattoo] Arrange API hosts for derived extractors (#18035) ++ [youtube] Add fallback metadata extraction from videoDetails (#18052) + + version 2018.11.03 Core From 532782ade1dab884606dbbd82081ed7ab9c52a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 7 Nov 2018 01:38:25 +0700 Subject: [PATCH 0118/1201] release 2018.11.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 23 ++++++++++++++++++++++- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index eb8cef8ef..7607e0e03 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.11.03 +[debug] youtube-dl version 2018.11.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 333acee80..bbcb78808 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -296,5 +296,26 @@ title = self._search_regex( ### Use safe conversion functions -Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. + +Use `url_or_none` for safe URL processing. + +Use `try_get` for safe metadata extraction from parsed JSON. + +Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. + +#### More examples + +##### Safely extract optional description from parsed JSON +```python +description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) +``` + +##### Safely extract more optional metadata +```python +video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +description = video.get('summary') +duration = float_or_none(video.get('durationMs'), scale=1000) +view_count = int_or_none(video.get('views')) +``` diff --git a/ChangeLog b/ChangeLog index 920a4855a..fa5de8b04 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.11.07 Extractors + [youtube] Add another JS signature function name regex (#18091, #18093, diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 90de01214..7f32ad36c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.11.03' +__version__ = '2018.11.07' From cab26223bf480553d67840fc9f46aa9ff89ec29f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 10 Nov 2018 15:22:59 +0700 Subject: [PATCH 0119/1201] [ruutu] Update API endpoint (closes #18138) --- youtube_dl/extractor/ruutu.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 9fa8688f8..f530f0083 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -65,7 +65,8 @@ class RuutuIE(InfoExtractor): video_id = self._match_id(url) video_xml = self._download_xml( - 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id) + 'https://gatling.nelonenmedia.fi/media-xml-cache', video_id, + query={'id': video_id}) formats = [] processed_urls = [] From 96a91b15513af2121be1dd93871cc3769c06da3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 10 Nov 2018 23:37:27 +0700 Subject: [PATCH 0120/1201] [vivo] Fix extraction (closes #18139) --- youtube_dl/extractor/shared.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index b2250afdd..931a0f70e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -5,6 +5,7 @@ from ..compat import compat_b64decode from ..utils import ( ExtractorError, int_or_none, + url_or_none, urlencode_postdata, ) @@ -86,9 +87,16 @@ class VivoIE(SharedBaseIE): } def _extract_video_url(self, webpage, video_id, *args): + def decode_url(encoded_url): + return compat_b64decode(encoded_url).decode('utf-8') + + stream_url = url_or_none(decode_url(self._search_regex( + r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'stream url', default=None, group='url'))) + if stream_url: + return stream_url return self._parse_json( self._search_regex( r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'stream', group='url'), - video_id, - transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0] + video_id, transform_source=decode_url)[0] From 83852e57bf2f96ba50418b1a888ae5a1836549cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 11 Nov 2018 00:44:49 +0700 Subject: [PATCH 0121/1201] [zype] Add extractor (closes #18143) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 20 +++++++++++ youtube_dl/extractor/zype.py | 57 ++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 youtube_dl/extractor/zype.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e5488cce4..b2b00c86f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1478,3 +1478,4 @@ from .zattoo import ( ) from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE +from .zype import ZypeIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 545e03371..59cf03faf 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -114,6 +114,7 @@ from .apa import APAIE from .foxnews import FoxNewsIE from .viqeo import ViqeoIE from .expressen import ExpressenIE +from .zype import ZypeIE class GenericIE(InfoExtractor): @@ -2070,6 +2071,20 @@ class GenericIE(InfoExtractor): }, 'playlist_count': 6, }, + { + # Zype embed + 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', + 'info_dict': { + 'id': '5b400b834b32992a310622b9', + 'ext': 'mp4', + 'title': 'Smoky Barbecue Favorites', + 'thumbnail': r're:^https?://.*\.jpe?g', + }, + 'add_ie': [ZypeIE.ie_key()], + 'params': { + 'skip_download': True, + }, + }, { # videojs embed 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904', @@ -3129,6 +3144,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key()) + zype_urls = ZypeIE._extract_urls(webpage) + if zype_urls: + return self.playlist_from_matches( + zype_urls, video_id, video_title, ie=ZypeIE.ie_key()) + # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: diff --git a/youtube_dl/extractor/zype.py b/youtube_dl/extractor/zype.py new file mode 100644 index 000000000..3b16e703b --- /dev/null +++ b/youtube_dl/extractor/zype.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class ZypeIE(InfoExtractor): + _VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+' + _TEST = { + 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false', + 'md5': 'eaee31d474c76a955bdaba02a505c595', + 'info_dict': { + 'id': '5b400b834b32992a310622b9', + 'ext': 'mp4', + 'title': 'Smoky Barbecue Favorites', + 'thumbnail': r're:^https?://.*\.jpe?g', + }, + } + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1', + webpage)] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._search_regex( + r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, + 'title', group='value') + + m3u8_url = self._search_regex( + r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage, + 'm3u8 url', group='url') + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + thumbnail = self._search_regex( + r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail', + default=False, group='url') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } From f17a24a6df293370b94082c7feb6c447a3e7d8d9 Mon Sep 17 00:00:00 2001 From: Patrick Griffis <tingping@tingping.se> Date: Mon, 21 May 2018 17:02:16 -0400 Subject: [PATCH 0122/1201] [picarto] Use API and add token support This is just more reliable than trying to extract it from the page itself. --- youtube_dl/extractor/picarto.py | 36 +++++++++------------------------ 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py index 2366dfb34..27ee9643b 100644 --- a/youtube_dl/extractor/picarto.py +++ b/youtube_dl/extractor/picarto.py @@ -8,14 +8,13 @@ from ..compat import compat_str from ..utils import ( ExtractorError, js_to_json, - try_get, update_url_query, urlencode_postdata, ) class PicartoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?' _TEST = { 'url': 'https://picarto.tv/Setz', 'info_dict': { @@ -34,19 +33,11 @@ class PicartoIE(InfoExtractor): def _real_extract(self, url): channel_id = self._match_id(url) - stream_page = self._download_webpage(url, channel_id) + metadata = self._download_json( + 'https://api.picarto.tv/v1/channel/name/' + channel_id, + channel_id) - if '>This channel does not exist' in stream_page: - raise ExtractorError( - 'Channel %s does not exist' % channel_id, expected=True) - - player = self._parse_json( - self._search_regex( - r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page, - 'player settings'), - channel_id, transform_source=js_to_json) - - if player.get('online') is False: + if metadata.get('online') is False: raise ExtractorError('Stream is offline', expected=True) cdn_data = self._download_json( @@ -54,20 +45,13 @@ class PicartoIE(InfoExtractor): data=urlencode_postdata({'loadbalancinginfo': channel_id}), note='Downloading load balancing info') - def get_event(key): - return try_get(player, lambda x: x['event'][key], compat_str) or '' - + token = self._VALID_URL_RE.match(url).group('token') or 'public' params = { - 'token': player.get('token') or '', - 'ticket': get_event('ticket'), 'con': int(time.time() * 1000), - 'type': get_event('ticket'), - 'scope': get_event('scope'), + 'token': token, } prefered_edge = cdn_data.get('preferedEdge') - default_tech = player.get('defaultTech') - formats = [] for edge in cdn_data['edges']: @@ -81,8 +65,6 @@ class PicartoIE(InfoExtractor): preference = 0 if edge_id == prefered_edge: preference += 1 - if tech_type == default_tech: - preference += 1 format_id = [] if edge_id: format_id.append(edge_id) @@ -109,7 +91,7 @@ class PicartoIE(InfoExtractor): continue self._sort_formats(formats) - mature = player.get('mature') + mature = metadata.get('adult') if mature is None: age_limit = None else: @@ -119,7 +101,7 @@ class PicartoIE(InfoExtractor): 'id': channel_id, 'title': self._live_title(channel_id), 'is_live': True, - 'thumbnail': player.get('vodThumb'), + 'thumbnail': metadata.get('thumbnails', {}).get('web'), 'age_limit': age_limit, 'formats': formats, } From 730c0d12a06f349907481570f1f2890251f7a181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 11 Nov 2018 16:08:54 +0700 Subject: [PATCH 0123/1201] [picarto] Extract more metadata (closes #16518) --- youtube_dl/extractor/picarto.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py index 27ee9643b..8099ef1d6 100644 --- a/youtube_dl/extractor/picarto.py +++ b/youtube_dl/extractor/picarto.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import re import time from .common import InfoExtractor @@ -8,6 +9,7 @@ from ..compat import compat_str from ..utils import ( ExtractorError, js_to_json, + try_get, update_url_query, urlencode_postdata, ) @@ -32,7 +34,9 @@ class PicartoIE(InfoExtractor): return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) def _real_extract(self, url): - channel_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') + metadata = self._download_json( 'https://api.picarto.tv/v1/channel/name/' + channel_id, channel_id) @@ -45,7 +49,7 @@ class PicartoIE(InfoExtractor): data=urlencode_postdata({'loadbalancinginfo': channel_id}), note='Downloading load balancing info') - token = self._VALID_URL_RE.match(url).group('token') or 'public' + token = mobj.group('token') or 'public' params = { 'con': int(time.time() * 1000), 'token': token, @@ -99,9 +103,11 @@ class PicartoIE(InfoExtractor): return { 'id': channel_id, - 'title': self._live_title(channel_id), + 'title': self._live_title(metadata.get('title') or channel_id), 'is_live': True, - 'thumbnail': metadata.get('thumbnails', {}).get('web'), + 'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']), + 'channel': channel_id, + 'channel_url': 'https://picarto.tv/%s' % channel_id, 'age_limit': age_limit, 'formats': formats, } From 9b9b3501c5bee18d608dd2961a80936667f8ece2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 16 Nov 2018 22:55:35 +0700 Subject: [PATCH 0124/1201] [tnaflixnetwork:embed] Fix extraction (closes #18205) --- youtube_dl/extractor/tnaflix.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 0c2f8f119..6798ef4c3 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -18,8 +18,9 @@ from ..utils import ( class TNAFlixNetworkBaseIE(InfoExtractor): # May be overridden in descendants if necessary _CONFIG_REGEX = [ - r'flashvars\.config\s*=\s*escape\("([^"]+)"', - r'<input[^>]+name="config\d?" value="([^"]+)"', + r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"', + r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"', + r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1', ] _HOST = 'tna' _VKEY_SUFFIX = '' @@ -85,7 +86,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): webpage = self._download_webpage(url, display_id) cfg_url = self._proto_relative_url(self._html_search_regex( - self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:') + self._CONFIG_REGEX, webpage, 'flashvars.config', default=None, + group='url'), 'http:') if not cfg_url: inputs = self._hidden_inputs(webpage) From 2599956c9ff0162e7afbddebb00d73eea6b0403c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Nov 2018 00:07:59 +0700 Subject: [PATCH 0125/1201] [rte] Add support for new API endpoint (closes #18206) --- youtube_dl/extractor/rte.py | 129 +++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 52 deletions(-) diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index a6fac6c35..1fbc72915 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -8,7 +8,10 @@ from ..compat import compat_HTTPError from ..utils import ( float_or_none, parse_iso8601, + str_or_none, + try_get, unescapeHTML, + url_or_none, ExtractorError, ) @@ -17,65 +20,87 @@ class RteBaseIE(InfoExtractor): def _real_extract(self, url): item_id = self._match_id(url) - try: - json_string = self._download_json( - 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id, - item_id) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: - error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False) - if error_info: - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error_info['message']), - expected=True) - raise - - # NB the string values in the JSON are stored using XML escaping(!) - show = json_string['shows'][0] - title = unescapeHTML(show['title']) - description = unescapeHTML(show.get('description')) - thumbnail = show.get('thumbnail') - duration = float_or_none(show.get('duration'), 1000) - timestamp = parse_iso8601(show.get('published')) - - mg = show['media:group'][0] - + info_dict = {} formats = [] - if mg.get('url'): - m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) - if m: - m = m.groupdict() - formats.append({ - 'url': m['url'] + '/' + m['app'], - 'app': m['app'], - 'play_path': m['playpath'], - 'player_url': url, - 'ext': 'flv', - 'format_id': 'rtmp', - }) + ENDPOINTS = ( + 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=', + 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=', + ) - if mg.get('hls_server') and mg.get('hls_url'): - formats.extend(self._extract_m3u8_formats( - mg['hls_server'] + mg['hls_url'], item_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + for num, ep_url in enumerate(ENDPOINTS, start=1): + try: + data = self._download_json(ep_url + item_id, item_id) + except ExtractorError as ee: + if num < len(ENDPOINTS) or formats: + continue + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False) + if error_info: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error_info['message']), + expected=True) + raise - if mg.get('hds_server') and mg.get('hds_url'): - formats.extend(self._extract_f4m_formats( - mg['hds_server'] + mg['hds_url'], item_id, - f4m_id='hds', fatal=False)) + # NB the string values in the JSON are stored using XML escaping(!) + show = try_get(data, lambda x: x['shows'][0], dict) + if not show: + continue + + if not info_dict: + title = unescapeHTML(show['title']) + description = unescapeHTML(show.get('description')) + thumbnail = show.get('thumbnail') + duration = float_or_none(show.get('duration'), 1000) + timestamp = parse_iso8601(show.get('published')) + info_dict = { + 'id': item_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + } + + mg = try_get(show, lambda x: x['media:group'][0], dict) + if not mg: + continue + + if mg.get('url'): + m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) + if m: + m = m.groupdict() + formats.append({ + 'url': m['url'] + '/' + m['app'], + 'app': m['app'], + 'play_path': m['playpath'], + 'player_url': url, + 'ext': 'flv', + 'format_id': 'rtmp', + }) + + if mg.get('hls_server') and mg.get('hls_url'): + formats.extend(self._extract_m3u8_formats( + mg['hls_server'] + mg['hls_url'], item_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + + if mg.get('hds_server') and mg.get('hds_url'): + formats.extend(self._extract_f4m_formats( + mg['hds_server'] + mg['hds_url'], item_id, + f4m_id='hds', fatal=False)) + + mg_rte_server = str_or_none(mg.get('rte:server')) + mg_url = str_or_none(mg.get('url')) + if mg_rte_server and mg_url: + hds_url = url_or_none(mg_rte_server + mg_url) + if hds_url: + formats.extend(self._extract_f4m_formats( + hds_url, item_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) - return { - 'id': item_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - } + info_dict['formats'] = formats + return info_dict class RteIE(RteBaseIE): From 0919cd4d011d0545a6afadfab0b71de8c0a4fe02 Mon Sep 17 00:00:00 2001 From: NeroBurner <pyro4hell@gmail.com> Date: Fri, 16 Nov 2018 18:18:50 +0100 Subject: [PATCH 0126/1201] [atvat] Fix extraction (closes #18041) --- youtube_dl/extractor/atvat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/atvat.py b/youtube_dl/extractor/atvat.py index 1584d53fc..95e572d70 100644 --- a/youtube_dl/extractor/atvat.py +++ b/youtube_dl/extractor/atvat.py @@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_data = self._parse_json(unescapeHTML(self._search_regex( - r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"', - webpage, 'player data')), display_id)['config']['initial_video'] + [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1', + r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'], + webpage, 'player data', group='json')), + display_id)['config']['initial_video'] video_id = video_data['id'] video_title = video_data['title'] From d0058c76d5e14ffe89e8265fa3d984e28e922d78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Nov 2018 16:59:20 +0700 Subject: [PATCH 0127/1201] [openload] Use original host during extraction (closes #18211) --- youtube_dl/extractor/openload.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 2473536fd..cf51e4770 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,18 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'''(?x) + https?:// + (?P<host> + (?:www\.)? + (?: + openload\.(?:co|io|link)| + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun) + ) + )/ + (?:f|embed)/ + (?P<id>[a-zA-Z0-9-_]+) + ''' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -334,8 +345,11 @@ class OpenloadIE(InfoExtractor): webpage) def _real_extract(self, url): - video_id = self._match_id(url) - url_pattern = 'https://openload.co/%%s/%s/' % video_id + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') + + url_pattern = 'https://%s/%%s/%s/' % (host, video_id) headers = { 'User-Agent': self._USER_AGENT, } @@ -368,7 +382,7 @@ class OpenloadIE(InfoExtractor): r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, 'stream URL')) - video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id + video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id) title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, @@ -379,7 +393,7 @@ class OpenloadIE(InfoExtractor): entry = entries[0] if entries else {} subtitles = entry.get('subtitles') - info_dict = { + return { 'id': video_id, 'title': title, 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), @@ -388,4 +402,3 @@ class OpenloadIE(InfoExtractor): 'subtitles': subtitles, 'http_headers': headers, } - return info_dict From a640c4d226e7b790fe8db43f1c5bdf2358caf839 Mon Sep 17 00:00:00 2001 From: aviperes <avipr24@gmail.com> Date: Sat, 17 Nov 2018 15:59:13 +0200 Subject: [PATCH 0128/1201] [vk] Detect geo restriction --- youtube_dl/extractor/vk.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ef8b9bcb7..b52d15ac6 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -293,8 +293,12 @@ class VKIE(VKBaseIE): # This video is no longer available, because its author has been blocked. 'url': 'https://vk.com/video-10639516_456240611', 'only_matching': True, - } - ] + }, + { + # The video is not available in your region. + 'url': 'https://vk.com/video-51812607_171445436', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -354,6 +358,9 @@ class VKIE(VKBaseIE): r'<!>This video is no longer available, because it has been deleted.': 'Video %s is no longer available, because it has been deleted.', + + r'<!>The video .+? is not available in your region.': + 'Video %s is not available in your region.', } for error_re, error_msg in ERRORS.items(): From 11d19ff50393cd195af884c6865aff6d89ed66ac Mon Sep 17 00:00:00 2001 From: mttronc <mrtn.mtth@gmx.de> Date: Thu, 6 Sep 2018 15:41:07 +0200 Subject: [PATCH 0129/1201] [wwe] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/wwe.py | 56 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 youtube_dl/extractor/wwe.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b2b00c86f..87c7d8b0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1386,6 +1386,7 @@ from .wsj import ( WSJIE, WSJArticleIE, ) +from .wwe import WWEIE from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE diff --git a/youtube_dl/extractor/wwe.py b/youtube_dl/extractor/wwe.py new file mode 100644 index 000000000..c471a79f5 --- /dev/null +++ b/youtube_dl/extractor/wwe.py @@ -0,0 +1,56 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import urljoin + + +class WWEIE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.)?wwe.com/(?:.*/)?videos/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018', + 'md5': '30cbc824b51f4010ea885bfcaec76972', + 'info_dict': { + 'id': '40048199', + 'ext': 'mp4', + 'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018', + 'description': 'Still fuming after he and his wife Brie Bella were attacked by The Miz and Maryse last week, Daniel Bryan takes care of some unfinished business with Andrade "Cien" Almas.', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }, { + 'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + drupal_settings = self._parse_json( + self._html_search_regex( + r'(?s)Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), + display_id) + + player = drupal_settings['WWEVideoLanding']['initialVideo'] + metadata = player['playlist'][0] + + id = compat_str(metadata['nid']) + title = metadata.get('title') or self._og_search_title(webpage) + video_url = 'https:' + metadata['file'] + thumbnail = None + if metadata.get('image') is not None: + thumbnail = urljoin(url, metadata.get('image')) + description = metadata.get('description') + + formats = self._extract_m3u8_formats(video_url, id, 'mp4') + + return { + 'id': id, + 'title': title, + 'formats': formats, + 'url': video_url, + 'display_id': display_id, + 'thumbnail': thumbnail, + 'description': description, + } From 006374e3aebc3be3f20b7e812c987cdf15b3ae35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Nov 2018 23:59:20 +0700 Subject: [PATCH 0130/1201] [wwe] Fix issues, extract subtitles and add support for playlists (closes #14781, closes #17450) --- youtube_dl/extractor/wwe.py | 138 +++++++++++++++++++++++++++++------- 1 file changed, 111 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/wwe.py b/youtube_dl/extractor/wwe.py index c471a79f5..bebc77bb5 100644 --- a/youtube_dl/extractor/wwe.py +++ b/youtube_dl/extractor/wwe.py @@ -1,20 +1,75 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str -from ..utils import urljoin +from ..utils import ( + try_get, + unescapeHTML, + url_or_none, + urljoin, +) -class WWEIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?wwe.com/(?:.*/)?videos/(?P<id>[\w-]+)' +class WWEBaseIE(InfoExtractor): + _SUBTITLE_LANGS = { + 'English': 'en', + 'Deutsch': 'de', + } + + def _extract_entry(self, data, url, video_id=None): + video_id = compat_str(video_id or data['nid']) + title = data['title'] + + formats = self._extract_m3u8_formats( + data['file'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + description = data.get('description') + thumbnail = urljoin(url, data.get('image')) + series = data.get('show_name') + episode = data.get('episode_name') + + subtitles = {} + tracks = data.get('tracks') + if isinstance(tracks, list): + for track in tracks: + if not isinstance(track, dict): + continue + if track.get('kind') != 'captions': + continue + track_file = url_or_none(track.get('file')) + if not track_file: + continue + label = track.get('label') + lang = self._SUBTITLE_LANGS.get(label, label) or 'en' + subtitles.setdefault(lang, []).append({ + 'url': track_file, + }) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'series': series, + 'episode': episode, + 'formats': formats, + 'subtitles': subtitles, + } + + +class WWEIE(WWEBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018', - 'md5': '30cbc824b51f4010ea885bfcaec76972', + 'md5': '92811c6a14bfc206f7a6a9c5d9140184', 'info_dict': { 'id': '40048199', 'ext': 'mp4', 'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018', - 'description': 'Still fuming after he and his wife Brie Bella were attacked by The Miz and Maryse last week, Daniel Bryan takes care of some unfinished business with Andrade "Cien" Almas.', + 'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67', 'thumbnail': r're:^https?://.*\.jpg$', } }, { @@ -26,31 +81,60 @@ class WWEIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json( + landing = self._parse_json( self._html_search_regex( - r'(?s)Drupal\.settings\s*,\s*({.+?})\);', + r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;', webpage, 'drupal settings'), - display_id) + display_id)['WWEVideoLanding'] - player = drupal_settings['WWEVideoLanding']['initialVideo'] - metadata = player['playlist'][0] + data = landing['initialVideo']['playlist'][0] + video_id = landing.get('initialVideoId') - id = compat_str(metadata['nid']) - title = metadata.get('title') or self._og_search_title(webpage) - video_url = 'https:' + metadata['file'] - thumbnail = None - if metadata.get('image') is not None: - thumbnail = urljoin(url, metadata.get('image')) - description = metadata.get('description') + info = self._extract_entry(data, url, video_id) + info['display_id'] = display_id + return info - formats = self._extract_m3u8_formats(video_url, id, 'mp4') - return { - 'id': id, - 'title': title, - 'formats': formats, - 'url': video_url, - 'display_id': display_id, - 'thumbnail': thumbnail, - 'description': description, - } +class WWEPlaylistIE(WWEBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.wwe.com/shows/raw/2018-11-12', + 'info_dict': { + 'id': '2018-11-12', + }, + 'playlist_mincount': 11, + }, { + 'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition', + 'only_matching': True, + }, { + 'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if WWEIE.suitable(url) else super(WWEPlaylistIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + entries = [] + for mobj in re.finditer( + r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage): + video = self._parse_json( + mobj.group('data'), display_id, transform_source=unescapeHTML, + fatal=False) + if not video: + continue + data = try_get(video, lambda x: x['playlist'][0], dict) + if not data: + continue + try: + entry = self._extract_entry(data, url) + except Exception: + continue + entry['extractor_key'] = WWEIE.ie_key() + entries.append(entry) + + return self.playlist_result(entries, display_id) From 02df855e1339942c00f365dc59a0e418abb4b26f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Nov 2018 00:07:40 +0700 Subject: [PATCH 0131/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index fa5de8b04..15daa1bec 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +version <unreleased> + +Extractors ++ [wwe] Extract subtitles ++ [wwe] Add support for playlistst (#14781) ++ [wwe] Add support for wwe.com (#14781, #17450) +* [vk] Detect geo restriction (#17767) +* [openload] Use original host during extraction (#18211) +* [atvat] Fix extraction (#18041) ++ [rte] Add support for new API endpoint (#18206) +* [tnaflixnetwork:embed] Fix extraction (#18205) +* [picarto] Use API and add token support (#16518) ++ [zype] Add support for player.zype.com (#18143) +* [vivo] Fix extraction (#18139) +* [ruutu] Update API endpoint (#18138) + + version 2018.11.07 Extractors From 5bb04792696c41f66f2114b0cc05b01135fa1f68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Nov 2018 00:11:54 +0700 Subject: [PATCH 0132/1201] release 2018.11.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7607e0e03..905576364 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.11.07 +[debug] youtube-dl version 2018.11.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 15daa1bec..0083c4631 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.11.18 Extractors + [wwe] Extract subtitles diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 24c3254c3..9009f7e9e 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1080,6 +1080,7 @@ - **wrzuta.pl:playlist** - **WSJ**: Wall Street Journal - **WSJArticle** + - **WWE** - **XBef** - **XboxClips** - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me @@ -1139,3 +1140,4 @@ - **ZDF** - **ZDFChannel** - **zingmp3**: mp3.zing.vn + - **Zype** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7f32ad36c..7f5ad7bf4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.11.07' +__version__ = '2018.11.18' From 4167148fa45e43a93ed202e5923223b7797340ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Nov 2018 01:07:54 +0700 Subject: [PATCH 0133/1201] [nova:embed] Fix extraction (closes #18222) --- youtube_dl/extractor/nova.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 80186ec50..901f44b54 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -35,7 +35,7 @@ class NovaEmbedIE(InfoExtractor): bitrates = self._parse_json( self._search_regex( - r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'), + r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'), video_id, transform_source=js_to_json) QUALITIES = ('lq', 'mq', 'hq', 'hd') From 1febf99da1924c46a491790639371f4ee9069193 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Nov 2018 06:26:08 +0700 Subject: [PATCH 0134/1201] [pornhub] Add pornhub.net alias --- youtube_dl/extractor/pornhub.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 19eaf389f..7ee64dbf6 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -27,7 +27,7 @@ class PornHubIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| + (?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P<id>[\da-z]+) @@ -340,7 +340,7 @@ class PornHubPlaylistBaseIE(InfoExtractor): class PornHubPlaylistIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)' + _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.(?:com|net)/playlist/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.pornhub.com/playlist/4667351', 'info_dict': { @@ -355,7 +355,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): class PornHubUserVideosIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos' + _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos' _TESTS = [{ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'info_dict': { @@ -393,6 +393,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', + 'only_matching': True, }] def _real_extract(self, url): From f97c099131f625104f64a99a02a8c9894620171a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Nov 2018 11:14:46 +0700 Subject: [PATCH 0135/1201] [pornhub] Move test to correct place --- youtube_dl/extractor/pornhub.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7ee64dbf6..c9c884095 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -121,6 +121,9 @@ class PornHubIE(InfoExtractor): }, { 'url': 'http://www.pornhub.com/video/show?viewkey=648719015', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', + 'only_matching': True, }] @staticmethod @@ -393,9 +396,6 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'only_matching': True, - }, { - 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', - 'only_matching': True, }] def _real_extract(self, url): From 964b989dc88c37b027481fb01de835b1e796ba5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Nov 2018 20:44:51 +0700 Subject: [PATCH 0136/1201] [americastestkitchen] Add support for zype embeds (closes #18225) --- youtube_dl/extractor/americastestkitchen.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py index 01736872d..8b32aa886 100644 --- a/youtube_dl/extractor/americastestkitchen.py +++ b/youtube_dl/extractor/americastestkitchen.py @@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - partner_id = self._search_regex( - r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', - webpage, 'kaltura partner id') - video_data = self._parse_json( self._search_regex( r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>', @@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor): (lambda x: x['episodeDetail']['content']['data'], lambda x: x['videoDetail']['content']['data']), dict) ep_meta = ep_data.get('full_video', {}) - external_id = ep_data.get('external_id') or ep_meta['external_id'] + + zype_id = ep_meta.get('zype_id') + if zype_id: + embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id + ie_key = 'Zype' + else: + partner_id = self._search_regex( + r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', + webpage, 'kaltura partner id') + external_id = ep_data.get('external_id') or ep_meta['external_id'] + embed_url = 'kaltura:%s:%s' % (partner_id, external_id) + ie_key = 'Kaltura' title = ep_data.get('title') or ep_meta.get('title') description = clean_html(ep_meta.get('episode_description') or ep_data.get( @@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': 'kaltura:%s:%s' % (partner_id, external_id), - 'ie_key': 'Kaltura', + 'url': embed_url, + 'ie_key': ie_key, 'title': title, 'description': description, 'thumbnail': thumbnail, From 9b27a78a881bceb9d62f3364399d7572e8e2be24 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 18 Nov 2018 16:13:46 +0100 Subject: [PATCH 0137/1201] [kaltura] limit requested MediaEntry fields --- youtube_dl/extractor/kaltura.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 04f68fce4..fdf7f5bbc 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -192,6 +192,8 @@ class KalturaIE(InfoExtractor): 'entryId': video_id, 'service': 'baseentry', 'ks': '{1:result:ks}', + 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', + 'responseProfile:type': 1, }, { 'action': 'getbyentryid', From 8578ea4dcb17834ee3843e0e337c15af706f9803 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 18 Nov 2018 16:15:10 +0100 Subject: [PATCH 0138/1201] [bitchute] use _html_search_regex for title extraction --- youtube_dl/extractor/bitchute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py index 446a1ab19..43b4732aa 100644 --- a/youtube_dl/extractor/bitchute.py +++ b/youtube_dl/extractor/bitchute.py @@ -37,7 +37,7 @@ class BitChuteIE(InfoExtractor): 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', }) - title = self._search_regex( + title = self._html_search_regex( (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'), webpage, 'title', default=None) or self._html_search_meta( 'description', webpage, 'title', From 2e1280ed432257244ea52a47efe6a7f0e226b897 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 19 Nov 2018 18:15:51 +0100 Subject: [PATCH 0139/1201] [sixplay] fix format extraction --- youtube_dl/extractor/sixplay.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index 207ab4477..0c4f865ef 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -64,7 +64,7 @@ class SixPlayIE(InfoExtractor): for asset in clip_data['assets']: asset_url = asset.get('full_physical_path') protocol = asset.get('protocol') - if not asset_url or protocol == 'primetime' or asset_url in urls: + if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls: continue urls.append(asset_url) container = asset.get('video_container') @@ -81,19 +81,17 @@ class SixPlayIE(InfoExtractor): if not urlh: continue asset_url = urlh.geturl() - asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url) - formats.extend(self._extract_m3u8_formats( - asset_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - asset_url.replace('.m3u8', '.f4m'), - video_id, f4m_id='hds', fatal=False)) - formats.extend(self._extract_mpd_formats( - asset_url.replace('.m3u8', '.mpd'), - video_id, mpd_id='dash', fatal=False)) - formats.extend(self._extract_ism_formats( - re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url), - video_id, ism_id='mss', fatal=False)) + for i in range(3, 0, -1): + asset_url = asset_url = asset_url.replace('_sd1/', '_sd%d/' % i) + m3u8_formats = self._extract_m3u8_formats( + asset_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + formats.extend(m3u8_formats) + formats.extend(self._extract_mpd_formats( + asset_url.replace('.m3u8', '.mpd'), + video_id, mpd_id='dash', fatal=False)) + if m3u8_formats: + break else: formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', 'm3u8_native', From 15ed5a27840e748d9f786c50b78a4c6326e9f186 Mon Sep 17 00:00:00 2001 From: Alexander Seiler <seileralex@gmail.com> Date: Tue, 20 Nov 2018 20:50:40 +0100 Subject: [PATCH 0140/1201] [nzz] Relax kaltura regex --- youtube_dl/extractor/nzz.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py index 2d352f53f..61ee77adb 100644 --- a/youtube_dl/extractor/nzz.py +++ b/youtube_dl/extractor/nzz.py @@ -11,20 +11,27 @@ from ..utils import ( class NZZIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153', 'info_dict': { 'id': '9153', }, 'playlist_mincount': 6, - } + }, { + 'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112', + 'info_dict': { + 'id': '1368112', + }, + 'playlist_count': 1, + }] def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) entries = [] - for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage): + for player_element in re.findall( + r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage): player_params = extract_attributes(player_element) if player_params.get('data-type') not in ('kaltura_singleArticle',): self.report_warning('Unsupported player type') From 05bd5e9c77e0e8acb95f47396be4c970fc9f39c4 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank <austindcc@gmail.com> Date: Fri, 26 Oct 2018 19:15:44 -0700 Subject: [PATCH 0141/1201] [ciscolive] Add extractor --- youtube_dl/extractor/ciscolive.py | 136 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 137 insertions(+) create mode 100644 youtube_dl/extractor/ciscolive.py diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py new file mode 100644 index 000000000..2db7aad2c --- /dev/null +++ b/youtube_dl/extractor/ciscolive.py @@ -0,0 +1,136 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlparse, + compat_parse_qs +) +from ..utils import ( + clean_html, + int_or_none, + try_get, + urlencode_postdata, +) + + +class CiscoLiveIE(InfoExtractor): + IE_NAME = 'ciscolive' + _VALID_URL = r'(?:https?://)?ciscolive\.cisco\.com/on-demand-library/\??(?P<query>[^#]+)#/(?:session/(?P<id>.+))?$' + _TESTS = [ + { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', + 'md5': 'c98acf395ed9c9f766941c70f5352e22', + 'info_dict': { + 'id': '5803694304001', + 'ext': 'mp4', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', + 'description': 'md5:ec4a436019e09a918dec17714803f7cc', + 'timestamp': 1530305395, + 'uploader_id': '5647924234001', + 'upload_date': '20180629', + 'location': '16B Mezz.', + }, + }, + { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', + 'md5': '993d4cf051f6174059328b1dce8e94bd', + 'info_dict': { + 'upload_date': '20180629', + 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco', + 'timestamp': 1530316421, + 'uploader_id': '5647924234001', + 'id': '5803751616001', + 'description': 'md5:5f144575cd6848117fe2f756855b038b', + 'location': 'WoS, DevNet Theater', + 'ext': 'mp4', + }, + }, + { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', + 'md5': '80e0c3b87e373fe3a3316b934b8915bf', + 'info_dict': { + 'upload_date': '20180629', + 'title': 'Beating the CCIE Routing & Switching', + 'timestamp': 1530311842, + 'uploader_id': '5647924234001', + 'id': '5803735679001', + 'description': 'md5:e71970799e92d7f5ff57ae23f64b0929', + 'location': 'Tulúm 02', + 'ext': 'mp4', + }, + } + ] + + # These appear to be constant across all Cisco Live presentations + # and are not tied to any user session or event + RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' + RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' + + def _parse_rf_item(self, rf_item): + ''' Parses metadata and passes to Brightcove extractor ''' + event_name = rf_item.get('eventName') + title = rf_item['title'] + description = clean_html(rf_item.get('abstract')) + presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) + bc_id = rf_item['videos'][0]['url'] + bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id + duration = int_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) + location = try_get(rf_item, lambda x: x['times'][0]['room']) + + if duration: + duration = duration * 60 + + return { + '_type': 'url_transparent', + 'creator': presenter_name, + 'description': description, + 'duration': duration, + 'ie_key': 'BrightcoveNew', + 'location': location, + 'series': event_name, + 'title': title, + 'url': bc_url, + } + + def _check_bc_id_exists(self, rf_item): + ''' Checks for the existence of a Brightcove URL in an API result ''' + bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) + if bc_id: + if bc_id.strip().isdigit(): + return rf_item + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + HEADERS = { + 'Origin': 'https://ciscolive.cisco.com', + 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, + 'rfWidgetId': self.RAINFOCUS_WIDGETID, + 'Referer': url, + } + # Single session URL (single video) + if mobj.group('id'): + rf_id = mobj.group('id') + request = self.RAINFOCUS_API_URL % 'session' + data = urlencode_postdata({'id': rf_id}) + rf_result = self._download_json(request, rf_id, data=data, headers=HEADERS) + rf_item = self._check_bc_id_exists(rf_result['items'][0]) + return self._parse_rf_item(rf_item) + else: + # Filter query URL (multiple videos) + rf_query = compat_parse_qs((compat_urllib_parse_urlparse(url).query)) + rf_query['type'] = 'session' + rf_query['size'] = 1000 + data = urlencode_postdata(rf_query) + request = self.RAINFOCUS_API_URL % 'search' + rf_results = self._download_json(request, 'Filter query', data=data, headers=HEADERS) + entries = [ + self._parse_rf_item(rf_item) + for rf_item + in rf_results['sectionList'][0]['items'] + if self._check_bc_id_exists(rf_item) + ] + return self.playlist_result(entries, 'Filter query') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 87c7d8b0c..2c5988a14 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -194,6 +194,7 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE +from .ciscolive import CiscoLiveIE from .cjsw import CJSWIE from .cliphunter import CliphunterIE from .clippit import ClippitIE From 6a6d7f064178427d28986884524bd3434f0ca957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 21 Nov 2018 05:25:43 +0700 Subject: [PATCH 0142/1201] [ciscolive] Fix issues and improve extraction (closes #17984) --- youtube_dl/extractor/ciscolive.py | 176 ++++++++++++++--------------- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 87 insertions(+), 94 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 2db7aad2c..32f645713 100644 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -1,84 +1,49 @@ # coding: utf-8 from __future__ import unicode_literals -import re from .common import InfoExtractor from ..compat import ( + compat_parse_qs, compat_urllib_parse_urlparse, - compat_parse_qs ) from ..utils import ( clean_html, + float_or_none, int_or_none, try_get, urlencode_postdata, ) -class CiscoLiveIE(InfoExtractor): - IE_NAME = 'ciscolive' - _VALID_URL = r'(?:https?://)?ciscolive\.cisco\.com/on-demand-library/\??(?P<query>[^#]+)#/(?:session/(?P<id>.+))?$' - _TESTS = [ - { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', - 'md5': 'c98acf395ed9c9f766941c70f5352e22', - 'info_dict': { - 'id': '5803694304001', - 'ext': 'mp4', - 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', - 'description': 'md5:ec4a436019e09a918dec17714803f7cc', - 'timestamp': 1530305395, - 'uploader_id': '5647924234001', - 'upload_date': '20180629', - 'location': '16B Mezz.', - }, - }, - { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', - 'md5': '993d4cf051f6174059328b1dce8e94bd', - 'info_dict': { - 'upload_date': '20180629', - 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco', - 'timestamp': 1530316421, - 'uploader_id': '5647924234001', - 'id': '5803751616001', - 'description': 'md5:5f144575cd6848117fe2f756855b038b', - 'location': 'WoS, DevNet Theater', - 'ext': 'mp4', - }, - }, - { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', - 'md5': '80e0c3b87e373fe3a3316b934b8915bf', - 'info_dict': { - 'upload_date': '20180629', - 'title': 'Beating the CCIE Routing & Switching', - 'timestamp': 1530311842, - 'uploader_id': '5647924234001', - 'id': '5803735679001', - 'description': 'md5:e71970799e92d7f5ff57ae23f64b0929', - 'location': 'Tulúm 02', - 'ext': 'mp4', - }, - } - ] - +class CiscoLiveBaseIE(InfoExtractor): # These appear to be constant across all Cisco Live presentations # and are not tied to any user session or event RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' - RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' - RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' + HEADERS = { + 'Origin': 'https://ciscolive.cisco.com', + 'rfApiProfileId': RAINFOCUS_API_PROFILE_ID, + 'rfWidgetId': RAINFOCUS_WIDGET_ID, + } + + def _call_api(self, ep, rf_id, query, referrer): + headers = self.HEADERS.copy() + headers['Referer'] = referrer + return self._download_json( + self.RAINFOCUS_API_URL % ep, rf_id, data=urlencode_postdata(query), + headers=headers) + def _parse_rf_item(self, rf_item): - ''' Parses metadata and passes to Brightcove extractor ''' event_name = rf_item.get('eventName') title = rf_item['title'] description = clean_html(rf_item.get('abstract')) presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) bc_id = rf_item['videos'][0]['url'] bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id - duration = int_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) + duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) location = try_get(rf_item, lambda x: x['times'][0]['room']) if duration: @@ -86,51 +51,76 @@ class CiscoLiveIE(InfoExtractor): return { '_type': 'url_transparent', - 'creator': presenter_name, + 'url': bc_url, + 'ie_key': 'BrightcoveNew', + 'title': title, 'description': description, 'duration': duration, - 'ie_key': 'BrightcoveNew', + 'creator': presenter_name, 'location': location, 'series': event_name, - 'title': title, - 'url': bc_url, } - def _check_bc_id_exists(self, rf_item): - ''' Checks for the existence of a Brightcove URL in an API result ''' - bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) - if bc_id: - if bc_id.strip().isdigit(): - return rf_item + +class CiscoLiveSessionIE(CiscoLiveBaseIE): + _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)' + _TEST = { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', + 'md5': 'c98acf395ed9c9f766941c70f5352e22', + 'info_dict': { + 'id': '5803694304001', + 'ext': 'mp4', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', + 'description': 'md5:ec4a436019e09a918dec17714803f7cc', + 'timestamp': 1530305395, + 'upload_date': '20180629', + 'uploader_id': '5647924234001', + 'location': '16B Mezz.', + }, + 'params': { + 'proxy': '127.0.0.1:8118', + } + } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - HEADERS = { - 'Origin': 'https://ciscolive.cisco.com', - 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, - 'rfWidgetId': self.RAINFOCUS_WIDGETID, - 'Referer': url, + rf_id = self._match_id(url) + rf_result = self._call_api('session', rf_id, {'id': rf_id}, url) + return self._parse_rf_item(rf_result['items'][0]) + + +class CiscoLiveSearchIE(CiscoLiveBaseIE): + _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/' + _TESTS = [{ + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', + 'info_dict': { + 'title': 'Filter query', + }, + 'playlist_count': 5, + 'params': { + 'proxy': '127.0.0.1:8118', } - # Single session URL (single video) - if mobj.group('id'): - rf_id = mobj.group('id') - request = self.RAINFOCUS_API_URL % 'session' - data = urlencode_postdata({'id': rf_id}) - rf_result = self._download_json(request, rf_id, data=data, headers=HEADERS) - rf_item = self._check_bc_id_exists(rf_result['items'][0]) - return self._parse_rf_item(rf_item) - else: - # Filter query URL (multiple videos) - rf_query = compat_parse_qs((compat_urllib_parse_urlparse(url).query)) - rf_query['type'] = 'session' - rf_query['size'] = 1000 - data = urlencode_postdata(rf_query) - request = self.RAINFOCUS_API_URL % 'search' - rf_results = self._download_json(request, 'Filter query', data=data, headers=HEADERS) - entries = [ - self._parse_rf_item(rf_item) - for rf_item - in rf_results['sectionList'][0]['items'] - if self._check_bc_id_exists(rf_item) - ] - return self.playlist_result(entries, 'Filter query') + }, { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url) + + @staticmethod + def _check_bc_id_exists(rf_item): + return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None + + def _real_extract(self, url): + rf_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + rf_query['type'] = 'session' + rf_query['size'] = 1000 + rf_results = self._call_api('search', None, rf_query, url) + entries = [ + self._parse_rf_item(rf_item) + for rf_item + in rf_results['sectionList'][0]['items'] + if self._check_bc_id_exists(rf_item) + ] + return self.playlist_result(entries, playlist_title='Filter query') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2c5988a14..60e6175b1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -194,7 +194,10 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE -from .ciscolive import CiscoLiveIE +from .ciscolive import ( + CiscoLiveSessionIE, + CiscoLiveSearchIE, +) from .cjsw import CJSWIE from .cliphunter import CliphunterIE from .clippit import ClippitIE From 183417a50fd68c0c63b1d0621c6a0b44fbf2ac52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 21 Nov 2018 06:04:34 +0700 Subject: [PATCH 0143/1201] [ciscolive:search] Add support for pagination --- youtube_dl/extractor/ciscolive.py | 58 ++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 32f645713..c99b6ee58 100644 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools + from .common import InfoExtractor from ..compat import ( compat_parse_qs, @@ -29,12 +31,12 @@ class CiscoLiveBaseIE(InfoExtractor): 'rfWidgetId': RAINFOCUS_WIDGET_ID, } - def _call_api(self, ep, rf_id, query, referrer): + def _call_api(self, ep, rf_id, query, referrer, note=None): headers = self.HEADERS.copy() headers['Referer'] = referrer return self._download_json( - self.RAINFOCUS_API_URL % ep, rf_id, data=urlencode_postdata(query), - headers=headers) + self.RAINFOCUS_API_URL % ep, rf_id, note=note, + data=urlencode_postdata(query), headers=headers) def _parse_rf_item(self, rf_item): event_name = rf_item.get('eventName') @@ -77,9 +79,6 @@ class CiscoLiveSessionIE(CiscoLiveBaseIE): 'uploader_id': '5647924234001', 'location': '16B Mezz.', }, - 'params': { - 'proxy': '127.0.0.1:8118', - } } def _real_extract(self, url): @@ -93,12 +92,9 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): _TESTS = [{ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', 'info_dict': { - 'title': 'Filter query', + 'title': 'Search query', }, 'playlist_count': 5, - 'params': { - 'proxy': '127.0.0.1:8118', - } }, { 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', 'only_matching': True, @@ -112,15 +108,35 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): def _check_bc_id_exists(rf_item): return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None + def _entries(self, query, url): + query['size'] = 50 + query['from'] = 0 + for page_num in itertools.count(1): + results = self._call_api( + 'search', None, query, url, + 'Downloading search JSON page %d' % page_num) + sl = try_get(results, lambda x: x['sectionList'][0], dict) + if sl: + results = sl + items = results.get('items') + if not items or not isinstance(items, list): + break + for item in items: + if not isinstance(item, dict): + continue + if not self._check_bc_id_exists(item): + continue + yield self._parse_rf_item(item) + size = int_or_none(results.get('size')) + if size is not None: + query['size'] = size + total = int_or_none(results.get('total')) + if total is not None and query['from'] + query['size'] > total: + break + query['from'] += query['size'] + def _real_extract(self, url): - rf_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - rf_query['type'] = 'session' - rf_query['size'] = 1000 - rf_results = self._call_api('search', None, rf_query, url) - entries = [ - self._parse_rf_item(rf_item) - for rf_item - in rf_results['sectionList'][0]['items'] - if self._check_bc_id_exists(rf_item) - ] - return self.playlist_result(entries, playlist_title='Filter query') + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + query['type'] = 'session' + return self.playlist_result( + self._entries(query, url), playlist_title='Search query') From 6c882aa8991383e1c39a6457cbde5dcab260bff5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Nov 2018 09:44:10 +0100 Subject: [PATCH 0144/1201] [loc] relax _VALID_URL regex and improve formats extraction --- youtube_dl/extractor/libraryofcongress.py | 37 +++++++++++++++-------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 40295a30b..1e5c82c66 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -16,7 +16,7 @@ from ..utils import ( class LibraryOfCongressIE(InfoExtractor): IE_NAME = 'loc' IE_DESC = 'Library of Congress' - _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9a-z_.]+)' _TESTS = [{ # embedded via <div class="media-player" 'url': 'http://loc.gov/item/90716351/', @@ -57,6 +57,12 @@ class LibraryOfCongressIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.loc.gov/item/ihas.200197114/', + 'only_matching': True, + }, { + 'url': 'https://www.loc.gov/item/afc1981005_afs20503/', + 'only_matching': True, }] def _real_extract(self, url): @@ -67,12 +73,13 @@ class LibraryOfCongressIE(InfoExtractor): (r'id=(["\'])media-player-(?P<id>.+?)\1', r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1', r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1', - r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'), + r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1', + r'data-tab="share-media-(?P<id>[0-9A-F]{32})"'), webpage, 'media id', group='id') data = self._download_json( 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, - video_id)['mediaObject'] + media_id)['mediaObject'] derivative = data['derivatives'][0] media_url = derivative['derivativeUrl'] @@ -89,25 +96,29 @@ class LibraryOfCongressIE(InfoExtractor): if ext not in ('mp4', 'mp3'): media_url += '.mp4' if is_video else '.mp3' - if 'vod/mp4:' in media_url: - formats = [{ - 'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8', + formats = [] + if '/vod/mp4:' in media_url: + formats.append({ + 'url': media_url.replace('/vod/mp4:', '/hls-vod/media/') + '.m3u8', 'format_id': 'hls', 'ext': 'mp4', 'protocol': 'm3u8_native', 'quality': 1, - }] - elif 'vod/mp3:' in media_url: - formats = [{ - 'url': media_url.replace('vod/mp3:', ''), - 'vcodec': 'none', - }] + }) + http_format = { + 'url': re.sub(r'(://[^/]+/)(?:[^/]+/)*(?:mp4|mp3):', r'\1', media_url), + 'format_id': 'http', + 'quality': 1, + } + if not is_video: + http_format['vcodec'] = 'none' + formats.append(http_format) download_urls = set() for m in re.finditer( r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage): format_id = m.group('id').lower() - if format_id == 'gif': + if format_id in ('gif', 'jpeg'): continue download_url = m.group('url') if download_url in download_urls: From 35328915b5fe5c8915b924cfbc54bbdd6d6d1430 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Nov 2018 09:46:13 +0100 Subject: [PATCH 0145/1201] [foxsports] fix extraction(closes #17543) --- youtube_dl/extractor/foxsports.py | 17 +++-------------- youtube_dl/extractor/theplatform.py | 5 +++-- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py index 985542727..596fded20 100644 --- a/youtube_dl/extractor/foxsports.py +++ b/youtube_dl/extractor/foxsports.py @@ -8,7 +8,7 @@ from ..utils import ( class FoxSportsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)' _TEST = { 'url': 'http://www.foxsports.com/tennessee/video/432609859715', @@ -28,16 +28,5 @@ class FoxSportsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - config = self._parse_json( - self._html_search_regex( - r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""", - webpage, 'data player config'), - video_id) - - return self.url_result(smuggle_url(update_url_query( - config['releaseURL'], { - 'mbr': 'true', - 'switch': 'http', - }), {'force_smil_url': True})) + return self.url_result( + 'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed') diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 181620615..90b351cbb 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -343,7 +343,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None): real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query) entry = self._download_json(real_url, video_id)['entries'][0] - main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None + main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl') formats = [] subtitles = {} @@ -356,7 +356,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE): if first_video_id is None: first_video_id = cur_video_id duration = float_or_none(item.get('plfile$duration')) - for asset_type in item['plfile$assetTypes']: + file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes'] + for asset_type in file_asset_types: if asset_type in asset_types: continue asset_types.append(asset_type) From 4e33e0792a3e134b494bd71f257a674294cca8d9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Nov 2018 12:00:50 +0100 Subject: [PATCH 0146/1201] [loc] update test --- youtube_dl/extractor/libraryofcongress.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 1e5c82c66..03f205144 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -20,12 +20,11 @@ class LibraryOfCongressIE(InfoExtractor): _TESTS = [{ # embedded via <div class="media-player" 'url': 'http://loc.gov/item/90716351/', - 'md5': '353917ff7f0255aa6d4b80a034833de8', + 'md5': '6ec0ae8f07f86731b1b2ff70f046210a', 'info_dict': { 'id': '90716351', 'ext': 'mp4', 'title': "Pa's trip to Mars", - 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 0, 'view_count': int, }, From 6866f2449437eeb0ad93b80e5bf39cf758af7a26 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Nov 2018 12:08:46 +0100 Subject: [PATCH 0147/1201] [foxsports] update test --- youtube_dl/extractor/foxsports.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py index 596fded20..2b2cb6c6f 100644 --- a/youtube_dl/extractor/foxsports.py +++ b/youtube_dl/extractor/foxsports.py @@ -1,10 +1,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - smuggle_url, - update_url_query, -) class FoxSportsIE(InfoExtractor): @@ -14,14 +10,19 @@ class FoxSportsIE(InfoExtractor): 'url': 'http://www.foxsports.com/tennessee/video/432609859715', 'md5': 'b49050e955bebe32c301972e4012ac17', 'info_dict': { - 'id': 'bwduI3X_TgUB', + 'id': '432609859715', 'ext': 'mp4', 'title': 'Courtney Lee on going up 2-0 in series vs. Blazers', 'description': 'Courtney Lee talks about Memphis being focused.', - 'upload_date': '20150423', - 'timestamp': 1429761109, + # TODO: fix timestamp + 'upload_date': '19700101', # '20150423', + # 'timestamp': 1429761109, 'uploader': 'NEWA-FNG-FOXSPORTS', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['ThePlatform'], } From a843464a7e0608b679651f913cbd9447a7b928c0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Nov 2018 12:10:06 +0100 Subject: [PATCH 0148/1201] [nbc] fix NBCNews article extraction(closes #16194) --- youtube_dl/extractor/nbc.py | 91 ++++++++----------------------------- 1 file changed, 19 insertions(+), 72 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 765c46fd2..3282f84ee 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -9,10 +9,8 @@ from .theplatform import ThePlatformIE from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..utils import ( - find_xpath_attr, smuggle_url, try_get, - unescapeHTML, update_url_query, int_or_none, ) @@ -269,27 +267,14 @@ class CSNNEIE(InfoExtractor): class NBCNewsIE(ThePlatformIE): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/ - (?:video/.+?/(?P<id>\d+)| - ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+)) - ''' + _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)' _TESTS = [ - { - 'url': 'http://www.nbcnews.com/video/nbc-news/52753292', - 'md5': '47abaac93c6eaf9ad37ee6c4463a5179', - 'info_dict': { - 'id': '52753292', - 'ext': 'flv', - 'title': 'Crew emerges after four-month Mars food study', - 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', - }, - }, { 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', 'md5': 'af1adfa51312291a017720403826bb64', 'info_dict': { - 'id': 'p_tweet_snow_140529', + 'id': '269389891880', 'ext': 'mp4', 'title': 'How Twitter Reacted To The Snowden Interview', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', @@ -313,7 +298,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', 'md5': '73135a2e0ef819107bbb55a5a9b2a802', 'info_dict': { - 'id': 'nn_netcast_150204', + 'id': '394064451844', 'ext': 'mp4', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', @@ -326,7 +311,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', 'md5': 'a49e173825e5fcd15c13fc297fced39d', 'info_dict': { - 'id': 'x_lon_vwhorn_150922', + 'id': '529953347624', 'ext': 'mp4', 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up', 'description': 'md5:c8be487b2d80ff0594c005add88d8351', @@ -339,7 +324,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', 'md5': '118d7ca3f0bea6534f119c68ef539f71', 'info_dict': { - 'id': 'tdy_al_space_160420', + 'id': '669831235788', 'ext': 'mp4', 'title': 'See the aurora borealis from space in stunning new NASA video', 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', @@ -352,7 +337,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', 'md5': '6d236bf4f3dddc226633ce6e2c3f814d', 'info_dict': { - 'id': 'n_hayes_Aimm_140801_272214', + 'id': '314487875924', 'ext': 'mp4', 'title': 'The chaotic GOP immigration vote', 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', @@ -374,60 +359,22 @@ class NBCNewsIE(ThePlatformIE): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - if video_id is not None: - all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) - info = all_info.find('video') - - return { - 'id': video_id, - 'title': info.find('headline').text, - 'ext': 'flv', - 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, - 'description': info.find('caption').text, - 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, - } - else: - # "feature" and "nightly-news" pages use theplatform.com - video_id = mobj.group('mpx_id') + video_id = self._match_id(url) + if not video_id.isdigit(): webpage = self._download_webpage(url, video_id) - filter_param = 'byId' - bootstrap_json = self._search_regex( - [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', - r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"', - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'], - webpage, 'bootstrap json', default=None) - if bootstrap_json: - bootstrap = self._parse_json( - bootstrap_json, video_id, transform_source=unescapeHTML) + data = self._parse_json(self._search_regex( + r'window\.__data\s*=\s*({.+});', webpage, + 'bootstrap json'), video_id) + video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id'] - info = None - if 'results' in bootstrap: - info = bootstrap['results'][0]['video'] - elif 'video' in bootstrap: - info = bootstrap['video'] - elif 'msnbcVideoInfo' in bootstrap: - info = bootstrap['msnbcVideoInfo']['meta'] - elif 'msnbcThePlatform' in bootstrap: - info = bootstrap['msnbcThePlatform']['videoPlayer']['video'] - else: - info = bootstrap - - if 'guid' in info: - video_id = info['guid'] - filter_param = 'byGuid' - elif 'mpxId' in info: - video_id = info['mpxId'] - - return { - '_type': 'url_transparent', - 'id': video_id, - # http://feed.theplatform.com/f/2E2eJC/nbcnews also works - 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}), - 'ie_key': 'ThePlatformFeed', - } + return { + '_type': 'url_transparent', + 'id': video_id, + # http://feed.theplatform.com/f/2E2eJC/nbcnews also works + 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}), + 'ie_key': 'ThePlatformFeed', + } class NBCOlympicsIE(InfoExtractor): From af60e81e3c557ace943aab35c1364d3d03d5a3bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Nov 2018 02:00:38 +0700 Subject: [PATCH 0149/1201] [setup.py] Add more relevant classifiers --- setup.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/setup.py b/setup.py index 7dbb5805f..a1a08f1e2 100644 --- a/setup.py +++ b/setup.py @@ -124,6 +124,8 @@ setup( 'Development Status :: 5 - Production/Stable', 'Environment :: Console', 'License :: Public Domain', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', @@ -132,6 +134,12 @@ setup( 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: Implementation', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: IronPython', + 'Programming Language :: Python :: Implementation :: Jython', + 'Programming Language :: Python :: Implementation :: PyPy', ], cmdclass={'build_lazy_extractors': build_lazy_extractors}, From bd2d553c7b1529f793c2b7343c514a558543fc0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Nov 2018 02:01:22 +0700 Subject: [PATCH 0150/1201] [travis] Add python 3.7 build --- .travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.travis.yml b/.travis.yml index 92f326860..1ea640071 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,12 @@ env: - YTDL_TEST_SET=download matrix: include: + - python: 3.7 + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.7 + dist: xenial + env: YTDL_TEST_SET=download - env: JYTHON=true; YTDL_TEST_SET=core - env: JYTHON=true; YTDL_TEST_SET=download fast_finish: true From 157eef3e635230cbba0dd0c74f7115029867533e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Nov 2018 02:08:41 +0700 Subject: [PATCH 0151/1201] [setup.py] Add python 3.8 classifier --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index a1a08f1e2..dfb669ad2 100644 --- a/setup.py +++ b/setup.py @@ -135,6 +135,7 @@ setup( 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: Implementation', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: IronPython', From 305ce767d586e8796d873270abf771e69ff5586c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Nov 2018 02:34:35 +0700 Subject: [PATCH 0152/1201] [travis] Add python 3.8-dev build --- .travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.travis.yml b/.travis.yml index 1ea640071..79287ccf6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,12 @@ matrix: - python: 3.7 dist: xenial env: YTDL_TEST_SET=download + - python: 3.8-dev + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.8-dev + dist: xenial + env: YTDL_TEST_SET=download - env: JYTHON=true; YTDL_TEST_SET=core - env: JYTHON=true; YTDL_TEST_SET=download fast_finish: true From 560020da3049bec19e5714e9e24fc90fadd06582 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Nov 2018 23:19:46 +0100 Subject: [PATCH 0153/1201] [mixcloud] fallback to hardcoded decryption key(closes #18016) --- youtube_dl/extractor/mixcloud.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index b7bccb504..a2d19d3ef 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -161,11 +161,17 @@ class MixcloudIE(InfoExtractor): stream_info = info_json['streamInfo'] formats = [] + def decrypt_url(f_url): + for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'): + decrypted_url = self._decrypt_xor_cipher(k, compat_b64decode(f_url)) + if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): + return decrypted_url + for url_key in ('url', 'hlsUrl', 'dashUrl'): format_url = stream_info.get(url_key) if not format_url: continue - decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url)) + decrypted = decrypt_url(format_url) if not decrypted: continue if url_key == 'hlsUrl': From 6f2883a2df45ca89d272bc8a0975f09758af5eb3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Nov 2018 23:25:38 +0100 Subject: [PATCH 0154/1201] [mixcloud] base64 decode before decryption --- youtube_dl/extractor/mixcloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index a2d19d3ef..bcac13ec5 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -163,7 +163,7 @@ class MixcloudIE(InfoExtractor): def decrypt_url(f_url): for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'): - decrypted_url = self._decrypt_xor_cipher(k, compat_b64decode(f_url)) + decrypted_url = self._decrypt_xor_cipher(k, f_url) if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): return decrypted_url @@ -171,7 +171,7 @@ class MixcloudIE(InfoExtractor): format_url = stream_info.get(url_key) if not format_url: continue - decrypted = decrypt_url(format_url) + decrypted = decrypt_url(compat_b64decode(format_url)) if not decrypted: continue if url_key == 'hlsUrl': From 66173211c4177d36612486acfd99fc4634b8004e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 23 Nov 2018 00:14:43 +0700 Subject: [PATCH 0155/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index 0083c4631..beb002041 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version <unreleased> + +Core ++ [setup.py] Add more relevant classifiers + +Extractors +* [mixcloud] Fallback to hardcoded decryption key (#18016) +* [nbc:news] Fix article extraction (#16194) +* [foxsports] Fix extraction (#17543) +* [loc] Relax regular expression and improve formats extraction ++ [ciscolive] Add support for ciscolive.cisco.com (#17984) +* [nzz] Relax kaltura regex (#18228) +* [sixplay] Fix formats extraction +* [bitchute] Improve title extraction +* [kaltura] Limit requested MediaEntry fields ++ [americastestkitchen] Add support for zype embeds (#18225) ++ [pornhub] Add pornhub.net alias +* [nova:embed] Fix extraction (#18222) + + version 2018.11.18 Extractors From d861a9d5814408973e0715bb9160fb7db34fbcd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 23 Nov 2018 00:16:45 +0700 Subject: [PATCH 0156/1201] release 2018.11.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 905576364..35cc8d6d0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.11.18 +[debug] youtube-dl version 2018.11.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index beb002041..f82c7ea35 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.11.23 Core + [setup.py] Add more relevant classifiers diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9009f7e9e..7d72ad82d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -163,6 +163,8 @@ - **chirbit** - **chirbit:profile** - **Cinchcast** + - **CiscoLiveSearch** + - **CiscoLiveSession** - **CJSW** - **cliphunter** - **Clippit** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7f5ad7bf4..4956365d0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.11.18' +__version__ = '2018.11.23' From 6864855eb111dbf6e0efe9ed086f48efa1d9f209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 23 Nov 2018 00:43:42 +0700 Subject: [PATCH 0157/1201] [tests] Fix invalid escape sequences --- test/test_compat.py | 2 +- test/test_postprocessors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index d6c54e135..51fe6aa0b 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase): def test_compat_expanduser(self): old_home = os.environ.get('HOME') - test_str = 'C:\Documents and Settings\тест\Application Data' + test_str = r'C:\Documents and Settings\тест\Application Data' compat_setenv('HOME', test_str) self.assertEqual(compat_expanduser('~'), test_str) compat_setenv('HOME', old_home or '') diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index addb69d6f..4209d1d9a 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP class TestMetadataFromTitle(unittest.TestCase): def test_format_to_regex(self): pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s') - self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)') + self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)') From 641e86e3cf751f1050ca331b9d13152bd0e18558 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Nov 2018 21:47:41 +0700 Subject: [PATCH 0158/1201] [wistia] Add support for fast.wistia.com (closes #18287) --- youtube_dl/extractor/wistia.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 2182d6fd4..01a51275e 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -12,7 +12,7 @@ from ..utils import ( class WistiaIE(InfoExtractor): - _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/iframe/)(?P<id>[a-z0-9]+)' _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' @@ -35,12 +35,15 @@ class WistiaIE(InfoExtractor): # with hls video 'url': 'wistia:807fafadvk', 'only_matching': True, + }, { + 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt', + 'only_matching': True, }] @staticmethod def _extract_url(webpage): match = re.search( - r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) + r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage) if match: return unescapeHTML(match.group('url')) From d19600df07128c73ef7242af7e1cd8c819951aba Mon Sep 17 00:00:00 2001 From: Alexander Seiler <seileralex@gmail.com> Date: Sat, 24 Nov 2018 16:14:27 +0100 Subject: [PATCH 0159/1201] [joj] Fix extraction (closes #18280) --- youtube_dl/extractor/joj.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py index d9f8dbfd2..62b28e980 100644 --- a/youtube_dl/extractor/joj.py +++ b/youtube_dl/extractor/joj.py @@ -61,7 +61,7 @@ class JojIE(InfoExtractor): bitrates = self._parse_json( self._search_regex( - r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates', + r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates', default='{}'), video_id, transform_source=js_to_json, fatal=False) From ca01d178844129bd4b6ed74740fbd30e7f84c1c2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 28 Nov 2018 19:53:22 +0100 Subject: [PATCH 0160/1201] [vimeo] Add support for VHX(Vimeo OTT)(#14835) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vimeo.py | 85 +++++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 60e6175b1..cd91c0fcb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1303,6 +1303,7 @@ from .vimeo import ( VimeoReviewIE, VimeoUserIE, VimeoWatchLaterIE, + VHXEmbedIE, ) from .vimple import VimpleIE from .vine import ( diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 88f4d9979..6353c6831 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -14,10 +14,13 @@ from ..compat import ( from ..utils import ( determine_ext, ExtractorError, + js_to_json, InAdvancePagedList, int_or_none, merge_dicts, NO_DEFAULT, + parse_filesize, + qualities, RegexNotFoundError, sanitized_Request, smuggle_url, @@ -27,7 +30,6 @@ from ..utils import ( unsmuggle_url, urlencode_postdata, unescapeHTML, - parse_filesize, ) @@ -1063,3 +1065,84 @@ class VimeoLikesIE(InfoExtractor): 'description': description, 'entries': pl, } + + +class VHXEmbedIE(InfoExtractor): + IE_NAME = 'vhx:embed' + _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)' + + def _call_api(self, video_id, access_token, path='', query=None): + return self._download_json( + 'https://api.vhx.tv/videos/' + video_id + path, video_id, headers={ + 'Authorization': 'Bearer ' + access_token, + }, query=query) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + credentials = self._parse_json(self._search_regex( + r'(?s)credentials\s*:\s*({.+?}),', webpage, + 'config'), video_id, js_to_json) + access_token = credentials['access_token'] + + query = {} + for k, v in credentials.items(): + if k in ('authorization', 'authUserToken', 'ticket') and v and v != 'undefined': + if k == 'authUserToken': + query['auth_user_token'] = v + else: + query[k] = v + files = self._call_api(video_id, access_token, '/files', query) + + formats = [] + for f in files: + href = try_get(f, lambda x: x['_links']['source']['href']) + if not href: + continue + method = f.get('method') + if method == 'hls': + formats.extend(self._extract_m3u8_formats( + href, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif method == 'dash': + formats.extend(self._extract_mpd_formats( + href, video_id, mpd_id='dash', fatal=False)) + else: + fmt = { + 'filesize': int_or_none(try_get(f, lambda x: x['size']['bytes'])), + 'format_id': 'http', + 'preference': 1, + 'url': href, + 'vcodec': f.get('codec'), + } + quality = f.get('quality') + if quality: + fmt.update({ + 'format_id': 'http-' + quality, + 'height': int_or_none(self._search_regex(r'(\d+)p', quality, 'height', default=None)), + }) + formats.append(fmt) + self._sort_formats(formats) + + video_data = self._call_api(video_id, access_token) + title = video_data.get('title') or video_data['name'] + + q = qualities(['small', 'medium', 'large', 'source']) + thumbnails = [] + for thumbnail_id, thumbnail_url in video_data.get('thumbnail', {}).items(): + thumbnails.append({ + 'id': thumbnail_id, + 'url': thumbnail_url, + 'preference': q(thumbnail_id), + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])), + 'formats': formats, + 'thumbnails': thumbnails, + 'timestamp': unified_timestamp(video_data.get('created_at')), + 'view_count': int_or_none(video_data.get('plays_count')), + } From d9df8f120b325766181fb474a8c534e51df78f17 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 28 Nov 2018 20:13:36 +0100 Subject: [PATCH 0161/1201] [vimeo] extract VHX subtitles --- youtube_dl/extractor/vimeo.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 6353c6831..5e15f060b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1127,6 +1127,17 @@ class VHXEmbedIE(InfoExtractor): video_data = self._call_api(video_id, access_token) title = video_data.get('title') or video_data['name'] + subtitles = {} + for subtitle in try_get(video_data, lambda x: x['tracks']['subtitles'], list) or []: + lang = subtitle.get('srclang') or subtitle.get('label') + for _link in subtitle.get('_links', {}).values(): + href = _link.get('href') + if not href: + continue + subtitles.setdefault(lang, []).append({ + 'url': href, + }) + q = qualities(['small', 'medium', 'large', 'source']) thumbnails = [] for thumbnail_id, thumbnail_url in video_data.get('thumbnail', {}).items(): @@ -1142,6 +1153,7 @@ class VHXEmbedIE(InfoExtractor): 'description': video_data.get('description'), 'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])), 'formats': formats, + 'subtitles': subtitles, 'thumbnails': thumbnails, 'timestamp': unified_timestamp(video_data.get('created_at')), 'view_count': int_or_none(video_data.get('plays_count')), From 053e5b12b2e38b7d343aafbb7dc13fb8e4933015 Mon Sep 17 00:00:00 2001 From: Alexander Seiler <seileralex@gmail.com> Date: Thu, 29 Nov 2018 18:12:18 +0100 Subject: [PATCH 0162/1201] [azmedien] Fix extraction (closes #18334) --- youtube_dl/extractor/azmedien.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index a57a5f114..fcbdc71b9 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -36,7 +36,6 @@ class AZMedienIE(InfoExtractor): 'id': '1_anruz3wy', 'ext': 'mp4', 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen', - 'description': 'md5:dd9f96751ec9c35e409a698a328402f3', 'uploader_id': 'TVOnline', 'upload_date': '20180930', 'timestamp': 1538328802, @@ -53,15 +52,12 @@ class AZMedienIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') video_id = mobj.group('id') entry_id = mobj.group('kaltura_id') if not entry_id: - webpage = self._download_webpage(url, video_id) - api_path = self._search_regex( - r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']', - webpage, 'api path') - api_url = 'https://www.%s%s' % (mobj.group('host'), api_path) + api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0]) payload = { 'query': '''query VideoContext($articleId: ID!) { article: node(id: $articleId) { From adbbdefc8126a933d9ff0a6e603fb312e4b4cbdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 30 Nov 2018 00:48:15 +0700 Subject: [PATCH 0163/1201] [hotstar] Add support for alternative app state layout (closes #18320) --- youtube_dl/extractor/hotstar.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index bf5717f1b..45aa5e7ea 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -43,6 +43,7 @@ class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' _TESTS = [{ + # contentData 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', 'info_dict': { 'id': '1000076273', @@ -57,6 +58,10 @@ class HotStarIE(HotStarBaseIE): # m3u8 download 'skip_download': True, } + }, { + # contentDetail + 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157', + 'only_matching': True, }, { 'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583', 'only_matching': True, @@ -74,8 +79,12 @@ class HotStarIE(HotStarBaseIE): r'<script>window\.APP_STATE\s*=\s*({.+?})</script>', webpage, 'app state'), video_id) video_data = {} + getters = ( + lambda x, k=k: x['initialState']['content%s' % k]['content'] + for k in ('Data', 'Detail') + ) for v in app_state.values(): - content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict) + content = try_get(v, getters, dict) if content and content.get('contentId') == video_id: video_data = content From 16597c2f9492651c55e01f441ee9cb5c276209cb Mon Sep 17 00:00:00 2001 From: Jimm Stout <jamesstout1@gmail.com> Date: Thu, 29 Nov 2018 13:07:07 -0500 Subject: [PATCH 0164/1201] [gfycat] Update API endpoint (closes #18333) --- youtube_dl/extractor/gfycat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index a0670b645..c1b36a59b 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -53,7 +53,7 @@ class GfycatIE(InfoExtractor): video_id = self._match_id(url) gfy = self._download_json( - 'http://gfycat.com/cajax/get/%s' % video_id, + 'https://api.gfycat.com/v1/gfycats/%s' % video_id, video_id, 'Downloading video info') if 'error' in gfy: raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True) From f012823082c893c0fc1f96afb8a91f5b1c1ae07a Mon Sep 17 00:00:00 2001 From: Hakim Boyles <hak@volkanite.net> Date: Thu, 29 Nov 2018 14:20:27 -0400 Subject: [PATCH 0165/1201] [lynda] Fix authentication (closes #18158) --- youtube_dl/extractor/lynda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 4ba61cd8a..3084c6dff 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -15,7 +15,7 @@ from ..utils import ( class LyndaBaseIE(InfoExtractor): - _SIGNIN_URL = 'https://www.lynda.com/signin' + _SIGNIN_URL = 'https://www.lynda.com/signin/lynda' _PASSWORD_URL = 'https://www.lynda.com/signin/password' _USER_URL = 'https://www.lynda.com/signin/user' _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' From 3430ff9b07d4dc9dd39617af54aeffb381d88737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 1 Dec 2018 16:45:51 +0700 Subject: [PATCH 0166/1201] [pornhub] Use actual URL host for requests (closes #18359) --- youtube_dl/extractor/pornhub.py | 34 +++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index c9c884095..e377de196 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -27,7 +27,7 @@ class PornHubIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| + (?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P<id>[\da-z]+) @@ -129,7 +129,7 @@ class PornHubIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return re.findall( - r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)', + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)', webpage) def _extract_count(self, pattern, webpage, name): @@ -137,14 +137,16 @@ class PornHubIE(InfoExtractor): pattern, webpage, '%s count' % name, fatal=False)) def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') or 'pornhub.com' + video_id = mobj.group('id') - self._set_cookie('pornhub.com', 'age_verified', '1') + self._set_cookie(host, 'age_verified', '1') def dl_webpage(platform): - self._set_cookie('pornhub.com', 'platform', platform) + self._set_cookie(host, 'platform', platform) return self._download_webpage( - 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id, + 'http://www.%s/view_video.php?viewkey=%s' % (host, video_id), video_id, 'Downloading %s webpage' % platform) webpage = dl_webpage('pc') @@ -306,7 +308,7 @@ class PornHubIE(InfoExtractor): class PornHubPlaylistBaseIE(InfoExtractor): - def _extract_entries(self, webpage): + def _extract_entries(self, webpage, host): # Only process container div with main playlist content skipping # drop-down menu that uses similar pattern for videos (see # https://github.com/rg3/youtube-dl/issues/11594). @@ -316,7 +318,7 @@ class PornHubPlaylistBaseIE(InfoExtractor): return [ self.url_result( - 'http://www.pornhub.com/%s' % video_url, + 'http://www.%s/%s' % (host, video_url), PornHubIE.ie_key(), video_title=title) for video_url, title in orderedSet(re.findall( r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', @@ -324,11 +326,13 @@ class PornHubPlaylistBaseIE(InfoExtractor): ] def _real_extract(self, url): - playlist_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + playlist_id = mobj.group('id') webpage = self._download_webpage(url, playlist_id) - entries = self._extract_entries(webpage) + entries = self._extract_entries(webpage, host) playlist = self._parse_json( self._search_regex( @@ -343,7 +347,7 @@ class PornHubPlaylistBaseIE(InfoExtractor): class PornHubPlaylistIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.(?:com|net)/playlist/(?P<id>\d+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/playlist/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.pornhub.com/playlist/4667351', 'info_dict': { @@ -358,7 +362,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): class PornHubUserVideosIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos' + _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos' _TESTS = [{ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'info_dict': { @@ -399,7 +403,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): }] def _real_extract(self, url): - user_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + user_id = mobj.group('id') entries = [] for page_num in itertools.count(1): @@ -411,7 +417,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: break raise - page_entries = self._extract_entries(webpage) + page_entries = self._extract_entries(webpage, host) if not page_entries: break entries.extend(page_entries) From aa374bc78e5e4dbb8453bd367ae0e3c0db702a7a Mon Sep 17 00:00:00 2001 From: Alexander Seiler <seileralex@gmail.com> Date: Sat, 1 Dec 2018 18:05:15 +0100 Subject: [PATCH 0167/1201] [utils] Fix random_birthday to generate existing dates only --- youtube_dl/utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e84d35d4d..0b1c7cd6c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3948,8 +3948,12 @@ def write_xattr(path, key, value): def random_birthday(year_field, month_field, day_field): + start_date = datetime.date(1950, 1, 1) + end_date = datetime.date(1995, 12, 31) + offset = random.randint(0, (end_date - start_date).days) + random_date = start_date + datetime.timedelta(offset) return { - year_field: str(random.randint(1950, 1995)), - month_field: str(random.randint(1, 12)), - day_field: str(random.randint(1, 31)), + year_field: str(random_date.year), + month_field: str(random_date.month), + day_field: str(random_date.day), } From 1ead840d2c18d4add340117f676fd6694f0650d3 Mon Sep 17 00:00:00 2001 From: Ken Swenson <flat@esoteric.moe> Date: Fri, 9 Nov 2018 16:49:20 -0500 Subject: [PATCH 0168/1201] [tiktok] Add extractor (closes #18108) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tiktok.py | 79 ++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/tiktok.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cd91c0fcb..547331078 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1124,6 +1124,7 @@ from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE from .threeqsdn import ThreeQSDNIE +from .tiktok import TikTokIE from .tinypic import TinyPicIE from .tmz import ( TMZIE, diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py new file mode 100644 index 000000000..d71b09c66 --- /dev/null +++ b/youtube_dl/extractor/tiktok.py @@ -0,0 +1,79 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + compat_str, + int_or_none, + str_or_none, + try_get, + url_or_none, +) + + +class TikTokIE(InfoExtractor): + _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://m.tiktok.com/v/6606727368545406213.html', + 'md5': 'd584b572e92fcd48888051f238022420', + 'info_dict': { + 'id': '6606727368545406213', + 'ext': 'mp4', + 'title': 'Zureeal on TikTok', + 'thumbnail': r're:^https?://.*~noop.image', + 'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay', + 'uploader': 'Zureeal', + 'width': 540, + 'height': 960, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + data = self._parse_json( + self._search_regex( + r'var\s+data\s*=\s*({.+?});', webpage, 'data' + ), video_id) + + title = self._og_search_title(webpage) + + description = str_or_none(try_get(data, lambda x: x['desc'])) + width = int_or_none(try_get(data, lambda x: x['video']['width'])) + height = int_or_none(try_get(data, lambda x: x['video']['height'])) + + formats = [] + + for count, (key, label) in enumerate((('play_addr_lowbr', 'Low'), ('play_addr', 'Normal'), ('download_addr', 'Download')), -2): + for format in try_get(data, lambda x: x['video'][key]['url_list']): + format_url = url_or_none(format) + if not format_url: + continue + formats.append({ + 'url': format_url, + 'ext': 'mp4', + 'height': height, + 'width': width, + 'format_note': label, + 'quality': count + }) + + self._sort_formats(formats) + + uploader = try_get(data, lambda x: x['author']['nickname'], compat_str) + + thumbnail = url_or_none( + try_get( + data, lambda x: x['video']['cover']['url_list'][0], compat_str)) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'uploader': uploader, + 'formats': formats, + 'thumbnail': thumbnail, + 'width': width, + 'height': height, + } From ce18a19be9af1e227e7162636cbf6b277adc1b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Dec 2018 02:39:22 +0700 Subject: [PATCH 0169/1201] [tiktok] Improve extraction and add support for user pages (closes #18135) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/tiktok.py | 136 ++++++++++++++++++----------- 2 files changed, 91 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 547331078..0baed6b27 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1124,7 +1124,10 @@ from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE from .threeqsdn import ThreeQSDNIE -from .tiktok import TikTokIE +from .tiktok import ( + TikTokIE, + TikTokUserIE, +) from .tinypic import TinyPicIE from .tmz import ( TMZIE, diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py index d71b09c66..083e9f36d 100644 --- a/youtube_dl/extractor/tiktok.py +++ b/youtube_dl/extractor/tiktok.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( compat_str, + ExtractorError, int_or_none, str_or_none, try_get, @@ -11,69 +12,106 @@ from ..utils import ( ) -class TikTokIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>[0-9]+)' +class TikTokBaseIE(InfoExtractor): + def _extract_aweme(self, data): + video = data['video'] + description = str_or_none(try_get(data, lambda x: x['desc'])) + width = int_or_none(try_get(data, lambda x: video['width'])) + height = int_or_none(try_get(data, lambda x: video['height'])) + + format_urls = set() + formats = [] + for format_id in ( + 'play_addr_lowbr', 'play_addr', 'play_addr_h264', + 'download_addr'): + for format in try_get( + video, lambda x: x[format_id]['url_list'], list) or []: + format_url = url_or_none(format) + if not format_url: + continue + if format_url in format_urls: + continue + format_urls.add(format_url) + formats.append({ + 'url': format_url, + 'ext': 'mp4', + 'height': height, + 'width': width, + }) + self._sort_formats(formats) + + thumbnail = url_or_none(try_get( + video, lambda x: x['cover']['url_list'][0], compat_str)) + uploader = try_get(data, lambda x: x['author']['nickname'], compat_str) + timestamp = int_or_none(data.get('create_time')) + comment_count = int_or_none(data.get('comment_count')) or int_or_none( + try_get(data, lambda x: x['statistics']['comment_count'])) + repost_count = int_or_none(try_get( + data, lambda x: x['statistics']['share_count'])) + + aweme_id = data['aweme_id'] + + return { + 'id': aweme_id, + 'title': uploader or aweme_id, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'timestamp': timestamp, + 'comment_count': comment_count, + 'repost_count': repost_count, + 'formats': formats, + } + + +class TikTokIE(TikTokBaseIE): + _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)' _TEST = { 'url': 'https://m.tiktok.com/v/6606727368545406213.html', 'md5': 'd584b572e92fcd48888051f238022420', 'info_dict': { 'id': '6606727368545406213', 'ext': 'mp4', - 'title': 'Zureeal on TikTok', - 'thumbnail': r're:^https?://.*~noop.image', + 'title': 'Zureeal', 'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay', + 'thumbnail': r're:^https?://.*~noop.image', 'uploader': 'Zureeal', - 'width': 540, - 'height': 960, + 'timestamp': 1538248586, + 'upload_date': '20180929', + 'comment_count': int, + 'repost_count': int, } } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + data = self._parse_json(self._search_regex( + r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id) + return self._extract_aweme(data) - data = self._parse_json( - self._search_regex( - r'var\s+data\s*=\s*({.+?});', webpage, 'data' - ), video_id) - title = self._og_search_title(webpage) +class TikTokUserIE(TikTokBaseIE): + _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)' + _TEST = { + 'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html', + 'info_dict': { + 'id': '188294915489964032', + }, + 'playlist_mincount': 24, + } - description = str_or_none(try_get(data, lambda x: x['desc'])) - width = int_or_none(try_get(data, lambda x: x['video']['width'])) - height = int_or_none(try_get(data, lambda x: x['video']['height'])) - - formats = [] - - for count, (key, label) in enumerate((('play_addr_lowbr', 'Low'), ('play_addr', 'Normal'), ('download_addr', 'Download')), -2): - for format in try_get(data, lambda x: x['video'][key]['url_list']): - format_url = url_or_none(format) - if not format_url: - continue - formats.append({ - 'url': format_url, - 'ext': 'mp4', - 'height': height, - 'width': width, - 'format_note': label, - 'quality': count - }) - - self._sort_formats(formats) - - uploader = try_get(data, lambda x: x['author']['nickname'], compat_str) - - thumbnail = url_or_none( - try_get( - data, lambda x: x['video']['cover']['url_list'][0], compat_str)) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'uploader': uploader, - 'formats': formats, - 'thumbnail': thumbnail, - 'width': width, - 'height': height, - } + def _real_extract(self, url): + user_id = self._match_id(url) + data = self._download_json( + 'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id, + query={'_signature': '_'}) + entries = [] + for aweme in data['aweme_list']: + try: + entry = self._extract_aweme(aweme) + except ExtractorError: + continue + entry['extractor_key'] = TikTokIE.ie_key() + entries.append(entry) + return self.playlist_result(entries, user_id) From 1fa59a928e48ac81e2afb5dfe134fa6c40f64e44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Dec 2018 00:06:54 +0700 Subject: [PATCH 0170/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index f82c7ea35..ddd4bec14 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +version <unreleased> + +Core +* [utils] Fix random_birthday to generate existing dates only (#18284) + +Extractors ++ [tiktok] Add support for tiktok.com (#18108, #18135) +* [pornhub] Use actual URL host for requests (#18359) +* [lynda] Fix authentication (#18158, #18217) +* [gfycat] Update API endpoint (#18333, #18343) ++ [hotstar] Add support for alternative app state layout (#18320) +* [azmedien] Fix extraction (#18334, #18336) ++ [vimeo] Add support for VHX (Vimeo OTT) (#14835) +* [joj] Fix extraction (#18280, #18281) ++ [wistia] Add support for fast.wistia.com (#18287) + + version 2018.11.23 Core From ab896fa894d2395b886e5bb8168ca0e0e6e9517d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Dec 2018 00:10:20 +0700 Subject: [PATCH 0171/1201] release 2018.12.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 35cc8d6d0..4b35244a8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.11.23 +[debug] youtube-dl version 2018.12.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ddd4bec14..689d07826 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2018.12.03 Core * [utils] Fix random_birthday to generate existing dates only (#18284) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7d72ad82d..837b0199b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -885,6 +885,8 @@ - **ThisAmericanLife** - **ThisAV** - **ThisOldHouse** + - **TikTok** + - **TikTokUser** - **tinypic**: tinypic.com videos - **TMZ** - **TMZArticle** @@ -979,6 +981,7 @@ - **VevoPlaylist** - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** + - **vhx:embed** - **Viafree** - **vice** - **vice:article** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4956365d0..8e1203892 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.11.23' +__version__ = '2018.12.03' From 5547014ad972a4364c4d4a613db6d3a18f25950e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 2 Dec 2018 20:01:36 +0100 Subject: [PATCH 0172/1201] [gamespot] add support reviews URLs --- youtube_dl/extractor/gamespot.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index ab647dd41..4236a5ed8 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ from ..utils import ( class GameSpotIE(OnceIE): - _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', 'md5': 'b2a30deaa8654fcccd43713a6b6a4825', @@ -41,6 +41,9 @@ class GameSpotIE(OnceIE): }, { 'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/', 'only_matching': True, + }, { + 'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/', + 'only_matching': True, }] def _real_extract(self, url): From 8bb0c9cc16b841ab89e15e620e00ee954ab316ed Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 5 Dec 2018 07:03:00 +0100 Subject: [PATCH 0173/1201] [tbs] fix info extraction(fixes #18403) --- youtube_dl/extractor/tbs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index 784f8ed66..e8a7c65e0 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -16,7 +16,7 @@ from ..utils import ( class TBSIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))' _TESTS = [{ 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', 'info_dict': { @@ -40,12 +40,12 @@ class TBSIE(TurnerBaseIE): }] def _real_extract(self, url): - site, display_id = re.match(self._VALID_URL, url).groups() + site, path, display_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, display_id) drupal_settings = self._parse_json(self._search_regex( r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>', webpage, 'drupal setting'), display_id) - video_data = drupal_settings['turner_playlist'][0] + video_data = next(v for v in drupal_settings['turner_playlist'] if v.get('url') == path) media_id = video_data['mediaID'] title = video_data['title'] From ae9d77dab54630ac480bd4082468d543a15c341e Mon Sep 17 00:00:00 2001 From: v-delta <45652398+v-delta@users.noreply.github.com> Date: Thu, 6 Dec 2018 17:24:35 +0100 Subject: [PATCH 0174/1201] [yourporn] Fix extraction (closes #18424) --- youtube_dl/extractor/yourporn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index 6602f7c03..a9951f3b8 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -26,7 +26,7 @@ class YourPornIE(InfoExtractor): self._search_regex( r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info', group='data'), - video_id)[video_id]) + video_id)[video_id]).replace('/cdn/', '/cdn2/') title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', From 33cc1ea586480ccc60fd25f2d42cfb44eec605c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Dec 2018 00:00:06 +0700 Subject: [PATCH 0175/1201] [nrktv] Relax _VALID_URL (closes #18304, closes #18387) --- youtube_dl/extractor/nrk.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index a231735fb..c5001ef48 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -248,7 +248,7 @@ class NRKTVIE(NRKBaseIE): _VALID_URL = r'''(?x) https?:// (?:tv|radio)\.nrk(?:super)?\.no/ - (?:serie/[^/]+|program)/ + (?:serie(?:/[^/]+){1,2}|program)/ (?![Ee]pisodes)%s (?:/\d{2}-\d{2}-\d{4})? (?:\#del=(?P<part_id>\d+))? @@ -362,6 +362,9 @@ class NRKTVIE(NRKBaseIE): }, { 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', 'only_matching': True, + }, { + 'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller', + 'only_matching': True, }] From 15699ec8b0a9cb8d519b721a5cb8199afe62fc3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Dec 2018 00:49:24 +0700 Subject: [PATCH 0176/1201] [nrktv:season,series] Fix extraction and update tests (closes #17159, closes #17258) --- youtube_dl/extractor/nrk.py | 68 +++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index c5001ef48..48bc6fd7a 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -211,13 +211,13 @@ class NRKIE(NRKBaseIE): _TESTS = [{ # video 'url': 'http://www.nrk.no/video/PS*150533', - 'md5': '2f7f6eeb2aacdd99885f355428715cfa', + 'md5': '706f34cdf1322577589e369e522b50ef', 'info_dict': { 'id': '150533', 'ext': 'mp4', 'title': 'Dompap og andre fugler i Piip-Show', 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', - 'duration': 263, + 'duration': 262, } }, { # audio @@ -256,14 +256,14 @@ class NRKTVIE(NRKBaseIE): _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no') _TESTS = [{ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', - 'md5': '4e9ca6629f09e588ed240fb11619922a', + 'md5': '9a167e54d04671eb6317a37b7bc8a280', 'info_dict': { 'id': 'MUHH48000314AA', 'ext': 'mp4', 'title': '20 spørsmål 23.05.2014', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 'duration': 1741, - 'series': '20 spørsmål - TV', + 'series': '20 spørsmål', 'episode': '23.05.2014', }, }, { @@ -301,7 +301,7 @@ class NRKTVIE(NRKBaseIE): 'id': 'MSPO40010515AH', 'ext': 'mp4', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)', - 'description': 'md5:c03aba1e917561eface5214020551b7a', + 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d', 'duration': 772, 'series': 'Tour de Ski', 'episode': '06.01.2015', @@ -314,7 +314,7 @@ class NRKTVIE(NRKBaseIE): 'id': 'MSPO40010515BH', 'ext': 'mp4', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)', - 'description': 'md5:c03aba1e917561eface5214020551b7a', + 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d', 'duration': 6175, 'series': 'Tour de Ski', 'episode': '06.01.2015', @@ -326,7 +326,7 @@ class NRKTVIE(NRKBaseIE): 'info_dict': { 'id': 'MSPO40010515', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', - 'description': 'md5:c03aba1e917561eface5214020551b7a', + 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d', }, 'expected_warnings': ['Video is geo restricted'], }, { @@ -406,21 +406,35 @@ class NRKTVSerieBaseIE(InfoExtractor): def _extract_series(self, webpage, display_id, fatal=True): config = self._parse_json( self._search_regex( - r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config', - default='{}' if not fatal else NO_DEFAULT), + (r'INITIAL_DATA_*\s*=\s*({.+?})\s*;', + r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'), + webpage, 'config', default='{}' if not fatal else NO_DEFAULT), display_id, fatal=False) if not config: return - return try_get(config, lambda x: x['series'], dict) + return try_get( + config, + (lambda x: x['initialState']['series'], lambda x: x['series']), + dict) + + def _extract_seasons(self, seasons): + if not isinstance(seasons, list): + return [] + entries = [] + for season in seasons: + entries.extend(self._extract_episodes(season)) + return entries def _extract_episodes(self, season): - entries = [] if not isinstance(season, dict): - return entries - episodes = season.get('episodes') - if not isinstance(episodes, list): - return entries - for episode in episodes: + return [] + return self._extract_entries(season.get('episodes')) + + def _extract_entries(self, entry_list): + if not isinstance(entry_list, list): + return [] + entries = [] + for episode in entry_list: nrk_id = episode.get('prfId') if not nrk_id or not isinstance(nrk_id, compat_str): continue @@ -465,7 +479,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)' _TESTS = [{ - # new layout + # new layout, seasons 'url': 'https://tv.nrk.no/serie/backstage', 'info_dict': { 'id': 'backstage', @@ -474,20 +488,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): }, 'playlist_mincount': 60, }, { - # old layout + # new layout, instalments 'url': 'https://tv.nrk.no/serie/groenn-glede', 'info_dict': { 'id': 'groenn-glede', 'title': 'Grønn glede', 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608', }, - 'playlist_mincount': 9, + 'playlist_mincount': 10, }, { - 'url': 'http://tv.nrksuper.no/serie/labyrint', + # old layout + 'url': 'https://tv.nrksuper.no/serie/labyrint', 'info_dict': { 'id': 'labyrint', 'title': 'Labyrint', - 'description': 'md5:58afd450974c89e27d5a19212eee7115', + 'description': 'md5:318b597330fdac5959247c9b69fdb1ec', }, 'playlist_mincount': 3, }, { @@ -520,11 +535,11 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): description = try_get( series, lambda x: x['titles']['subtitle'], compat_str) entries = [] - for season in series['seasons']: - entries.extend(self._extract_episodes(season)) + entries.extend(self._extract_seasons(series.get('seasons'))) + entries.extend(self._extract_entries(series.get('instalments'))) return self.playlist_result(entries, series_id, title, description) - # Old layout (e.g. https://tv.nrk.no/serie/groenn-glede) + # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint) entries = [ self.url_result( 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format( @@ -536,6 +551,9 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): 'seriestitle', webpage, 'title', default=None) or self._og_search_title( webpage, fatal=False) + if title: + title = self._search_regex( + r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title) description = self._html_search_meta( 'series_description', webpage, @@ -596,7 +614,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE): 'title': 'Rivertonprisen til Karin Fossum', 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.', }, - 'playlist_count': 5, + 'playlist_count': 2, }] def _extract_title(self, webpage): From c976873c5b2912c06ff53e5193640ee8627edee4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Dec 2018 00:54:58 +0700 Subject: [PATCH 0177/1201] [nrktv:series] Add support for extra materials --- youtube_dl/extractor/nrk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 48bc6fd7a..072f920a9 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -537,6 +537,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): entries = [] entries.extend(self._extract_seasons(series.get('seasons'))) entries.extend(self._extract_entries(series.get('instalments'))) + entries.extend(self._extract_episodes(series.get('extraMaterial'))) return self.playlist_result(entries, series_id, title, description) # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint) From dfe0a3a9d2e07ac1e5ad221912d03b999ebb4d75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Dec 2018 03:27:11 +0700 Subject: [PATCH 0178/1201] [lecturio] Add extractor (closes #18405) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/lecturio.py | 186 +++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 youtube_dl/extractor/lecturio.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0baed6b27..e5f18a75d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -553,6 +553,10 @@ from .lcp import ( ) from .learnr import LearnrIE from .lecture2go import Lecture2GoIE +from .lecturio import ( + LecturioIE, + LecturioCourseIE, +) from .leeco import ( LeIE, LePlaylistIE, diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py new file mode 100644 index 000000000..62ff28e02 --- /dev/null +++ b/youtube_dl/extractor/lecturio.py @@ -0,0 +1,186 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + extract_attributes, + ExtractorError, + float_or_none, + int_or_none, + str_or_none, + url_or_none, + urlencode_postdata, + urljoin, +) + + +class LecturioBaseIE(InfoExtractor): + _LOGIN_URL = 'https://app.lecturio.com/en/login' + _NETRC_MACHINE = 'lecturio' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + # Sets some cookies + _, urlh = self._download_webpage_handle( + self._LOGIN_URL, None, 'Downloading login popup') + + def is_logged(url_handle): + return self._LOGIN_URL not in compat_str(url_handle.geturl()) + + # Already logged in + if is_logged(urlh): + return + + login_form = { + 'signin[email]': username, + 'signin[password]': password, + 'signin[remember]': 'on', + } + + response, urlh = self._download_webpage_handle( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form)) + + # Logged in successfully + if is_logged(urlh): + return + + errors = self._html_search_regex( + r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response, + 'errors', default=None) + if errors: + raise ExtractorError('Unable to login: %s' % errors, expected=True) + raise ExtractorError('Unable to log in') + + +class LecturioIE(LecturioBaseIE): + _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture' + _TEST = { + 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', + 'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870', + 'info_dict': { + 'id': '39634', + 'ext': 'mp4', + 'title': 'Important Concepts and Terms – Introduction to Microbiology', + }, + 'skip': 'Requires lecturio account credentials', + } + + _CC_LANGS = { + 'German': 'de', + 'English': 'en', + 'Spanish': 'es', + 'French': 'fr', + 'Polish': 'pl', + 'Russian': 'ru', + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://app.lecturio.com/en/lecture/%s/player.html' % display_id, + display_id) + + lecture_id = self._search_regex( + r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id') + + api_url = self._search_regex( + r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'api url', group='url') + + video = self._download_json(api_url, display_id) + + title = video['title'].strip() + + formats = [] + for format_ in video['content']['media']: + if not isinstance(format_, dict): + continue + file_ = format_.get('file') + if not file_: + continue + ext = determine_ext(file_) + if ext == 'smil': + # smil contains only broken RTMP formats anyway + continue + file_url = url_or_none(file_) + if not file_url: + continue + label = str_or_none(format_.get('label')) + filesize = int_or_none(format_.get('fileSize')) + formats.append({ + 'url': file_url, + 'format_id': label, + 'filesize': float_or_none(filesize, invscale=1000) + }) + self._sort_formats(formats) + + subtitles = {} + automatic_captions = {} + cc = self._parse_json( + self._search_regex( + r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles', + default='{}'), display_id, fatal=False) + for cc_label, cc_url in cc.items(): + cc_url = url_or_none(cc_url) + if not cc_url: + continue + sub_dict = automatic_captions if 'auto-translated' in cc_label else subtitles + lang = self._search_regex( + r'/([a-z]{2})_', cc_url, 'lang', default=cc_label.split()[0]) + sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({ + 'url': cc_url, + }) + + return { + 'id': lecture_id, + 'title': title, + 'formats': formats, + 'subtitles': subtitles, + 'automatic_captions': automatic_captions, + } + + +class LecturioCourseIE(LecturioBaseIE): + _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course' + _TEST = { + 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', + 'info_dict': { + 'id': 'microbiology-introduction', + 'title': 'Microbiology: Introduction', + }, + 'playlist_count': 45, + 'skip': 'Requires lecturio account credentials', + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [] + for mobj in re.finditer( + r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>', + webpage): + params = extract_attributes(mobj.group(0)) + lecture_url = urljoin(url, params.get('data-url')) + lecture_id = params.get('data-id') + entries.append(self.url_result( + lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) + + title = self._search_regex( + r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage, + 'title', default=None) + + return self.playlist_result(entries, display_id, title) From ebb0449049c198f04103502c95a13171b854d1c7 Mon Sep 17 00:00:00 2001 From: ealgase <mostdigitsofpi@gmail.com> Date: Thu, 6 Dec 2018 15:36:08 -0500 Subject: [PATCH 0179/1201] [xvideos] Switch to HTTPS (closes #18422) --- youtube_dl/extractor/xvideos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index efee95651..ec2d913fc 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -45,7 +45,7 @@ class XVideosIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.xvideos.com/video%s/' % video_id, video_id) + 'https://www.xvideos.com/video%s/' % video_id, video_id) mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage) if mobj: From 8c5879715f4d979b83c49d44a9094307247097ba Mon Sep 17 00:00:00 2001 From: Alexander Seiler <seileralex@gmail.com> Date: Thu, 6 Dec 2018 21:41:02 +0100 Subject: [PATCH 0180/1201] [ard:mediathek] Fix title and description extraction (closes #18349) --- youtube_dl/extractor/ard.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 6bf8f61eb..84e96f769 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -173,13 +173,18 @@ class ARDMediathekIE(InfoExtractor): title = self._html_search_regex( [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', r'<meta name="dcterms\.title" content="(.*?)"/>', - r'<h4 class="headline">(.*?)</h4>'], + r'<h4 class="headline">(.*?)</h4>', + r'<title[^>]*>(.*?)'], webpage, 'title') description = self._html_search_meta( 'dcterms.abstract', webpage, 'description', default=None) if description is None: description = self._html_search_meta( - 'description', webpage, 'meta description') + 'description', webpage, 'meta description', default=None) + if description is None: + description = self._html_search_regex( + r'(.+?)

', + webpage, 'teaser text', default=None) # Thumbnail is sometimes not present. # It is in the mobile version, but that seems to use a different URL From c3c098dcf2826ab4d668a92c9137cca2c0c42a4f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 7 Dec 2018 18:52:01 +0100 Subject: [PATCH 0181/1201] [hotstar] fix video data extraction(closes #18386) --- youtube_dl/extractor/hotstar.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 45aa5e7ea..8de9c4faf 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -79,7 +79,7 @@ class HotStarIE(HotStarBaseIE): r'', webpage, 'app state'), video_id) video_data = {} - getters = ( + getters = list( lambda x, k=k: x['initialState']['content%s' % k]['content'] for k in ('Data', 'Detail') ) @@ -87,6 +87,7 @@ class HotStarIE(HotStarBaseIE): content = try_get(v, getters, dict) if content and content.get('contentId') == video_id: video_data = content + break title = video_data['title'] From 9235b5091cedcc21c8dc32d4b292340edeee4ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Dec 2018 23:57:40 +0700 Subject: [PATCH 0182/1201] [iprima] Relax _VALID_URL (closes #18453) --- youtube_dl/extractor/iprima.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 1d58d6e85..11a6629d2 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -12,7 +12,7 @@ from ..utils import ( class IPrimaIE(InfoExtractor): - _VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P[^?#]+)' + _VALID_URL = r'https?://(?:play|prima|www)\.iprima\.cz/(?:[^/]+/)*(?P[^/?#&]+)' _GEO_BYPASS = False _TESTS = [{ @@ -41,6 +41,9 @@ class IPrimaIE(InfoExtractor): # iframe prima.iprima.cz 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha', 'only_matching': True, + }, { + 'url': 'http://www.iprima.cz/filmy/desne-rande', + 'only_matching': True, }] def _real_extract(self, url): From 1d88b3e6e6e59e4b52305faf6c1bf1fd69c555ee Mon Sep 17 00:00:00 2001 From: aegamesi Date: Sat, 29 Apr 2017 22:56:33 -0700 Subject: [PATCH 0183/1201] [YoutubeDL] Recognize expires=0 as session cookies and send session cookies with requests --- youtube_dl/YoutubeDL.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 38ba43a97..2433f74f4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -2300,7 +2300,13 @@ class YoutubeDL(object): self.cookiejar = compat_cookiejar.MozillaCookieJar( opts_cookiefile) if os.access(opts_cookiefile, os.R_OK): - self.cookiejar.load() + self.cookiejar.load(ignore_discard=True, ignore_expires=True) + # Force CookieJar to treat 'expires=0' cookies as session/discard cookies + # Fixes https://bugs.python.org/issue17164 + for cookie in self.cookiejar: + if cookie.expires == 0: + cookie.expires = None + cookie.discard = True cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) if opts_proxy is not None: From 1bab3437046646da4ebe2b8e0c7fdc25aa1072ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Dec 2018 06:00:32 +0700 Subject: [PATCH 0184/1201] [YoutubeDL] Introduce YoutubeDLCookieJar and clarify the rationale behind session cookies (closes #12929) --- youtube_dl/YoutubeDL.py | 12 +++--------- youtube_dl/utils.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2433f74f4..4493fd0e1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -88,6 +88,7 @@ from .utils import ( version_tuple, write_json_file, write_string, + YoutubeDLCookieJar, YoutubeDLCookieProcessor, YoutubeDLHandler, ) @@ -558,7 +559,7 @@ class YoutubeDL(object): self.restore_console_title() if self.params.get('cookiefile') is not None: - self.cookiejar.save() + self.cookiejar.save(ignore_discard=True, ignore_expires=True) def trouble(self, message=None, tb=None): """Determine action to take when a download problem appears. @@ -2297,16 +2298,9 @@ class YoutubeDL(object): self.cookiejar = compat_cookiejar.CookieJar() else: opts_cookiefile = expand_path(opts_cookiefile) - self.cookiejar = compat_cookiejar.MozillaCookieJar( - opts_cookiefile) + self.cookiejar = YoutubeDLCookieJar(opts_cookiefile) if os.access(opts_cookiefile, os.R_OK): self.cookiejar.load(ignore_discard=True, ignore_expires=True) - # Force CookieJar to treat 'expires=0' cookies as session/discard cookies - # Fixes https://bugs.python.org/issue17164 - for cookie in self.cookiejar: - if cookie.expires == 0: - cookie.expires = None - cookie.discard = True cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) if opts_proxy is not None: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0b1c7cd6c..62e769fd5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -39,6 +39,7 @@ from .compat import ( compat_HTMLParser, compat_basestring, compat_chr, + compat_cookiejar, compat_ctypes_WINFUNCTYPE, compat_etree_fromstring, compat_expanduser, @@ -1139,6 +1140,33 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): req, **kwargs) +class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + # Store session cookies with `expires` set to 0 instead of an empty + # string + for cookie in self: + if cookie.expires is None: + cookie.expires = 0 + compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires) + + def load(self, filename=None, ignore_discard=False, ignore_expires=False): + compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires) + # Session cookies are denoted by either `expires` field set to + # an empty string or 0. MozillaCookieJar only recognizes the former + # (see [1]). So we need force the latter to be recognized as session + # cookies on our own. + # Session cookies may be important for cookies-based authentication, + # e.g. usually, when user does not check 'Remember me' check box while + # logging in on a site, some important cookies are stored as session + # cookies so that not recognizing them will result in failed login. + # 1. https://bugs.python.org/issue17164 + for cookie in self: + # Treat `expires=0` cookies as session cookies + if cookie.expires == 0: + cookie.expires = None + cookie.discard = True + + class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): def __init__(self, cookiejar=None): compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar) From 5f47a60c5d66d65b505131a672a3bad67ddaa00f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 9 Dec 2018 09:35:17 +0100 Subject: [PATCH 0185/1201] [imgur] improve gallery and album detection and extraction(closes #9133)(closes #16577)(closes #17223)(closes #18404) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/imgur.py | 86 +++++++++++++++--------------- 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e5f18a75d..e28db6968 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -483,6 +483,7 @@ from .imdb import ( from .imgur import ( ImgurIE, ImgurAlbumIE, + ImgurGalleryIE, ) from .ina import InaIE from .inc import IncIE diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index ecc958a17..0eb54db3f 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -12,7 +12,7 @@ from ..utils import ( class ImgurIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', @@ -20,28 +20,9 @@ class ImgurIE(InfoExtractor): 'id': 'A61SaA1', 'ext': 'mp4', 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', - 'description': 'Imgur: The magic of the Internet', }, }, { 'url': 'https://imgur.com/A61SaA1', - 'info_dict': { - 'id': 'A61SaA1', - 'ext': 'mp4', - 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', - 'description': 'Imgur: The magic of the Internet', - }, - }, { - 'url': 'https://imgur.com/gallery/YcAQlkx', - 'info_dict': { - 'id': 'YcAQlkx', - 'ext': 'mp4', - 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', - } - }, { - 'url': 'http://imgur.com/topic/Funny/N8rOudd', - 'only_matching': True, - }, { - 'url': 'http://imgur.com/r/aww/VQcQPhM', 'only_matching': True, }, { 'url': 'https://i.imgur.com/crGpqCV.mp4', @@ -50,8 +31,8 @@ class ImgurIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id) - webpage = self._download_webpage(gifv_url, video_id) + webpage = self._download_webpage( + 'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id) width = int_or_none(self._og_search_property( 'video:width', webpage, default=None)) @@ -72,7 +53,6 @@ class ImgurIE(InfoExtractor): 'format_id': m.group('type').partition('/')[2], 'url': self._proto_relative_url(m.group('src')), 'ext': mimetype2ext(m.group('type')), - 'acodec': 'none', 'width': width, 'height': height, 'http_headers': { @@ -107,44 +87,64 @@ class ImgurIE(InfoExtractor): return { 'id': video_id, 'formats': formats, - 'description': self._og_search_description(webpage, default=None), 'title': self._og_search_title(webpage), } -class ImgurAlbumIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P[a-zA-Z0-9]{5})(?:[/?#&]+)?$' +class ImgurGalleryIE(InfoExtractor): + IE_NAME = 'imgur:gallery' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'http://imgur.com/gallery/Q95ko', 'info_dict': { 'id': 'Q95ko', + 'title': 'Adding faces make every GIF better', }, 'playlist_count': 25, }, { - 'url': 'http://imgur.com/a/j6Orj', + 'url': 'http://imgur.com/topic/Aww/ll5Vk', 'only_matching': True, }, { - 'url': 'http://imgur.com/topic/Aww/ll5Vk', + 'url': 'https://imgur.com/gallery/YcAQlkx', + 'info_dict': { + 'id': 'YcAQlkx', + 'ext': 'mp4', + 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', + } + }, { + 'url': 'http://imgur.com/topic/Funny/N8rOudd', + 'only_matching': True, + }, { + 'url': 'http://imgur.com/r/aww/VQcQPhM', 'only_matching': True, }] def _real_extract(self, url): - album_id = self._match_id(url) + gallery_id = self._match_id(url) - album_images = self._download_json( - 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id, - album_id, fatal=False) + data = self._download_json( + 'https://imgur.com/gallery/%s.json' % gallery_id, + gallery_id)['data']['image'] - if album_images: - data = album_images.get('data') - if data and isinstance(data, dict): - images = data.get('images') - if images and isinstance(images, list): - entries = [ - self.url_result('http://imgur.com/%s' % image['hash']) - for image in images if image.get('hash')] - return self.playlist_result(entries, album_id) + if data.get('is_album'): + entries = [ + self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash']) + for image in data['album_images']['images'] if image.get('hash')] + return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description')) - # Fallback to single video - return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key()) + return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id) + + +class ImgurAlbumIE(ImgurGalleryIE): + IE_NAME = 'imgur:album' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P[a-zA-Z0-9]+)' + + _TESTS = [{ + 'url': 'http://imgur.com/a/j6Orj', + 'info_dict': { + 'id': 'j6Orj', + 'title': 'A Literary Analysis of "Star Wars: The Force Awakens"', + }, + 'playlist_count': 12, + }] From 3ad6dabd33307c0125fd462c4988083e360c40ad Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 9 Dec 2018 10:04:00 +0100 Subject: [PATCH 0186/1201] [aenetworks] add support for History Vault(closes #18460) --- youtube_dl/extractor/aenetworks.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 398e56ea3..85ec6392d 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -22,18 +22,19 @@ class AENetworksBaseIE(ThePlatformIE): class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' - IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' + IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' _VALID_URL = r'''(?x) https?:// (?:www\.)? (?P - (?:history|aetv|mylifetime|lifetimemovieclub)\.com| + (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com| fyi\.tv )/ (?: shows/(?P[^/]+(?:/[^/]+){0,2})| movies/(?P[^/]+)(?:/full-movie)?| - specials/(?P[^/]+)/full-special + specials/(?P[^/]+)/full-special| + collections/[^/]+/(?P[^/]+) ) ''' _TESTS = [{ @@ -80,6 +81,9 @@ class AENetworksIE(AENetworksBaseIE): }, { 'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special', 'only_matching': True + }, { + 'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward', + 'only_matching': True }] _DOMAIN_TO_REQUESTOR_ID = { 'history.com': 'HISTORY', @@ -90,9 +94,9 @@ class AENetworksIE(AENetworksBaseIE): } def _real_extract(self, url): - domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups() - display_id = show_path or movie_display_id or special_display_id - webpage = self._download_webpage(url, display_id) + domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups() + display_id = show_path or movie_display_id or special_display_id or collection_display_id + webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers()) if show_path: url_parts = show_path.split('/') url_parts_len = len(url_parts) From 5ee7ae5c7577c9c137a7b38edd5ad01ae3a40376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Dec 2018 22:28:24 +0700 Subject: [PATCH 0187/1201] [teachable] Add support for teachable based platform sites (closes #5451, closes #18150, closes #18272) --- youtube_dl/extractor/extractors.py | 8 +- youtube_dl/extractor/generic.py | 5 + .../extractor/{upskill.py => teachable.py} | 129 ++++++++++++++---- 3 files changed, 115 insertions(+), 27 deletions(-) rename youtube_dl/extractor/{upskill.py => teachable.py} (52%) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e28db6968..6a5d12ab1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1091,6 +1091,10 @@ from .tass import TassIE from .tastytrade import TastyTradeIE from .tbs import TBSIE from .tdslifeway import TDSLifewayIE +from .teachable import ( + TeachableIE, + TeachableCourseIE, +) from .teachertube import ( TeacherTubeIE, TeacherTubeUserIE, @@ -1240,10 +1244,6 @@ from .uplynk import ( UplynkIE, UplynkPreplayIE, ) -from .upskill import ( - UpskillIE, - UpskillCourseIE, -) from .urort import UrortIE from .urplay import URPlayIE from .usanetwork import USANetworkIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 59cf03faf..65b482333 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -109,6 +109,7 @@ from .vice import ViceIE from .xfileshare import XFileShareIE from .cloudflarestream import CloudflareStreamIE from .peertube import PeerTubeIE +from .teachable import TeachableIE from .indavideo import IndavideoEmbedIE from .apa import APAIE from .foxnews import FoxNewsIE @@ -3112,6 +3113,10 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key()) + teachable_url = TeachableIE._extract_url(webpage, url) + if teachable_url: + return self.url_result(teachable_url) + indavideo_urls = IndavideoEmbedIE._extract_urls(webpage) if indavideo_urls: return self.playlist_from_matches( diff --git a/youtube_dl/extractor/upskill.py b/youtube_dl/extractor/teachable.py similarity index 52% rename from youtube_dl/extractor/upskill.py rename to youtube_dl/extractor/teachable.py index 30297b4dd..a0c46b2e6 100644 --- a/youtube_dl/extractor/upskill.py +++ b/youtube_dl/extractor/teachable.py @@ -14,20 +14,38 @@ from ..utils import ( ) -class UpskillBaseIE(InfoExtractor): - _LOGIN_URL = 'http://upskillcourses.com/sign_in' - _NETRC_MACHINE = 'upskill' +class TeachableBaseIE(InfoExtractor): + _NETRC_MACHINE = 'teachable' + _URL_PREFIX = 'teachable:' + + _SITES = { + # Only notable ones here + 'upskillcourses.com': 'upskill', + 'academy.gns3.com': 'gns3', + 'academyhacker.com': 'academyhacker', + 'stackskills.com': 'stackskills', + 'market.saleshacker.com': 'saleshacker', + 'learnability.org': 'learnability', + 'edurila.com': 'edurila', + } + + _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys())) def _real_initialize(self): - self._login() + self._logged_in = False - def _login(self): - username, password = self._get_login_info() + def _login(self, site): + if self._logged_in: + return + + username, password = self._get_login_info( + netrc_machine=self._SITES.get(site, site)) if username is None: return login_page, urlh = self._download_webpage_handle( - self._LOGIN_URL, None, 'Downloading login page') + 'https://%s/sign_in' % site, None, + 'Downloading %s login page' % site) login_url = compat_str(urlh.geturl()) @@ -46,18 +64,24 @@ class UpskillBaseIE(InfoExtractor): post_url = urljoin(login_url, post_url) response = self._download_webpage( - post_url, None, 'Logging in', + post_url, None, 'Logging in to %s' % site, data=urlencode_postdata(login_form), headers={ 'Content-Type': 'application/x-www-form-urlencoded', 'Referer': login_url, }) + if '>I accept the new Privacy Policy<' in response: + raise ExtractorError( + 'Unable to login: %s asks you to accept new Privacy Policy. ' + 'Go to https://%s/ and accept.' % (site, site), expected=True) + # Successful login if any(re.search(p, response) for p in ( r'class=["\']user-signout', r']+\bhref=["\']/sign_out', r'>\s*Log out\s*<')): + self._logged_in = True return message = get_element_by_class('alert', response) @@ -68,8 +92,14 @@ class UpskillBaseIE(InfoExtractor): raise ExtractorError('Unable to log in') -class UpskillIE(UpskillBaseIE): - _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/[^/]+/lectures/(?P\d+)' +class TeachableIE(TeachableBaseIE): + _VALID_URL = r'''(?x) + (?: + %shttps?://(?P[^/]+)| + https?://(?:www\.)?(?P%s) + ) + /courses/[^/]+/lectures/(?P\d+) + ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE _TESTS = [{ 'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', @@ -77,7 +107,7 @@ class UpskillIE(UpskillBaseIE): 'id': 'uzw6zw58or', 'ext': 'mp4', 'title': 'Welcome to the Course!', - 'description': 'md5:8d66c13403783370af62ca97a7357bdd', + 'description': 'md5:65edb0affa582974de4625b9cdea1107', 'duration': 138.763, 'timestamp': 1479846621, 'upload_date': '20161122', @@ -88,10 +118,38 @@ class UpskillIE(UpskillBaseIE): }, { 'url': 'http://upskillcourses.com/courses/119763/lectures/1747100', 'only_matching': True, + }, { + 'url': 'https://academy.gns3.com/courses/423415/lectures/6885939', + 'only_matching': True, + }, { + 'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', + 'only_matching': True, }] + @staticmethod + def _is_teachable(webpage): + return 'teachableTracker.linker:autoLink' in webpage and re.search( + r']+href=["\']https?://process\.fs\.teachablecdn\.com', + webpage) + + @staticmethod + def _extract_url(webpage, source_url): + if not TeachableIE._is_teachable(webpage): + print('NOT TEACHABLE') + return + if re.match(r'https?://[^/]+/(?:courses|p)', source_url): + return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url) + def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + site = mobj.group('site') or mobj.group('site_t') + video_id = mobj.group('id') + + self._login(site) + + prefixed = url.startswith(self._URL_PREFIX) + if prefixed: + url = url[len(self._URL_PREFIX):] webpage = self._download_webpage(url, video_id) @@ -113,12 +171,18 @@ class UpskillIE(UpskillBaseIE): } -class UpskillCourseIE(UpskillBaseIE): - _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/(?:enrolled/)?(?P[^/?#&]+)' +class TeachableCourseIE(TeachableBaseIE): + _VALID_URL = r'''(?x) + (?: + %shttps?://(?P[^/]+)| + https?://(?:www\.)?(?P%s) + ) + /(?:courses|p)/(?:enrolled/)?(?P[^/?#&]+) + ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE _TESTS = [{ 'url': 'http://upskillcourses.com/courses/essential-web-developer-course/', 'info_dict': { - 'id': '119763', + 'id': 'essential-web-developer-course', 'title': 'The Essential Web Developer Course (Free)', }, 'playlist_count': 192, @@ -128,21 +192,37 @@ class UpskillCourseIE(UpskillBaseIE): }, { 'url': 'http://upskillcourses.com/courses/enrolled/119763', 'only_matching': True, + }, { + 'url': 'https://academy.gns3.com/courses/enrolled/423415', + 'only_matching': True, + }, { + 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini', + 'only_matching': True, + }, { + 'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course', + 'only_matching': True, }] @classmethod def suitable(cls, url): - return False if UpskillIE.suitable(url) else super( - UpskillCourseIE, cls).suitable(url) + return False if TeachableIE.suitable(url) else super( + TeachableCourseIE, cls).suitable(url) def _real_extract(self, url): - course_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + site = mobj.group('site') or mobj.group('site_t') + course_id = mobj.group('id') + + self._login(site) + + prefixed = url.startswith(self._URL_PREFIX) + if prefixed: + prefix = self._URL_PREFIX + url = url[len(prefix):] webpage = self._download_webpage(url, course_id) - course_id = self._search_regex( - r'data-course-id=["\'](\d+)', webpage, 'course id', - default=course_id) + url_base = 'https://%s/' % site entries = [] @@ -162,10 +242,13 @@ class UpskillCourseIE(UpskillBaseIE): title = self._html_search_regex( r']+class=["\']lecture-name[^>]+>([^<]+)', li, 'title', default=None) + entry_url = urljoin(url_base, lecture_url) + if prefixed: + entry_url = self._URL_PREFIX + entry_url entries.append( self.url_result( - urljoin('http://upskillcourses.com/', lecture_url), - ie=UpskillIE.ie_key(), video_id=lecture_id, + entry_url, + ie=TeachableIE.ie_key(), video_id=lecture_id, video_title=clean_html(title))) course_title = self._html_search_regex( From 9e02c2c704d15d56b92ebe2fc8d481f99f5d106d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Dec 2018 22:56:33 +0700 Subject: [PATCH 0188/1201] [YoutubeDLCookieJar] Add test for keeping session cookies --- test/test_YoutubeDLCookieJar.py | 34 +++++++++++++++++++++++ test/testdata/cookies/session_cookies.txt | 6 ++++ 2 files changed, 40 insertions(+) create mode 100644 test/test_YoutubeDLCookieJar.py create mode 100644 test/testdata/cookies/session_cookies.txt diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py new file mode 100644 index 000000000..6a8243590 --- /dev/null +++ b/test/test_YoutubeDLCookieJar.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +import os +import re +import sys +import tempfile +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import YoutubeDLCookieJar + + +class TestYoutubeDLCookieJar(unittest.TestCase): + def test_keep_session_cookies(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + tf = tempfile.NamedTemporaryFile(delete=False) + try: + cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True) + temp = tf.read().decode('utf-8') + self.assertTrue(re.search( + r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp)) + self.assertTrue(re.search( + r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp)) + finally: + tf.close() + os.remove(tf.name) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/testdata/cookies/session_cookies.txt b/test/testdata/cookies/session_cookies.txt new file mode 100644 index 000000000..91e5c9231 --- /dev/null +++ b/test/testdata/cookies/session_cookies.txt @@ -0,0 +1,6 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value +www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue From 24cc64254c03c56229654b13d833c600d7048bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Dec 2018 23:08:16 +0700 Subject: [PATCH 0189/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 689d07826..6afee3a28 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +version + +Core +* [YoutubeDL] Keep session cookies in cookie file between runs +* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929) + +Extractors ++ [teachable] Add support for teachable platform sites (#5451, #18150, #18272) ++ [aenetworks] Add support for historyvault.com (#18460) +* [imgur] Improve gallery and album detection and extraction (#9133, #16577, + #17223, #18404) +* [iprima] Relax URL regular expression (#18453) +* [hotstar] Fix video data extraction (#18386) +* [ard:mediathek] Fix title and description extraction (#18349, #18371) +* [xvideos] Switch to HTTPS (#18422, #18427) ++ [lecturio] Add support for lecturio.com (#18405) ++ [nrktv:series] Add support for extra materials +* [nrktv:season,series] Fix extraction (#17159, #17258) +* [nrktv] Relax URL regular expression (#18304, #18387) +* [yourporn] Fix extraction (#18424, #18425) +* [tbs] Fix info extraction (#18403) ++ [gamespot] Add support for review URLs + + version 2018.12.03 Core From cefe42c412168e95b78dcefc5cdba608dba7dd02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Dec 2018 23:11:32 +0700 Subject: [PATCH 0190/1201] release 2018.12.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 11 +++++++---- youtube_dl/version.py | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4b35244a8..1ccf410a7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.12.03 +[debug] youtube-dl version 2018.12.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6afee3a28..9aef67b52 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.12.09 Core * [YoutubeDL] Keep session cookies in cookie file between runs diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 837b0199b..31d20f255 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -33,7 +33,7 @@ - **AdobeTVShow** - **AdobeTVVideo** - **AdultSwim** - - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network + - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault - **afreecatv**: afreecatv.com - **AirMozilla** - **AliExpressLive** @@ -376,7 +376,8 @@ - **imdb**: Internet Movie Database trailers - **imdb:list**: Internet Movie Database lists - **Imgur** - - **ImgurAlbum** + - **imgur:album** + - **imgur:gallery** - **Ina** - **Inc** - **IndavideoEmbed** @@ -435,6 +436,8 @@ - **Le**: 乐视网 - **Learnr** - **Lecture2Go** + - **Lecturio** + - **LecturioCourse** - **LEGO** - **Lemonde** - **Lenta** @@ -853,6 +856,8 @@ - **TastyTrade** - **TBS** - **TDSLifeway** + - **Teachable** + - **TeachableCourse** - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** @@ -961,8 +966,6 @@ - **uol.com.br** - **uplynk** - **uplynk:preplay** - - **Upskill** - - **UpskillCourse** - **Urort**: NRK P3 Urørt - **URPlay** - **USANetwork** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8e1203892..d15b9583f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.12.03' +__version__ = '2018.12.09' From 59c3940165efaee6705ff06ef4fc4ac2c701107d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Dec 2018 01:37:10 +0700 Subject: [PATCH 0191/1201] [ard:mediathek] Add support for classic.ardmediathek.de (closes #18473) --- youtube_dl/extractor/ard.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 84e96f769..2e1536a1b 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -21,7 +21,7 @@ from ..compat import compat_etree_fromstring class ARDMediathekIE(InfoExtractor): IE_NAME = 'ARD:mediathek' - _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' + _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' _TESTS = [{ # available till 26.07.2022 @@ -51,6 +51,9 @@ class ARDMediathekIE(InfoExtractor): # audio 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158', 'only_matching': True, + }, { + 'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698', + 'only_matching': True, }] def _extract_media_info(self, media_info_url, webpage, video_id): From 6e29458f2479c4535f8605032b9492f52bb37f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Dec 2018 04:30:00 +0700 Subject: [PATCH 0192/1201] [test/testdata/cookies/session_cookies.txt] Fix empty expires test data --- test/testdata/cookies/session_cookies.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testdata/cookies/session_cookies.txt b/test/testdata/cookies/session_cookies.txt index 91e5c9231..f6996f031 100644 --- a/test/testdata/cookies/session_cookies.txt +++ b/test/testdata/cookies/session_cookies.txt @@ -2,5 +2,5 @@ # http://curl.haxx.se/rfc/cookie_spec.html # This is a generated file! Do not edit. +www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value -www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue From 102a4e54c5c0819233773cc15398bc901a218f4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Dec 2018 10:10:28 +0700 Subject: [PATCH 0193/1201] [teachable] Remove debug output --- youtube_dl/extractor/teachable.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index a0c46b2e6..47ac95ee8 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -135,7 +135,6 @@ class TeachableIE(TeachableBaseIE): @staticmethod def _extract_url(webpage, source_url): if not TeachableIE._is_teachable(webpage): - print('NOT TEACHABLE') return if re.match(r'https?://[^/]+/(?:courses|p)', source_url): return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url) From 13e17cd28e7f9e3bd8be4fa7b073a5cb96f5959f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 10 Dec 2018 14:59:57 +0100 Subject: [PATCH 0194/1201] [uol] fix format url extraction(closes 18480) --- youtube_dl/extractor/uol.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py index e67083004..08f0c072e 100644 --- a/youtube_dl/extractor/uol.py +++ b/youtube_dl/extractor/uol.py @@ -61,7 +61,7 @@ class UOLIE(InfoExtractor): 'height': 360, }, '5': { - 'width': 1080, + 'width': 1280, 'height': 720, }, '6': { @@ -80,6 +80,10 @@ class UOLIE(InfoExtractor): 'width': 568, 'height': 320, }, + '11': { + 'width': 640, + 'height': 360, + } } def _real_extract(self, url): @@ -111,19 +115,31 @@ class UOLIE(InfoExtractor): 'ver': video_data.get('numRevision', 2), 'r': 'http://mais.uol.com.br', } + for k in ('token', 'sign'): + v = video_data.get(k) + if v: + query[k] = v + formats = [] for f in video_data.get('formats', []): f_url = f.get('url') or f.get('secureUrl') if not f_url: continue + f_url = update_url_query(f_url, query) format_id = str_or_none(f.get('id')) + if format_id == '10': + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + continue fmt = { 'format_id': format_id, - 'url': update_url_query(f_url, query), + 'url': f_url, + 'source_preference': 1, } fmt.update(self._FORMATS.get(format_id, {})) formats.append(fmt) - self._sort_formats(formats) + self._sort_formats(formats, ('height', 'width', 'source_preference', 'tbr', 'ext')) tags = [] for tag in video_data.get('tags', []): From 0a05cfabb6338be750474a7286ce0d72a4d7c142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Dec 2018 23:45:02 +0700 Subject: [PATCH 0195/1201] [lecturio] Improve subtitles extraction (closes #18488) --- youtube_dl/extractor/lecturio.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py index 62ff28e02..0f1265cdf 100644 --- a/youtube_dl/extractor/lecturio.py +++ b/youtube_dl/extractor/lecturio.py @@ -136,9 +136,15 @@ class LecturioIE(LecturioBaseIE): cc_url = url_or_none(cc_url) if not cc_url: continue - sub_dict = automatic_captions if 'auto-translated' in cc_label else subtitles lang = self._search_regex( - r'/([a-z]{2})_', cc_url, 'lang', default=cc_label.split()[0]) + r'/([a-z]{2})_', cc_url, 'lang', + default=cc_label.split()[0] if cc_label else 'en') + original_lang = self._search_regex( + r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang', + default=None) + sub_dict = (automatic_captions + if 'auto-translated' in cc_label or original_lang + else subtitles) sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({ 'url': cc_url, }) From 8fe104947d6b772ccacd0b8af51d1adb16865e99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Dec 2018 22:25:12 +0700 Subject: [PATCH 0196/1201] [youtube] Fix multifeed extraction (closes #18531) --- youtube_dl/extractor/youtube.py | 52 ++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3f49f3889..c582ab2ff 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1712,30 +1712,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: video_description = '' - if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False): + if not smuggled_data.get('force_singlefeed', False): if not self._downloader.params.get('noplaylist'): - entries = [] - feed_ids = [] - multifeed_metadata_list = video_info['multifeed_metadata_list'][0] - for feed in multifeed_metadata_list.split(','): - # Unquote should take place before split on comma (,) since textual - # fields may contain comma as well (see - # https://github.com/rg3/youtube-dl/issues/8536) - feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed)) - entries.append({ - '_type': 'url_transparent', - 'ie_key': 'Youtube', - 'url': smuggle_url( - '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]), - {'force_singlefeed': True}), - 'title': '%s (%s)' % (video_title, feed_data['title'][0]), - }) - feed_ids.append(feed_data['id'][0]) - self.to_screen( - 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' - % (', '.join(feed_ids), video_id)) - return self.playlist_result(entries, video_id, video_title, video_description) - self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + multifeed_metadata_list = try_get( + player_response, + lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'], + compat_str) or try_get( + video_info, lambda x: x['multifeed_metadata_list'][0], compat_str) + if multifeed_metadata_list: + entries = [] + feed_ids = [] + for feed in multifeed_metadata_list.split(','): + # Unquote should take place before split on comma (,) since textual + # fields may contain comma as well (see + # https://github.com/rg3/youtube-dl/issues/8536) + feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed)) + entries.append({ + '_type': 'url_transparent', + 'ie_key': 'Youtube', + 'url': smuggle_url( + '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]), + {'force_singlefeed': True}), + 'title': '%s (%s)' % (video_title, feed_data['title'][0]), + }) + feed_ids.append(feed_data['id'][0]) + self.to_screen( + 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' + % (', '.join(feed_ids), video_id)) + return self.playlist_result(entries, video_id, video_title, video_description) + else: + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) if view_count is None: view_count = extract_view_count(video_info) From 7f41a598b3fba1bcab2817de64a08941200aa3c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Dec 2018 23:08:14 +0700 Subject: [PATCH 0197/1201] [safari] Add support for learning.oreilly.com (closes #18510) --- youtube_dl/extractor/safari.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 30e2a38b4..c0d32a1b9 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -15,10 +15,10 @@ from ..utils import ( class SafariBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/' + _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/' _NETRC_MACHINE = 'safari' - _API_BASE = 'https://www.safaribooksonline.com/api/v1' + _API_BASE = 'https://learning.oreilly.com/api/v1' _API_FORMAT = 'json' LOGGED_IN = False @@ -76,7 +76,7 @@ class SafariIE(SafariBaseIE): IE_DESC = 'safaribooksonline.com online video' _VALID_URL = r'''(?x) https?:// - (?:www\.)?safaribooksonline\.com/ + (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/ (?: library/view/[^/]+/(?P[^/]+)/(?P[^/?\#&]+)\.html| videos/[^/]+/[^/]+/(?P[^-]+-[^/?\#&]+) @@ -104,6 +104,9 @@ class SafariIE(SafariBaseIE): }, { 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00', 'only_matching': True, + }, { + 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro', + 'only_matching': True, }] _PARTNER_ID = '1926081' @@ -160,7 +163,7 @@ class SafariIE(SafariBaseIE): class SafariApiIE(SafariBaseIE): IE_NAME = 'safari:api' - _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P[^/]+)/chapter(?:-content)?/(?P[^/?#&]+)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P[^/]+)/chapter(?:-content)?/(?P[^/?#&]+)\.html' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', @@ -185,7 +188,7 @@ class SafariCourseIE(SafariBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?safaribooksonline\.com/ + (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/ (?: library/view/[^/]+| api/v1/book| @@ -213,6 +216,9 @@ class SafariCourseIE(SafariBaseIE): }, { 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314', 'only_matching': True, + }, { + 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838', + 'only_matching': True, }] @classmethod From c984196cf1c9eb34725091e07a4f094b7eea1d4f Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sat, 15 Dec 2018 23:59:17 +0700 Subject: [PATCH 0198/1201] [README.md] Bind info dict URLs to a fixed blob (closes #18492) --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 35c3de512..a1d2904c0 100644 --- a/README.md +++ b/README.md @@ -1024,7 +1024,7 @@ After you have ensured this site is distributing its content legally, you can fo ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. +7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: @@ -1045,7 +1045,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou ### Mandatory and optional metafields -For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: +For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: - `id` (media identifier) - `title` (media title) @@ -1053,7 +1053,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. -[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. +[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. #### Example From 21c340b83fb41094ef59b87d52c4eb1d90d1df04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 16 Dec 2018 19:35:48 +0700 Subject: [PATCH 0199/1201] [youtube] Fix mark watched (closes #18546) --- youtube_dl/extractor/youtube.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c582ab2ff..44c25c11c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -48,6 +48,7 @@ from ..utils import ( unified_strdate, unsmuggle_url, uppercase_escape, + url_or_none, urlencode_postdata, ) @@ -1386,8 +1387,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._downloader.report_warning(err_msg) return {} - def _mark_watched(self, video_id, video_info): - playback_url = video_info.get('videostats_playback_base_url', [None])[0] + def _mark_watched(self, video_id, video_info, player_response): + playback_url = url_or_none(try_get( + player_response, + lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get( + video_info, lambda x: x['videostats_playback_base_url'][0])) if not playback_url: return parsed_playback_url = compat_urlparse.urlparse(playback_url) @@ -2122,7 +2126,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._sort_formats(formats) - self.mark_watched(video_id, video_info) + self.mark_watched(video_id, video_info, player_response) return { 'id': video_id, From c8b37510868b20b0829583557899d70db2ea6243 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 16 Dec 2018 14:28:40 +0100 Subject: [PATCH 0200/1201] [vrv] fix initial state extraction --- youtube_dl/extractor/vrv.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index ac0819c7c..483a3be3a 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -120,8 +120,10 @@ class VRVIE(VRVBaseIE): url, video_id, headers=self.geo_verification_headers()) media_resource = self._parse_json(self._search_regex( - r'window\.__INITIAL_STATE__\s*=\s*({.+?})', - webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {} + [ + r'window\.__INITIAL_STATE__\s*=\s*({.+?})(?:|;)', + r'window\.__INITIAL_STATE__\s*=\s*({.+})' + ], webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {} video_data = media_resource.get('json') if not video_data: From 90046d77616f8f24e3716c64443118a9dcbec996 Mon Sep 17 00:00:00 2001 From: yonaikerlol Date: Sun, 16 Dec 2018 17:10:36 -0400 Subject: [PATCH 0201/1201] [iprima] Relax _VALID_URL (closes #18515) --- youtube_dl/extractor/iprima.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 11a6629d2..11bbeb592 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -12,7 +12,7 @@ from ..utils import ( class IPrimaIE(InfoExtractor): - _VALID_URL = r'https?://(?:play|prima|www)\.iprima\.cz/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P[^/?#&]+)' _GEO_BYPASS = False _TESTS = [{ @@ -44,6 +44,21 @@ class IPrimaIE(InfoExtractor): }, { 'url': 'http://www.iprima.cz/filmy/desne-rande', 'only_matching': True, + }, { + 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby', + 'only_matching': True, + }, { + 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy', + 'only_matching': True, + }, { + 'url': 'https://cool.iprima.cz/derava-silnice-nevadi', + 'only_matching': True, + }, { + 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi', + 'only_matching': True, + }, { + 'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1', + 'only_matching': True, }] def _real_extract(self, url): From 252e172dea96b90191682afac837535de4d33107 Mon Sep 17 00:00:00 2001 From: Tim Landscheidt Date: Sun, 16 Dec 2018 21:29:12 +0000 Subject: [PATCH 0202/1201] [acast] Add support for embed.acast.com --- youtube_dl/extractor/acast.py | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 6d846ea7a..1fbff705d 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -17,25 +17,8 @@ from ..utils import ( class ACastIE(InfoExtractor): IE_NAME = 'acast' - _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P[^/]+)/(?P[^/#?]+)' + _VALID_URL = r'https?://(?:(?:embed|www)\.)?acast\.com/(?P[^/]+)/(?P[^/#?]+)' _TESTS = [{ - # test with one bling - 'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan', - 'md5': 'ada3de5a1e3a2a381327d749854788bb', - 'info_dict': { - 'id': '57de3baa-4bb0-487e-9418-2692c1277a34', - 'ext': 'mp3', - 'title': '"Where Are You?": Taipei 101, Taiwan', - 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e', - 'timestamp': 1196172000, - 'upload_date': '20071127', - 'duration': 211, - 'creator': 'Concierge', - 'series': 'Condé Nast Traveler Podcast', - 'episode': '"Where Are You?": Taipei 101, Taiwan', - } - }, { - # test with multiple blings 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', 'md5': 'a02393c74f3bdb1801c3ec2695577ce0', 'info_dict': { @@ -50,6 +33,9 @@ class ACastIE(InfoExtractor): 'series': 'Spår', 'episode': '2. Raggarmordet - Röster ur det förflutna', } + }, { + 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015', + 'only_matching': True, }] def _real_extract(self, url): From 50a498a68e2f6754f5b26e5ad1f6eabfd9aeeb14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Dec 2018 04:32:59 +0700 Subject: [PATCH 0203/1201] [acast] Extend _VALID_URL --- youtube_dl/extractor/acast.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 1fbff705d..b32f74a37 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -17,7 +17,14 @@ from ..utils import ( class ACastIE(InfoExtractor): IE_NAME = 'acast' - _VALID_URL = r'https?://(?:(?:embed|www)\.)?acast\.com/(?P[^/]+)/(?P[^/#?]+)' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:(?:embed|www)\.)?acast\.com/| + play\.acast\.com/s/ + ) + (?P[^/]+)/(?P[^/#?]+) + ''' _TESTS = [{ 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', 'md5': 'a02393c74f3bdb1801c3ec2695577ce0', @@ -36,6 +43,9 @@ class ACastIE(InfoExtractor): }, { 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015', 'only_matching': True, + }, { + 'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22', + 'only_matching': True, }] def _real_extract(self, url): From 1c82122741783adf6653df25fa81ef0f95a22279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Dec 2018 04:51:57 +0700 Subject: [PATCH 0204/1201] [ard:beta] Relax _VALID_URL (closes #18441) --- youtube_dl/extractor/ard.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 2e1536a1b..a5df7f0f9 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -56,6 +56,10 @@ class ARDMediathekIE(InfoExtractor): 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url) + def _extract_media_info(self, media_info_url, webpage, video_id): media_info = self._download_json( media_info_url, video_id, 'Downloading media JSON') @@ -296,7 +300,7 @@ class ARDIE(InfoExtractor): class ARDBetaMediathekIE(InfoExtractor): - _VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P[a-zA-Z0-9]+)/(?P[^/?#]+)' + _VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P[a-zA-Z0-9]+)(?:/(?P[^/?#]+))?' _TESTS = [{ 'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita', 'md5': '2d02d996156ea3c397cfc5036b5d7f8f', @@ -310,12 +314,18 @@ class ARDBetaMediathekIE(InfoExtractor): 'upload_date': '20180826', 'ext': 'mp4', }, + }, { + 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/', + 'only_matching': True, + }, { + 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg', + 'only_matching': True, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('video_id') - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or video_id webpage = self._download_webpage(url, display_id) data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json') From 752582183a1942b12880139039137b3a60962611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Dec 2018 05:29:59 +0700 Subject: [PATCH 0205/1201] [ard:beta] Improve extraction robustness, fix subtitles extraction, improve geo restricted videos extraction --- youtube_dl/extractor/ard.py | 58 +++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index a5df7f0f9..8adae4644 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -8,13 +8,16 @@ from .generic import GenericIE from ..utils import ( determine_ext, ExtractorError, - qualities, int_or_none, parse_duration, + qualities, + str_or_none, + try_get, unified_strdate, - xpath_text, + unified_timestamp, update_url_query, url_or_none, + xpath_text, ) from ..compat import compat_etree_fromstring @@ -336,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor): 'display_id': display_id, } formats = [] + subtitles = {} + geoblocked = False for widget in data.values(): - if widget.get('_geoblocked'): - raise ExtractorError('This video is not available due to geoblocking', expected=True) - + if widget.get('_geoblocked') is True: + geoblocked = True if '_duration' in widget: - res['duration'] = widget['_duration'] + res['duration'] = int_or_none(widget['_duration']) if 'clipTitle' in widget: res['title'] = widget['clipTitle'] if '_previewImage' in widget: res['thumbnail'] = widget['_previewImage'] if 'broadcastedOn' in widget: - res['upload_date'] = unified_strdate(widget['broadcastedOn']) + res['timestamp'] = unified_timestamp(widget['broadcastedOn']) if 'synopsis' in widget: res['description'] = widget['synopsis'] - if '_subtitleUrl' in widget: - res['subtitles'] = {'de': [{ + subtitle_url = url_or_none(widget.get('_subtitleUrl')) + if subtitle_url: + subtitles.setdefault('de', []).append({ 'ext': 'ttml', - 'url': widget['_subtitleUrl'], - }]} + 'url': subtitle_url, + }) if '_quality' in widget: - format_url = widget['_stream']['json'][0] - - if format_url.endswith('.f4m'): + format_url = url_or_none(try_get( + widget, lambda x: x['_stream']['json'][0])) + if not format_url: + continue + ext = determine_ext(format_url) + if ext == 'f4m': formats.extend(self._extract_f4m_formats( format_url + '?hdcore=3.11.0', video_id, f4m_id='hds', fatal=False)) - elif format_url.endswith('m3u8'): + elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + format_url, video_id, 'mp4', m3u8_id='hls', + fatal=False)) else: + # HTTP formats are not available when geoblocked is True, + # other formats are fine though + if geoblocked: + continue + quality = str_or_none(widget.get('_quality')) formats.append({ - 'format_id': 'http-' + widget['_quality'], + 'format_id': ('http-' + quality) if quality else 'http', 'url': format_url, 'preference': 10, # Plain HTTP, that's nice }) + if not formats and geoblocked: + self.raise_geo_restricted( + msg='This video is not available due to geoblocking', + countries=['DE']) + self._sort_formats(formats) - res['formats'] = formats + res.update({ + 'subtitles': subtitles, + 'formats': formats, + }) return res From cbb3e4b14f59fe2dba5983f7e55308eba2ea025b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Dec 2018 05:34:55 +0700 Subject: [PATCH 0206/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9aef67b52..291ae42bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version + +Extractors +* [ard:beta] Improve geo restricted videos extraction +* [ard:beta] Fix subtitles extraction +* [ard:beta] Improve extraction robustness +* [ard:beta] Relax URL regular expression (#18441) +* [acast] Add support for embed.acast.com and play.acast.com (#18483) +* [iprima] Relax URL regular expression (#18515, #18540) +* [vrv] Fix initial state extraction (#18553) +* [youtube] Fix mark watched (#18546) ++ [safari] Add support for learning.oreilly.com (#18510) +* [youtube] Fix multifeed extraction (#18531) +* [lecturio] Improve subtitles extraction (#18488) +* [uol] Fix format URL extraction (#18480) ++ [ard:mediathek] Add support for classic.ardmediathek.de (#18473) + + version 2018.12.09 Core From 4cee62ade0d991eedb2feae927c44370be3c389e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Dec 2018 05:37:50 +0700 Subject: [PATCH 0207/1201] release 2018.12.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1ccf410a7..b84559932 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.12.09 +[debug] youtube-dl version 2018.12.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bbcb78808..cba87190d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -152,7 +152,7 @@ After you have ensured this site is distributing its content legally, you can fo ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. +7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: @@ -173,7 +173,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou ### Mandatory and optional metafields -For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: +For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: - `id` (media identifier) - `title` (media title) @@ -181,7 +181,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. -[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. +[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. #### Example diff --git a/ChangeLog b/ChangeLog index 291ae42bb..510013e89 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.12.17 Extractors * [ard:beta] Improve geo restricted videos extraction diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d15b9583f..0461aac0c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.12.09' +__version__ = '2018.12.17' From 7216e9bff71b4c537bb8d56386d789bb83f921f9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 17 Dec 2018 16:34:51 +0100 Subject: [PATCH 0208/1201] [discovery] Add support for Scripps Networks watch domains(closes #17947) --- youtube_dl/extractor/discovery.py | 35 +++++++++++++++++-------- youtube_dl/extractor/scrippsnetworks.py | 29 +++++--------------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 3589bd428..44fbc41bb 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -17,16 +17,29 @@ from ..compat import compat_HTTPError class DiscoveryIE(DiscoveryGoBaseIE): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?P - discovery| - investigationdiscovery| - discoverylife| - animalplanet| - ahctv| - destinationamerica| - sciencechannel| - tlc| - velocity + _VALID_URL = r'''(?x)https?:// + (?P + (?:www\.)? + (?: + discovery| + investigationdiscovery| + discoverylife| + animalplanet| + ahctv| + destinationamerica| + sciencechannel| + tlc| + velocity + )| + watch\. + (?: + hgtv| + foodnetwork| + travelchannel| + diynetwork| + cookingchanneltv| + motortrend + ) )\.com(?P/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+))''' _TESTS = [{ 'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley', @@ -71,7 +84,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): if not access_token: access_token = self._download_json( - 'https://www.%s.com/anonymous' % site, display_id, query={ + 'https://%s.com/anonymous' % site, display_id, query={ 'authRel': 'authorization', 'client_id': try_get( react_data, lambda x: x['application']['apiClientId'], diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index 4023aeef8..8b3275735 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -19,7 +19,7 @@ class ScrippsNetworksWatchIE(AWSIE): _VALID_URL = r'''(?x) https?:// watch\. - (?Phgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/ + (?Pgeniuskitchen)\.com/ (?: player\.[A-Z0-9]+\.html\#| show/(?:[^/]+/){2}| @@ -28,38 +28,23 @@ class ScrippsNetworksWatchIE(AWSIE): (?P\d+) ''' _TESTS = [{ - 'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/', - 'md5': '26545fd676d939954c6808274bdb905a', + 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/', 'info_dict': { - 'id': '4173834', + 'id': '4194875', 'ext': 'mp4', - 'title': 'Best Ever Treehouses', - 'description': "We're searching for the most over the top treehouses.", + 'title': 'Ample Hills Ice Cream Bike', + 'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.', 'uploader': 'ANV', - 'upload_date': '20170922', - 'timestamp': 1506056400, + 'upload_date': '20171011', + 'timestamp': 1507698000, }, 'params': { 'skip_download': True, }, 'add_ie': [AnvatoIE.ie_key()], - }, { - 'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/', - 'only_matching': True, - }, { - 'url': 'http://watch.diynetwork.com/player.HNT.html#2656646', - 'only_matching': True, - }, { - 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/', - 'only_matching': True, }] _SNI_TABLE = { - 'hgtv': 'hgtv', - 'diynetwork': 'diy', - 'foodnetwork': 'food', - 'cookingchanneltv': 'cook', - 'travelchannel': 'trav', 'geniuskitchen': 'genius', } From 386d1fea79e861b11c77a7353c07eb162fbb4dae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 17 Dec 2018 23:43:12 +0700 Subject: [PATCH 0209/1201] [lecturio] Add support for lecturio.de (closes #18562) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/lecturio.py | 45 +++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6a5d12ab1..d72f52e36 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -557,6 +557,7 @@ from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioIE, LecturioCourseIE, + LecturioDeCourseIE, ) from .leeco import ( LeIE, diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py index 0f1265cdf..24f78d928 100644 --- a/youtube_dl/extractor/lecturio.py +++ b/youtube_dl/extractor/lecturio.py @@ -64,8 +64,14 @@ class LecturioBaseIE(InfoExtractor): class LecturioIE(LecturioBaseIE): - _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P[^/?#&]+)\.lecture' - _TEST = { + _VALID_URL = r'''(?x) + https:// + (?: + app\.lecturio\.com/[^/]+/(?P[^/?#&]+)\.lecture| + (?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag + ) + ''' + _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', 'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870', 'info_dict': { @@ -74,7 +80,10 @@ class LecturioIE(LecturioBaseIE): 'title': 'Important Concepts and Terms – Introduction to Microbiology', }, 'skip': 'Requires lecturio account credentials', - } + }, { + 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', + 'only_matching': True, + }] _CC_LANGS = { 'German': 'de', @@ -86,7 +95,8 @@ class LecturioIE(LecturioBaseIE): } def _real_extract(self, url): - display_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') or mobj.group('id_de') webpage = self._download_webpage( 'https://app.lecturio.com/en/lecture/%s/player.html' % display_id, @@ -190,3 +200,30 @@ class LecturioCourseIE(LecturioBaseIE): 'title', default=None) return self.playlist_result(entries, display_id, title) + + +class LecturioDeCourseIE(LecturioBaseIE): + _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.kurs' + _TEST = { + 'url': 'https://www.lecturio.de/jura/grundrechte.kurs', + 'only_matching': True, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [] + for mobj in re.finditer( + r'(?s)]+\bdata-lecture-id=["\'](?P\d+).+?\bhref=(["\'])(?P(?:(?!\2).)+\.vortrag)\b[^>]+>', + webpage): + lecture_url = urljoin(url, mobj.group('url')) + lecture_id = mobj.group('id') + entries.append(self.url_result( + lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) + + title = self._search_regex( + r']*>([^<]+)', webpage, 'title', default=None) + + return self.playlist_result(entries, display_id, title) From cfd13c4c45f63c408a3e53ac31561c286c9a3acc Mon Sep 17 00:00:00 2001 From: Daan van Vugt Date: Mon, 17 Dec 2018 18:03:00 +0100 Subject: [PATCH 0210/1201] [mediasite] Relax _VALID_URL --- youtube_dl/extractor/mediasite.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py index 84876b883..de37df0c6 100644 --- a/youtube_dl/extractor/mediasite.py +++ b/youtube_dl/extractor/mediasite.py @@ -21,7 +21,7 @@ from ..utils import ( class MediasiteIE(InfoExtractor): - _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P[0-9a-f]{32,34})(?P\?[^#]+|)' + _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/livebroadcast/Presentation)/(?P[0-9a-f]{32,34})(?P\?[^#]+|)' _TESTS = [ { 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d', @@ -84,7 +84,11 @@ class MediasiteIE(InfoExtractor): 'timestamp': 1333983600, 'duration': 7794, } - } + }, + { + 'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d', + 'only_matching': True, + }, ] # look in Mediasite.Core.js (Mediasite.ContentStreamType[*]) From 4ee184545455d1ebf4a9f97f86ade2c4a3b3e03b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 18 Dec 2018 01:55:13 +0700 Subject: [PATCH 0211/1201] [mediasite] Extend _VALID_URL even more --- youtube_dl/extractor/mediasite.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py index de37df0c6..ef9628e65 100644 --- a/youtube_dl/extractor/mediasite.py +++ b/youtube_dl/extractor/mediasite.py @@ -21,7 +21,7 @@ from ..utils import ( class MediasiteIE(InfoExtractor): - _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/livebroadcast/Presentation)/(?P[0-9a-f]{32,34})(?P\?[^#]+|)' + _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P[0-9a-f]{32,34})(?P\?[^#]+|)' _TESTS = [ { 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d', @@ -89,6 +89,10 @@ class MediasiteIE(InfoExtractor): 'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d', 'only_matching': True, }, + { + 'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d', + 'only_matching': True, + }, ] # look in Mediasite.Core.js (Mediasite.ContentStreamType[*]) From 65e29cdac3f3ee14e07c200293438426b1135719 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 18 Dec 2018 22:46:19 +0100 Subject: [PATCH 0212/1201] [twitter] pass referer with card request(closes #18579) --- youtube_dl/extractor/twitter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index de41065d6..41d0b6be8 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -171,7 +171,8 @@ class TwitterCardIE(TwitterBaseIE): urls.append('https://twitter.com/i/videos/' + video_id) for u in urls: - webpage = self._download_webpage(u, video_id) + webpage = self._download_webpage( + u, video_id, headers={'Referer': 'https://twitter.com/'}) iframe_url = self._html_search_regex( r']+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', From 904bb599be0fcf5fa861f17eee757c3c9494208b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elan=20Ruusam=C3=A4e?= Date: Wed, 19 Dec 2018 22:22:10 +0200 Subject: [PATCH 0213/1201] [README.md] Add flake8 instructions --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a1d2904c0..b3c39bf66 100644 --- a/README.md +++ b/README.md @@ -1025,15 +1025,19 @@ After you have ensured this site is distributing its content legally, you can fo 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. -8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. -9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: +8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): + + $ flake8 youtube_dl/extractor/yourextractor.py + +9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. +10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py $ git commit -m '[yourextractor] Add new extractor' $ git push origin yourextractor -10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. +11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. In any case, thank you very much for your contributions! From 835e45abab88e2a1661b86be255e06323e88ec7a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 19 Dec 2018 22:07:37 +0100 Subject: [PATCH 0214/1201] [crackle] extract ism and http formats --- youtube_dl/extractor/crackle.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 8dd9d6687..f73ef6b63 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -48,6 +48,21 @@ class CrackleIE(InfoExtractor): 'only_matching': True, }] + _MEDIA_FILE_SLOTS = { + '360p.mp4': { + 'width': 640, + 'height': 360, + }, + '480p.mp4': { + 'width': 768, + 'height': 432, + }, + '480p_1mbps.mp4': { + 'width': 852, + 'height': 480, + }, + } + def _real_extract(self, url): video_id = self._match_id(url) @@ -95,6 +110,20 @@ class CrackleIE(InfoExtractor): elif ext == 'mpd': formats.extend(self._extract_mpd_formats( format_url, video_id, mpd_id='dash', fatal=False)) + elif format_url.endswith('.ism/Manifest'): + formats.extend(self._extract_ism_formats( + format_url, video_id, ism_id='mss', fatal=False)) + else: + mfs_path = e.get('Type') + mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path) + if not mfs_info: + continue + formats.append({ + 'url': format_url, + 'format_id': 'http-' + mfs_path.split('.')[0], + 'width': mfs_info['width'], + 'height': mfs_info['height'], + }) self._sort_formats(formats) description = media.get('Description') From e1a0628797e2dc7e5e032f853ab4ddc6a7a08020 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 20 Dec 2018 23:22:51 +0100 Subject: [PATCH 0215/1201] [liveleak] add support for another embed type and restore original format extraction --- youtube_dl/extractor/liveleak.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 26671753c..22a067e40 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -87,7 +87,7 @@ class LiveLeakIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return re.findall( - r']+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"', + r']+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"', webpage) def _real_extract(self, url): @@ -120,13 +120,27 @@ class LiveLeakIE(InfoExtractor): } for idx, info_dict in enumerate(entries): + formats = [] for a_format in info_dict['formats']: if not a_format.get('height'): a_format['height'] = int_or_none(self._search_regex( r'([0-9]+)p\.mp4', a_format['url'], 'height label', default=None)) + formats.append(a_format) - self._sort_formats(info_dict['formats']) + # Removing '.*.mp4' gives the raw video, which is essentially + # the same video without the LiveLeak logo at the top (see + # https://github.com/rg3/youtube-dl/pull/4768) + orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url']) + if a_format['url'] != orig_url: + format_id = a_format.get('format_id') + formats.append({ + 'format_id': 'original' + ('-' + format_id if format_id else ''), + 'url': orig_url, + 'preference': 1, + }) + self._sort_formats(formats) + info_dict['formats'] = formats # Don't append entry ID for one-video pages to keep backward compatibility if len(entries) > 1: @@ -146,7 +160,7 @@ class LiveLeakIE(InfoExtractor): class LiveLeakEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P[if])=(?P[\w_]+)' + _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P[ift])=(?P[\w_]+)' # See generic.py for actual test cases _TESTS = [{ @@ -158,15 +172,14 @@ class LiveLeakEmbedIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - kind, video_id = mobj.group('kind', 'id') + kind, video_id = re.match(self._VALID_URL, url).groups() if kind == 'f': webpage = self._download_webpage(url, video_id) liveleak_url = self._search_regex( - r'logourl\s*:\s*(?P[\'"])(?P%s)(?P=q1)' % LiveLeakIE._VALID_URL, + r'(?:logourl\s*:\s*|window\.open\()(?P[\'"])(?P%s)(?P=q1)' % LiveLeakIE._VALID_URL, webpage, 'LiveLeak URL', group='url') - elif kind == 'i': - liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id + else: + liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id) return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key()) From 4273caf5c7e4a79783106938802c1f4e1aa9c950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Dec 2018 16:38:16 +0700 Subject: [PATCH 0216/1201] [youtube] Extend html5 player regex (closes #17516) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 44c25c11c..906774875 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1834,7 +1834,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: player_version = self._search_regex( [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', - r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'], + r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'], player_url, 'html5 player', fatal=False) player_desc = 'html5 player %s' % player_version From 63529e935cf5f87e6080607ef9d9196fe435e092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Dec 2018 16:57:10 +0700 Subject: [PATCH 0217/1201] [youtube] Relax html5 player regexes (closes #18465, closes #18466) --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 906774875..954853e00 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1105,7 +1105,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_signature_function(self, video_id, player_url, example_sig): id_m = re.match( - r'.*?-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P[a-z]+)$', + r'.*?-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P[a-z]+)$', player_url) if not id_m: raise ExtractorError('Cannot identify player %r' % player_url) @@ -1834,7 +1834,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: player_version = self._search_regex( [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', - r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'], + r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'], player_url, 'html5 player', fatal=False) player_desc = 'html5 player %s' % player_version From 825cd268a3a02e80a9c36dada664d25074243d26 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 26 Dec 2018 09:30:48 +0100 Subject: [PATCH 0218/1201] [youtube] detect DRM protected videos(#1774) --- youtube_dl/extractor/youtube.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 954853e00..4b06cb597 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1077,6 +1077,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://invidio.us/watch?v=BaW_jenozKc', 'only_matching': True, }, + { + # DRM protected + 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc', + 'only_matching': True, + } ] def __init__(self, *args, **kwargs): @@ -1673,6 +1678,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '"token" parameter not in video info for unknown reason', video_id=video_id) + if video_info.get('license_info'): + raise ExtractorError('This video is DRM protected.', expected=True) + video_details = try_get( player_response, lambda x: x['videoDetails'], dict) or {} From 140a13f5de1ff8e6bf7ba0d27b38063a2b20fa33 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 26 Dec 2018 10:55:28 +0100 Subject: [PATCH 0219/1201] [youtube] extract more format metadata --- youtube_dl/extractor/youtube.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4b06cb597..ac3b4fc17 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1794,6 +1794,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'height': int_or_none(width_height[1]), } q = qualities(['small', 'medium', 'hd720']) + streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) + if streaming_formats: + for fmt in streaming_formats: + itag = str_or_none(fmt.get('itag')) + if not itag: + continue + quality = fmt.get('quality') + quality_label = fmt.get('qualityLabel') or quality + formats_spec[itag] = { + 'asr': int_or_none(fmt.get('audioSampleRate')), + 'filesize': int_or_none(fmt.get('contentLength')), + 'format_note': quality_label, + 'fps': int_or_none(fmt.get('fps')), + 'height': int_or_none(fmt.get('height')), + 'quality': q(quality), + # bitrate for itag 43 is always 2147483647 + 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None, + 'width': int_or_none(fmt.get('width')), + } formats = [] for url_data_str in encoded_url_map.split(','): url_data = compat_parse_qs(url_data_str) @@ -1876,7 +1895,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): filesize = int_or_none(url_data.get( 'clen', [None])[0]) or _extract_filesize(url) - quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0] + quality = url_data.get('quality', [None])[0] more_fields = { 'filesize': filesize, @@ -1884,7 +1903,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'width': width, 'height': height, 'fps': int_or_none(url_data.get('fps', [None])[0]), - 'format_note': quality, + 'format_note': url_data.get('quality_label', [None])[0] or quality, 'quality': q(quality), } for key, value in more_fields.items(): From c2dd2dc086653f46defe0bc6bee9b5e339e67dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Dec 2018 19:57:01 +0700 Subject: [PATCH 0220/1201] [youtube] Unescape HTML for series (closes #18641) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ac3b4fc17..539ff6ff2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2043,7 +2043,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r']+id="watch7-headline"[^>]*>\s*]*>.*?>(?P[^<]+)\s*S(?P\d+)\s*•\s*E(?P\d+)', video_webpage) if m_episode: - series = m_episode.group('series') + series = unescapeHTML(m_episode.group('series')) season_number = int(m_episode.group('season')) episode_number = int(m_episode.group('episode')) else: From e4d51e751e361faaedaa50baf37501106459ab73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Dec 2018 20:59:53 +0700 Subject: [PATCH 0221/1201] [beeg] Fix extraction (closes #18610, closes #18626) --- youtube_dl/extractor/beeg.py | 75 ++++++------------------------------ 1 file changed, 11 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index bf22a41b7..1086d7632 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -1,15 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_chr, - compat_ord, - compat_urllib_parse_unquote, -) +from ..compat import compat_str from ..utils import ( int_or_none, - parse_iso8601, - urljoin, + unified_timestamp, ) @@ -36,29 +31,9 @@ class BeegIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - cpl_url = self._search_regex( - r']+src=(["\'])(?P(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1', - webpage, 'cpl', default=None, group='url') - - cpl_url = urljoin(url, cpl_url) - - beeg_version, beeg_salt = [None] * 2 - - if cpl_url: - cpl = self._download_webpage( - self._proto_relative_url(cpl_url), video_id, - 'Downloading cpl JS', fatal=False) - if cpl: - beeg_version = int_or_none(self._search_regex( - r'beeg_version\s*=\s*([^\b]+)', cpl, - 'beeg version', default=None)) or self._search_regex( - r'/(\d+)\.js', cpl_url, 'beeg version', default=None) - beeg_salt = self._search_regex( - r'beeg_salt\s*=\s*(["\'])(?P.+?)\1', cpl, 'beeg salt', - default=None, group='beeg_salt') - - beeg_version = beeg_version or '2185' - beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H' + beeg_version = self._search_regex( + r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', + default='1546225636701') for api_path in ('', 'api.'): video = self._download_json( @@ -68,37 +43,6 @@ class BeegIE(InfoExtractor): if video: break - def split(o, e): - def cut(s, x): - n.append(s[:x]) - return s[x:] - n = [] - r = len(o) % e - if r > 0: - o = cut(o, r) - while len(o) > e: - o = cut(o, e) - n.append(o) - return n - - def decrypt_key(key): - # Reverse engineered from http://static.beeg.com/cpl/1738.js - a = beeg_salt - e = compat_urllib_parse_unquote(key) - o = ''.join([ - compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21) - for n in range(len(e))]) - return ''.join(split(o, 3)[::-1]) - - def decrypt_url(encrypted_url): - encrypted_url = self._proto_relative_url( - encrypted_url.replace('{DATA_MARKERS}', ''), 'https:') - key = self._search_regex( - r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None) - if not key: - return encrypted_url - return encrypted_url.replace(key, decrypt_key(key)) - formats = [] for format_id, video_url in video.items(): if not video_url: @@ -108,18 +52,20 @@ class BeegIE(InfoExtractor): if not height: continue formats.append({ - 'url': decrypt_url(video_url), + 'url': self._proto_relative_url( + video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'), 'format_id': format_id, 'height': int(height), }) self._sort_formats(formats) title = video['title'] - video_id = video.get('id') or video_id + video_id = compat_str(video.get('id') or video_id) display_id = video.get('code') description = video.get('desc') + series = video.get('ps_name') - timestamp = parse_iso8601(video.get('date'), ' ') + timestamp = unified_timestamp(video.get('date')) duration = int_or_none(video.get('duration')) tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None @@ -129,6 +75,7 @@ class BeegIE(InfoExtractor): 'display_id': display_id, 'title': title, 'description': description, + 'series': series, 'timestamp': timestamp, 'duration': duration, 'tags': tags, From 4e1ddc8da9b318873d71d0283838af6aec1bc98d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Dec 2018 21:05:07 +0700 Subject: [PATCH 0222/1201] [npo:live] Add support for npostart.nl (closes #18644) --- youtube_dl/extractor/npo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index c2cb85a73..5a427c396 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -363,7 +363,7 @@ class NPOIE(NPOBaseIE): class NPOLiveIE(NPOBaseIE): IE_NAME = 'npo.nl:live' - _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P[^/?#&]+))?' + _VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P[^/?#&]+))?' _TESTS = [{ 'url': 'http://www.npo.nl/live/npo-1', @@ -380,6 +380,9 @@ class NPOLiveIE(NPOBaseIE): }, { 'url': 'http://www.npo.nl/live', 'only_matching': True, + }, { + 'url': 'https://www.npostart.nl/live/npo-1', + 'only_matching': True, }] def _real_extract(self, url): From 373941c5f0c163bf9f2fbbe7f65ee549a228720f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Dec 2018 23:20:40 +0700 Subject: [PATCH 0223/1201] [bbc] Add support for another embed pattern (closes #18643) --- youtube_dl/extractor/bbc.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index abcfa301d..eac9a5a46 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -795,6 +795,15 @@ class BBCIE(BBCCoUkIE): 'uploader': 'Radio 3', 'uploader_id': 'bbc_radio_three', }, + }, { + 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227', + 'info_dict': { + 'id': 'p06w9tws', + 'ext': 'mp4', + 'title': 'md5:2fabf12a726603193a2879a055f72514', + 'description': 'Learn English words and phrases from this story', + }, + 'add_ie': [BBCCoUkIE.ie_key()], }] @classmethod @@ -945,6 +954,15 @@ class BBCIE(BBCCoUkIE): if entries: return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) + # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227 + group_id = self._search_regex( + r']+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, + webpage, 'group id', default=None) + if playlist_id: + return self.url_result( + 'https://www.bbc.co.uk/programmes/%s' % group_id, + ie=BBCCoUkIE.ie_key()) + # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex( [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, From 32ac3d49ae83fe933515096a74068a8033131f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Dec 2018 23:57:46 +0700 Subject: [PATCH 0224/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index 510013e89..31458dbd9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version + +Extractors ++ [bbc] Add support for another embed pattern (#18643) ++ [npo:live] Add support for npostart.nl (#18644) +* [beeg] Fix extraction (#18610, #18626) +* [youtube] Unescape HTML for series (#18641) ++ [youtube] Extract more format metadata +* [youtube] Detect DRM protected videos (#1774) +* [youtube] Relax HTML5 player regular expressions (#18465, #18466) +* [youtube] Extend HTML5 player regular expression (#17516) ++ [liveleak] Add support for another embed type and restore original + format extraction ++ [crackle] Extract ISM and HTTP formats ++ [twitter] Pass Referer with card request (#18579) +* [mediasite] Extend URL regular expression (#18558) ++ [lecturio] Add support for lecturio.de (#18562) ++ [discovery] Add support for Scripps Networks watch domains (#17947) + + version 2018.12.17 Extractors From 9d9daed4647c0a0798adee5a580e9fd95478cf6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Dec 2018 23:59:52 +0700 Subject: [PATCH 0225/1201] release 2018.12.31 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 10 +++++++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b84559932..b2fe6ee6a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.17** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.31*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.31** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.12.17 +[debug] youtube-dl version 2018.12.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cba87190d..24617264c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -153,15 +153,19 @@ After you have ensured this site is distributing its content legally, you can fo 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. -8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. -9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: +8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): + + $ flake8 youtube_dl/extractor/yourextractor.py + +9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. +10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py $ git commit -m '[yourextractor] Add new extractor' $ git push origin yourextractor -10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. +11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. In any case, thank you very much for your contributions! diff --git a/ChangeLog b/ChangeLog index 31458dbd9..72bfe2110 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.12.31 Extractors + [bbc] Add support for another embed pattern (#18643) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 31d20f255..e92064432 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -438,6 +438,7 @@ - **Lecture2Go** - **Lecturio** - **LecturioCourse** + - **LecturioDeCourse** - **LEGO** - **Lemonde** - **Lenta** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0461aac0c..87de5848d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.12.17' +__version__ = '2018.12.31' From 6b688b8942a0bd18b2a3835e69a903f1eeffee2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Jan 2019 18:12:44 +0700 Subject: [PATCH 0226/1201] [bitchute] Fix extraction (closes #18567) --- youtube_dl/extractor/bitchute.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py index 43b4732aa..aa034355a 100644 --- a/youtube_dl/extractor/bitchute.py +++ b/youtube_dl/extractor/bitchute.py @@ -5,7 +5,10 @@ import itertools import re from .common import InfoExtractor -from ..utils import urlencode_postdata +from ..utils import ( + orderedSet, + urlencode_postdata, +) class BitChuteIE(InfoExtractor): @@ -43,10 +46,15 @@ class BitChuteIE(InfoExtractor): 'description', webpage, 'title', default=None) or self._og_search_description(webpage) + format_urls = [] + for mobj in re.finditer( + r'addWebSeed\s*\(\s*(["\'])(?P(?:(?!\1).)+)\1', webpage): + format_urls.append(mobj.group('url')) + format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage)) + formats = [ - {'url': mobj.group('url')} - for mobj in re.finditer( - r'addWebSeed\s*\(\s*(["\'])(?P(?:(?!\1).)+)\1', webpage)] + {'url': format_url} + for format_url in orderedSet(format_urls)] self._sort_formats(formats) description = self._html_search_regex( From 38d15ba7f981cc82db91edcbf05ea3844082d045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Jan 2019 20:31:48 +0700 Subject: [PATCH 0227/1201] [manyvids] Fix extraction (closes #18604, closes #18614) --- youtube_dl/extractor/manyvids.py | 62 +++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py index b94b3c2ab..e8d7163e4 100644 --- a/youtube_dl/extractor/manyvids.py +++ b/youtube_dl/extractor/manyvids.py @@ -2,12 +2,18 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + determine_ext, + int_or_none, + str_to_int, + urlencode_postdata, +) class ManyVidsIE(InfoExtractor): _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P\d+)' - _TEST = { + _TESTS = [{ + # preview video 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', 'md5': '03f11bb21c52dd12a05be21a5c7dcc97', 'info_dict': { @@ -17,7 +23,18 @@ class ManyVidsIE(InfoExtractor): 'view_count': int, 'like_count': int, }, - } + }, { + # full video + 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', + 'md5': 'f3e8f7086409e9b470e2643edb96bdcc', + 'info_dict': { + 'id': '935718', + 'ext': 'mp4', + 'title': 'MY FACE REVEAL', + 'view_count': int, + 'like_count': int, + }, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -28,12 +45,41 @@ class ManyVidsIE(InfoExtractor): r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video URL', group='url') - title = '%s (Preview)' % self._html_search_regex( - r']+class="m-a-0"[^>]*>([^<]+)', webpage, 'title') + title = self._html_search_regex( + (r']+class=["\']item-title[^>]+>([^<]+)', + r']+class=["\']h2 m-0["\'][^>]*>([^<]+)'), + webpage, 'title', default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', fatal=True) + + if any(p in webpage for p in ('preview_videos', '_preview.mp4')): + title += ' (Preview)' + + mv_token = self._search_regex( + r'data-mvtoken=(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'mv token', default=None, group='value') + + if mv_token: + # Sets some cookies + self._download_webpage( + 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php', + video_id, fatal=False, data=urlencode_postdata({ + 'mvtoken': mv_token, + 'vid': video_id, + }), headers={ + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest' + }) + + if determine_ext(video_url) == 'm3u8': + formats = self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + else: + formats = [{'url': video_url}] like_count = int_or_none(self._search_regex( r'data-likes=["\'](\d+)', webpage, 'like count', default=None)) - view_count = int_or_none(self._html_search_regex( + view_count = str_to_int(self._html_search_regex( r'(?s)]+class="views-wrapper"[^>]*>(.+?) Date: Sun, 30 Dec 2018 14:30:36 +0100 Subject: [PATCH 0228/1201] [rmcdecouverte] Update _VALID_URL (closes #18595) --- youtube_dl/extractor/rmcdecouverte.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py index e921ca3e6..c44b90274 100644 --- a/youtube_dl/extractor/rmcdecouverte.py +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -10,18 +10,18 @@ from ..compat import ( class RMCDecouverteIE(InfoExtractor): - _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P\d+)' + _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/.+/program_(?P\d+)' _TEST = { - 'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=13502&title=AQUAMEN:LES%20ROIS%20DES%20AQUARIUMS%20:UN%20DELICIEUX%20PROJET', + 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/', 'info_dict': { - 'id': '5419055995001', + 'id': '5983675500001', 'ext': 'mp4', - 'title': 'UN DELICIEUX PROJET', - 'description': 'md5:63610df7c8b1fc1698acd4d0d90ba8b5', + 'title': 'CORVETTE', + 'description': 'md5:c1e8295521e45ffebf635d6a7658f506', 'uploader_id': '1969646226001', - 'upload_date': '20170502', - 'timestamp': 1493745308, + 'upload_date': '20181226', + 'timestamp': 1545861635, }, 'params': { 'skip_download': True, From d9f1123c08e21c7170b906c7de2562b7293f7a3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Jan 2019 20:49:13 +0700 Subject: [PATCH 0229/1201] [rmcdecouverte] Improve, bypass geo restriction and add support for live (closes #18697) --- youtube_dl/extractor/rmcdecouverte.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py index c44b90274..c3623edcc 100644 --- a/youtube_dl/extractor/rmcdecouverte.py +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -1,18 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE from ..compat import ( compat_parse_qs, compat_urlparse, ) +from ..utils import smuggle_url class RMCDecouverteIE(InfoExtractor): - _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/.+/program_(?P\d+)' + _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P\d+)|(?Pmediaplayer-direct))' - _TEST = { + _TESTS = [{ 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/', 'info_dict': { 'id': '5983675500001', @@ -27,12 +30,17 @@ class RMCDecouverteIE(InfoExtractor): 'skip_download': True, }, 'skip': 'only available for a week', - } + }, { + # live, geo restricted, bypassable + 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/', + 'only_matching': True, + }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') or mobj.group('live_id') + webpage = self._download_webpage(url, display_id) brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) if brightcove_legacy_url: brightcove_id = compat_parse_qs(compat_urlparse.urlparse( @@ -41,5 +49,7 @@ class RMCDecouverteIE(InfoExtractor): brightcove_id = self._search_regex( r'data-video-id=["\'](\d+)', webpage, 'brightcove id') return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', - brightcove_id) + smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['FR']}), + 'BrightcoveNew', brightcove_id) From 9b5c8751eeb10fd41c560696d73fec3401055ddc Mon Sep 17 00:00:00 2001 From: iwconfig Date: Tue, 1 Jan 2019 15:39:18 +0100 Subject: [PATCH 0230/1201] [extractors] Add missing age limits --- youtube_dl/extractor/cammodels.py | 2 ++ youtube_dl/extractor/camtube.py | 2 ++ youtube_dl/extractor/camwithher.py | 2 ++ youtube_dl/extractor/yourporn.py | 2 ++ 4 files changed, 8 insertions(+) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 79350817f..1eb81b75e 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -14,6 +14,7 @@ class CamModelsIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.cammodels.com/cam/AutumnKnight/', 'only_matching': True, + 'age_limit': 18 }] def _real_extract(self, url): @@ -93,4 +94,5 @@ class CamModelsIE(InfoExtractor): 'title': self._live_title(user_id), 'is_live': True, 'formats': formats, + 'age_limit': 18 } diff --git a/youtube_dl/extractor/camtube.py b/youtube_dl/extractor/camtube.py index c7d40f849..b3be3bdcf 100644 --- a/youtube_dl/extractor/camtube.py +++ b/youtube_dl/extractor/camtube.py @@ -20,6 +20,7 @@ class CamTubeIE(InfoExtractor): 'duration': 1274, 'timestamp': 1528018608, 'upload_date': '20180603', + 'age_limit': 18 }, 'params': { 'skip_download': True, @@ -66,4 +67,5 @@ class CamTubeIE(InfoExtractor): 'like_count': like_count, 'creator': creator, 'formats': formats, + 'age_limit': 18 } diff --git a/youtube_dl/extractor/camwithher.py b/youtube_dl/extractor/camwithher.py index afbc5ea26..bbc5205fd 100644 --- a/youtube_dl/extractor/camwithher.py +++ b/youtube_dl/extractor/camwithher.py @@ -25,6 +25,7 @@ class CamWithHerIE(InfoExtractor): 'comment_count': int, 'uploader': 'MileenaK', 'upload_date': '20160322', + 'age_limit': 18, }, 'params': { 'skip_download': True, @@ -84,4 +85,5 @@ class CamWithHerIE(InfoExtractor): 'comment_count': comment_count, 'uploader': uploader, 'upload_date': upload_date, + 'age_limit': 18 } diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index a9951f3b8..432d1fff4 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -14,6 +14,7 @@ class YourPornIE(InfoExtractor): 'ext': 'mp4', 'title': 'md5:c9f43630bd968267672651ba905a7d35', 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18 }, } @@ -38,4 +39,5 @@ class YourPornIE(InfoExtractor): 'url': video_url, 'title': title, 'thumbnail': thumbnail, + 'age_limit': 18 } From 0e713dbb116493181218c38dc03716feda2fa7d9 Mon Sep 17 00:00:00 2001 From: biwubo <45994985+biwubo@users.noreply.github.com> Date: Tue, 1 Jan 2019 15:48:06 +0000 Subject: [PATCH 0231/1201] [acast:channel] Add support for play.acast.com (closes #18587) --- youtube_dl/extractor/acast.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index b32f74a37..c4362be88 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -79,17 +79,27 @@ class ACastIE(InfoExtractor): class ACastChannelIE(InfoExtractor): IE_NAME = 'acast:channel' - _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P[^/#?]+)' - _TEST = { - 'url': 'https://www.acast.com/condenasttraveler', + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?acast\.com/| + play\.acast\.com/s/ + ) + (?P[^/#?]+) + ''' + _TESTS = [{ + 'url': 'https://www.acast.com/todayinfocus', 'info_dict': { - 'id': '50544219-29bb-499e-a083-6087f4cb7797', - 'title': 'Condé Nast Traveler Podcast', - 'description': 'md5:98646dee22a5b386626ae31866638fbd', + 'id': '4efc5294-5385-4847-98bd-519799ce5786', + 'title': 'Today in Focus', + 'description': 'md5:9ba5564de5ce897faeb12963f4537a64', }, - 'playlist_mincount': 20, - } - _API_BASE_URL = 'https://www.acast.com/api/' + 'playlist_mincount': 35, + }, { + 'url': 'http://play.acast.com/s/ft-banking-weekly', + 'only_matching': True, + }] + _API_BASE_URL = 'https://play.acast.com/api/' _PAGE_SIZE = 10 @classmethod @@ -102,7 +112,7 @@ class ACastChannelIE(InfoExtractor): channel_slug, note='Download page %d of channel data' % page) for cast in casts: yield self.url_result( - 'https://www.acast.com/%s/%s' % (channel_slug, cast['url']), + 'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']), 'ACast', cast['id']) def _real_extract(self, url): From 696f4e41149a8f91e963d33e2f6ea9bc835f2260 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Tue, 1 Jan 2019 23:13:39 +0700 Subject: [PATCH 0232/1201] [README.md] Add more guide lines for regular expressions --- README.md | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b3c39bf66..bdc5faeec 100644 --- a/README.md +++ b/README.md @@ -1133,11 +1133,33 @@ title = meta.get('title') or self._og_search_title(webpage) This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`. -### Make regular expressions flexible +### Regular expressions -When using regular expressions try to write them fuzzy and flexible. +#### Don't capture groups you don't use + +Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing. + +##### Example + +Don't capture id attribute name here since you can't use it for anything anyway. + +Correct: + +```python +r'(?:id|ID)=(?P\d+)' +``` + +Incorrect: +```python +r'(id|ID)=(?P\d+)' +``` + + +#### Make regular expressions relaxed and flexible + +When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on. -#### Example +##### Example Say you need to extract `title` from the following HTML code: From 1d803085d730bf2aaf54e5cea2362133b3fdb831 Mon Sep 17 00:00:00 2001 From: v-delta <45652398+v-delta@users.noreply.github.com> Date: Tue, 1 Jan 2019 17:26:59 +0100 Subject: [PATCH 0233/1201] [yourporn] Fix extraction --- youtube_dl/extractor/yourporn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index 432d1fff4..c8dc29bd8 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -27,7 +27,7 @@ class YourPornIE(InfoExtractor): self._search_regex( r'data-vnfo=(["\'])(?P{.+?})\1', webpage, 'data info', group='data'), - video_id)[video_id]).replace('/cdn/', '/cdn2/') + video_id)[video_id]).replace('/cdn/', '/cdn3/') title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', From 8437f5089f2fac85a249c4368c2ea1415955d8b0 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Tue, 1 Jan 2019 23:50:02 +0700 Subject: [PATCH 0234/1201] [README.md] Add long lines policy to coding conventions --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index bdc5faeec..901595444 100644 --- a/README.md +++ b/README.md @@ -1192,6 +1192,25 @@ title = self._search_regex( webpage, 'title', group='title') ``` +### Long lines policy + +There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse. + +For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit: + +Correct: + +```python +'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' +``` + +Incorrect: + +```python +'https://www.youtube.com/watch?v=FqZTN594JQw&list=' +'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' +``` + ### Use safe conversion functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. From d226c560a6250d03ff5f27d5b078f894d448a0b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elan=20Ruusam=C3=A4e?= Date: Tue, 1 Jan 2019 18:56:05 +0200 Subject: [PATCH 0235/1201] Refactor code to use url_result --- youtube_dl/extractor/audiomack.py | 2 +- youtube_dl/extractor/cnn.py | 12 ++---------- youtube_dl/extractor/freespeech.py | 7 ++----- youtube_dl/extractor/generic.py | 5 +---- youtube_dl/extractor/livestream.py | 5 +---- youtube_dl/extractor/savefrom.py | 7 ++----- youtube_dl/extractor/ted.py | 6 ++---- youtube_dl/extractor/testurl.py | 6 +----- youtube_dl/extractor/wimp.py | 6 +----- 9 files changed, 13 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 62049b921..cc7771354 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -62,7 +62,7 @@ class AudiomackIE(InfoExtractor): # Audiomack wraps a lot of soundcloud tracks in their branded wrapper # if so, pass the work off to the soundcloud extractor if SoundcloudIE.suitable(api_response['url']): - return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'} + return self.url_result(api_response['url'], SoundcloudIE.ie_key()) return { 'id': compat_str(api_response.get('id', album_url_tag)), diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 5fc311f53..774b71055 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -119,11 +119,7 @@ class CNNBlogsIE(InfoExtractor): def _real_extract(self, url): webpage = self._download_webpage(url, url_basename(url)) cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url') - return { - '_type': 'url', - 'url': cnn_url, - 'ie_key': CNNIE.ie_key(), - } + return self.url_result(cnn_url, CNNIE.ie_key()) class CNNArticleIE(InfoExtractor): @@ -145,8 +141,4 @@ class CNNArticleIE(InfoExtractor): def _real_extract(self, url): webpage = self._download_webpage(url, url_basename(url)) cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url') - return { - '_type': 'url', - 'url': 'http://cnn.com/video/?/video/' + cnn_url, - 'ie_key': CNNIE.ie_key(), - } + return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key()) diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 486a49c05..ea9c3e317 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .youtube import YoutubeIE class FreespeechIE(InfoExtractor): @@ -27,8 +28,4 @@ class FreespeechIE(InfoExtractor): r'data-video-url="([^"]+)"', webpage, 'youtube url') - return { - '_type': 'url', - 'url': youtube_url, - 'ie_key': 'Youtube', - } + return self.url_result(youtube_url, YoutubeIE.ie_key()) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 65b482333..067de28cd 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2197,10 +2197,7 @@ class GenericIE(InfoExtractor): def _real_extract(self, url): if url.startswith('//'): - return { - '_type': 'url', - 'url': self.http_scheme() + url, - } + return self.url_result(self.http_scheme() + url) parsed_url = compat_urlparse.urlparse(url) if not parsed_url.scheme: diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index c4776bbf3..e55b1a202 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -363,7 +363,4 @@ class LivestreamShortenerIE(InfoExtractor): id = mobj.group('id') webpage = self._download_webpage(url, id) - return { - '_type': 'url', - 'url': self._og_search_url(webpage), - } + return self.url_result(self._og_search_url(webpage)) diff --git a/youtube_dl/extractor/savefrom.py b/youtube_dl/extractor/savefrom.py index 30f9cf824..21e44b69a 100644 --- a/youtube_dl/extractor/savefrom.py +++ b/youtube_dl/extractor/savefrom.py @@ -30,8 +30,5 @@ class SaveFromIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = os.path.splitext(url.split('/')[-1])[0] - return { - '_type': 'url', - 'id': video_id, - 'url': mobj.group('url'), - } + + return self.url_result(mobj.group('url'), video_id=video_id) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index f9b6aa48f..d3e4205f5 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -203,10 +203,8 @@ class TEDIE(InfoExtractor): ext_url = None if service.lower() == 'youtube': ext_url = external.get('code') - return { - '_type': 'url', - 'url': ext_url or external['uri'], - } + + return self.url_result(ext_url or external['uri']) resources_ = player_talk.get('resources') or talk_info.get('resources') diff --git a/youtube_dl/extractor/testurl.py b/youtube_dl/extractor/testurl.py index 46918adb0..84a14a0bd 100644 --- a/youtube_dl/extractor/testurl.py +++ b/youtube_dl/extractor/testurl.py @@ -61,8 +61,4 @@ class TestURLIE(InfoExtractor): self.to_screen('Test URL: %s' % tc['url']) - return { - '_type': 'url', - 'url': tc['url'], - 'id': video_id, - } + return self.url_result(tc['url'], video_id=video_id) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 3dab9145b..ea234e3c5 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -40,11 +40,7 @@ class WimpIE(InfoExtractor): r'data-id=["\']([0-9A-Za-z_-]{11})'), webpage, 'video URL', default=None) if youtube_id: - return { - '_type': 'url', - 'url': youtube_id, - 'ie_key': YoutubeIE.ie_key(), - } + return self.url_result(youtube_id, YoutubeIE.ie_key()) info_dict = self._extract_jwplayer_data( webpage, video_id, require_title=False) From 751e051557372ab3fa41d839e7e6a149aa2d04b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Jan 2019 17:26:15 +0700 Subject: [PATCH 0236/1201] [packtpub] Add support for subscription.packtpub.com (closes #18718) --- youtube_dl/extractor/packtpub.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 56a2a1083..1324137df 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -24,9 +24,9 @@ class PacktPubBaseIE(InfoExtractor): class PacktPubIE(PacktPubBaseIE): - _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P\d+)/(?P\d+)/(?P\d+)' + _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P\d+)/(?P\d+)/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', 'md5': '1e74bd6cfd45d7d07666f4684ef58f70', 'info_dict': { @@ -37,7 +37,10 @@ class PacktPubIE(PacktPubBaseIE): 'timestamp': 1490918400, 'upload_date': '20170331', }, - } + }, { + 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro', + 'only_matching': True, + }] _NETRC_MACHINE = 'packtpub' _TOKEN = None @@ -110,15 +113,18 @@ class PacktPubIE(PacktPubBaseIE): class PacktPubCourseIE(PacktPubBaseIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P\d+))' - _TEST = { + _VALID_URL = r'(?Phttps?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P\d+))' + _TESTS = [{ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215', 'info_dict': { 'id': '9781787122215', 'title': 'Learn Nodejs by building 12 projects [Video]', }, 'playlist_count': 90, - } + }, { + 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215', + 'only_matching': True, + }] @classmethod def suitable(cls, url): From 2122d7151deca9c7047426320cd73152cb907afa Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Wed, 2 Jan 2019 17:46:07 +0100 Subject: [PATCH 0237/1201] [discovery] Use geo verification headers --- youtube_dl/extractor/discovery.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 44fbc41bb..b70c307a7 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -94,11 +94,12 @@ class DiscoveryIE(DiscoveryGoBaseIE): })['access_token'] try: + headers = self.geo_verification_headers() + headers['Authorization'] = 'Bearer ' + access_token + stream = self._download_json( 'https://api.discovery.com/v1/streaming/video/' + video_id, - display_id, headers={ - 'Authorization': 'Bearer ' + access_token, - }) + display_id, headers=headers) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): e_description = self._parse_json( From aeb72b3a413c8a9d11e3c002b8ab2612b9075a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Jan 2019 23:51:23 +0700 Subject: [PATCH 0238/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 72bfe2110..1a28d6be0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Extractors +* [discovery] Use geo verification headers (#17838) ++ [packtpub] Add support for subscription.packtpub.com (#18718) +* [yourporn] Fix extraction (#18583) ++ [acast:channel] Add support for play.acast.com (#18587) ++ [extractors] Add missing age limits (#18621) ++ [rmcdecouverte] Add support for live stream +* [rmcdecouverte] Bypass geo restriction +* [rmcdecouverte] Update URL regular expression (#18595, 18697) +* [manyvids] Fix extraction (#18604, #18614) +* [bitchute] Fix extraction (#18567) + + version 2018.12.31 Extractors From d7c3af7a72a1e6bd7f93321cce4daf2a13ebdf12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Jan 2019 23:52:54 +0700 Subject: [PATCH 0239/1201] release 2019.01.02 --- .github/ISSUE_TEMPLATE.md | 6 ++--- CONTRIBUTING.md | 47 ++++++++++++++++++++++++++++++++++++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b2fe6ee6a..3986bbc8e 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.31*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.31** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.12.31 +[debug] youtube-dl version 2019.01.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 24617264c..a71b045d0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -261,11 +261,33 @@ title = meta.get('title') or self._og_search_title(webpage) This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`. -### Make regular expressions flexible +### Regular expressions -When using regular expressions try to write them fuzzy and flexible. +#### Don't capture groups you don't use + +Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing. + +##### Example + +Don't capture id attribute name here since you can't use it for anything anyway. + +Correct: + +```python +r'(?:id|ID)=(?P\d+)' +``` + +Incorrect: +```python +r'(id|ID)=(?P\d+)' +``` + + +#### Make regular expressions relaxed and flexible + +When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on. -#### Example +##### Example Say you need to extract `title` from the following HTML code: @@ -298,6 +320,25 @@ title = self._search_regex( webpage, 'title', group='title') ``` +### Long lines policy + +There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse. + +For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit: + +Correct: + +```python +'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' +``` + +Incorrect: + +```python +'https://www.youtube.com/watch?v=FqZTN594JQw&list=' +'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' +``` + ### Use safe conversion functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. diff --git a/ChangeLog b/ChangeLog index 1a28d6be0..41d190a72 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.01.02 Extractors * [discovery] Use geo verification headers (#17838) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 87de5848d..07a444706 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.12.31' +__version__ = '2019.01.02' From c87f65e43dea0f9edf1b5ed8979e274902fb60a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jan 2019 22:21:53 +0700 Subject: [PATCH 0240/1201] [carambatv:page] Fix extraction (closes #18739) --- youtube_dl/extractor/carambatv.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py index 9ba909a91..b57b86af7 100644 --- a/youtube_dl/extractor/carambatv.py +++ b/youtube_dl/extractor/carambatv.py @@ -82,6 +82,12 @@ class CarambaTVPageIE(InfoExtractor): webpage = self._download_webpage(url, video_id) videomore_url = VideomoreIE._extract_url(webpage) + if not videomore_url: + videomore_id = self._search_regex( + r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id', + default=None) + if videomore_id: + videomore_url = 'videomore:%s' % videomore_id if videomore_url: title = self._og_search_title(webpage) return { From de0359c0af3667605464212e66ba4048b6ba093b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 5 Jan 2019 03:40:41 +0700 Subject: [PATCH 0241/1201] [tvnow] Fix and rework extractors, prepare for a switch to the new API (closes #17245, closes #18499) --- youtube_dl/extractor/extractors.py | 4 +- youtube_dl/extractor/tvnow.py | 378 +++++++++++++++++++++-------- 2 files changed, 283 insertions(+), 99 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d72f52e36..3b1dfc451 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1193,7 +1193,9 @@ from .tvnet import TVNetIE from .tvnoe import TVNoeIE from .tvnow import ( TVNowIE, - TVNowListIE, + TVNowNewIE, + TVNowSeasonIE, + TVNowAnnualIE, TVNowShowIE, ) from .tvp import ( diff --git a/youtube_dl/extractor/tvnow.py b/youtube_dl/extractor/tvnow.py index 60937616f..3c6a60c39 100644 --- a/youtube_dl/extractor/tvnow.py +++ b/youtube_dl/extractor/tvnow.py @@ -10,8 +10,9 @@ from ..utils import ( int_or_none, parse_iso8601, parse_duration, - try_get, + str_or_none, update_url_query, + urljoin, ) @@ -24,8 +25,7 @@ class TVNowBaseIE(InfoExtractor): def _call_api(self, path, video_id, query): return self._download_json( - 'https://api.tvnow.de/v3/' + path, - video_id, query=query) + 'https://api.tvnow.de/v3/' + path, video_id, query=query) def _extract_video(self, info, display_id): video_id = compat_str(info['id']) @@ -108,6 +108,11 @@ class TVNowIE(TVNowBaseIE): (?!(?:list|jahr)(?:/|$))(?P[^/?\#&]+) ''' + @classmethod + def suitable(cls, url): + return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url) + else super(TVNowIE, cls).suitable(url)) + _TESTS = [{ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player', 'info_dict': { @@ -116,7 +121,6 @@ class TVNowIE(TVNowBaseIE): 'ext': 'mp4', 'title': 'Der neue Porsche 911 GT 3', 'description': 'md5:6143220c661f9b0aae73b245e5d898bb', - 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1495994400, 'upload_date': '20170528', 'duration': 5283, @@ -161,136 +165,314 @@ class TVNowIE(TVNowBaseIE): info = self._call_api( 'movies/' + display_id, display_id, query={ 'fields': ','.join(self._VIDEO_FIELDS), - 'station': mobj.group(1), }) return self._extract_video(info, display_id) -class TVNowListBaseIE(TVNowBaseIE): - _SHOW_VALID_URL = r'''(?x) - (?P - https?:// - (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/ - (?P[^/]+) - ) +class TVNowNewIE(InfoExtractor): + _VALID_URL = r'''(?x) + (?Phttps?:// + (?:www\.)?tvnow\.(?:de|at|ch)/ + (?:shows|serien))/ + (?P[^/]+)-\d+/ + [^/]+/ + episode-\d+-(?P[^/?$&]+)-(?P\d+) ''' - def _extract_list_info(self, display_id, show_id): - fields = list(self._SHOW_FIELDS) - fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS) - fields.extend( - 'formatTabs.formatTabPages.container.movies.%s' % field - for field in self._VIDEO_FIELDS) - return self._call_api( - 'formats/seo', display_id, query={ - 'fields': ','.join(fields), - 'name': show_id + '.php' - }) - - -class TVNowListIE(TVNowListBaseIE): - _VALID_URL = r'%s/(?:list|jahr)/(?P[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL - - _SHOW_FIELDS = ('title', ) - _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) - _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', ) - _TESTS = [{ - 'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell', - 'info_dict': { - 'id': '28296', - 'title': '30 Minuten Deutschland - Aktuell', - }, - 'playlist_mincount': 1, - }, { - 'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14', - 'only_matching': True, - }, { - 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3', + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return (False if TVNowIE.suitable(url) - else super(TVNowListIE, cls).suitable(url)) + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url')) + show, episode = mobj.group('show', 'episode') + return self.url_result( + # Rewrite new URLs to the old format and use extraction via old API + # at api.tvnow.de as a loophole for bypassing premium content checks + '%s/%s/%s' % (base_url, show, episode), + ie=TVNowIE.ie_key(), video_id=mobj.group('id')) + + +class TVNowNewBaseIE(InfoExtractor): + def _call_api(self, path, video_id, query={}): + result = self._download_json( + 'https://apigw.tvnow.de/module/' + path, video_id, query=query) + error = result.get('error') + if error: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error), expected=True) + return result + + +""" +TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it +when api.tvnow.de is shut down. This version can't bypass premium checks though. +class TVNowIE(TVNowNewBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?tvnow\.(?:de|at|ch)/ + (?:shows|serien)/[^/]+/ + (?:[^/]+/)+ + (?P[^/?$&]+)-(?P\d+) + ''' + + _TESTS = [{ + # episode with annual navigation + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', + 'info_dict': { + 'id': '331082', + 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3', + 'ext': 'mp4', + 'title': 'Der neue Porsche 911 GT 3', + 'description': 'md5:6143220c661f9b0aae73b245e5d898bb', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1495994400, + 'upload_date': '20170528', + 'duration': 5283, + 'series': 'GRIP - Das Motormagazin', + 'season_number': 14, + 'episode_number': 405, + 'episode': 'Der neue Porsche 911 GT 3', + }, + }, { + # rtl2, episode with season navigation + 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124', + 'only_matching': True, + }, { + # rtlnitro + 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822', + 'only_matching': True, + }, { + # superrtl + 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120', + 'only_matching': True, + }, { + # ntv + 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630', + 'only_matching': True, + }, { + # vox + 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072', + 'only_matching': True, + }, { + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', + 'only_matching': True, + }] + + def _extract_video(self, info, url, display_id): + config = info['config'] + source = config['source'] + + video_id = compat_str(info.get('id') or source['videoId']) + title = source['title'].strip() + + paths = [] + for manifest_url in (info.get('manifest') or {}).values(): + if not manifest_url: + continue + manifest_url = update_url_query(manifest_url, {'filter': ''}) + path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path') + if path in paths: + continue + paths.append(path) + + def url_repl(proto, suffix): + return re.sub( + r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub( + r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)', + '.ism/' + suffix, manifest_url)) + + formats = self._extract_mpd_formats( + url_repl('dash', '.mpd'), video_id, + mpd_id='dash', fatal=False) + formats.extend(self._extract_ism_formats( + url_repl('hss', 'Manifest'), + video_id, ism_id='mss', fatal=False)) + formats.extend(self._extract_m3u8_formats( + url_repl('hls', '.m3u8'), video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + if formats: + break + else: + if try_get(info, lambda x: x['rights']['isDrm']): + raise ExtractorError( + 'Video %s is DRM protected' % video_id, expected=True) + if try_get(config, lambda x: x['boards']['geoBlocking']['block']): + raise self.raise_geo_restricted() + if not info.get('free', True): + raise ExtractorError( + 'Video %s is not available for free' % video_id, expected=True) + self._sort_formats(formats) + + description = source.get('description') + thumbnail = url_or_none(source.get('poster')) + timestamp = unified_timestamp(source.get('previewStart')) + duration = parse_duration(source.get('length')) + + series = source.get('format') + season_number = int_or_none(self._search_regex( + r'staffel-(\d+)', url, 'season number', default=None)) + episode_number = int_or_none(self._search_regex( + r'episode-(\d+)', url, 'episode number', default=None)) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'series': series, + 'season_number': season_number, + 'episode_number': episode_number, + 'episode': title, + 'formats': formats, + } def _real_extract(self, url): - base_url, show_id, season_id = re.match(self._VALID_URL, url).groups() + display_id, video_id = re.match(self._VALID_URL, url).groups() + info = self._call_api('player/' + video_id, video_id) + return self._extract_video(info, video_id, display_id) +""" - list_info = self._extract_list_info(season_id, show_id) - season = next( - season for season in list_info['formatTabs']['items'] - if season.get('seoheadline') == season_id) +class TVNowListBaseIE(TVNowNewBaseIE): + _SHOW_VALID_URL = r'''(?x) + (?P + https?:// + (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/ + [^/?#&]+-(?P\d+) + ) + ''' - title = list_info.get('title') - headline = season.get('headline') - if title and headline: - title = '%s - %s' % (title, headline) - else: - title = headline or title + @classmethod + def suitable(cls, url): + return (False if TVNowNewIE.suitable(url) + else super(TVNowListBaseIE, cls).suitable(url)) + + def _extract_items(self, url, show_id, list_id, query): + items = self._call_api( + 'teaserrow/format/episode/' + show_id, list_id, + query=query)['items'] entries = [] - for container in season['formatTabPages']['items']: - items = try_get( - container, lambda x: x['container']['movies']['items'], - list) or [] - for info in items: - seo_url = info.get('seoUrl') - if not seo_url: - continue - video_id = info.get('id') - entries.append(self.url_result( - '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(), - compat_str(video_id) if video_id else None)) + for item in items: + if not isinstance(item, dict): + continue + item_url = urljoin(url, item.get('url')) + if not item_url: + continue + video_id = str_or_none(item.get('id') or item.get('videoId')) + item_title = item.get('subheadline') or item.get('text') + entries.append(self.url_result( + item_url, ie=TVNowNewIE.ie_key(), video_id=video_id, + video_title=item_title)) - return self.playlist_result( - entries, compat_str(season.get('id') or season_id), title) + return self.playlist_result(entries, '%s/%s' % (show_id, list_id)) + + +class TVNowSeasonIE(TVNowListBaseIE): + _VALID_URL = r'%s/staffel-(?P\d+)' % TVNowListBaseIE._SHOW_VALID_URL + _TESTS = [{ + 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13', + 'info_dict': { + 'id': '1815/13', + }, + 'playlist_mincount': 22, + }] + + def _real_extract(self, url): + _, show_id, season_id = re.match(self._VALID_URL, url).groups() + return self._extract_items( + url, show_id, season_id, {'season': season_id}) + + +class TVNowAnnualIE(TVNowListBaseIE): + _VALID_URL = r'%s/(?P\d{4})-(?P\d{2})' % TVNowListBaseIE._SHOW_VALID_URL + _TESTS = [{ + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05', + 'info_dict': { + 'id': '1669/2017-05', + }, + 'playlist_mincount': 2, + }] + + def _real_extract(self, url): + _, show_id, year, month = re.match(self._VALID_URL, url).groups() + return self._extract_items( + url, show_id, '%s-%s' % (year, month), { + 'year': int(year), + 'month': int(month), + }) class TVNowShowIE(TVNowListBaseIE): _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL - - _SHOW_FIELDS = ('id', 'title', ) - _SEASON_FIELDS = ('id', 'headline', 'seoheadline', ) - _VIDEO_FIELDS = () - _TESTS = [{ - 'url': 'https://www.tvnow.at/vox/ab-ins-beet', + # annual navigationType + 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669', 'info_dict': { - 'id': 'ab-ins-beet', - 'title': 'Ab ins Beet!', + 'id': '1669', }, - 'playlist_mincount': 7, + 'playlist_mincount': 73, }, { - 'url': 'https://www.tvnow.at/vox/ab-ins-beet/list', - 'only_matching': True, - }, { - 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/', - 'only_matching': True, + # season navigationType + 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471', + 'info_dict': { + 'id': '11471', + }, + 'playlist_mincount': 3, }] @classmethod def suitable(cls, url): - return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url) + return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) else super(TVNowShowIE, cls).suitable(url)) def _real_extract(self, url): base_url, show_id = re.match(self._VALID_URL, url).groups() - list_info = self._extract_list_info(show_id, show_id) + result = self._call_api( + 'teaserrow/format/navigation/' + show_id, show_id) + + items = result['items'] entries = [] - for season_info in list_info['formatTabs']['items']: - season_url = season_info.get('seoheadline') - if not season_url: - continue - season_id = season_info.get('id') - entries.append(self.url_result( - '%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(), - compat_str(season_id) if season_id else None, - season_info.get('headline'))) + navigation = result.get('navigationType') + if navigation == 'annual': + for item in items: + if not isinstance(item, dict): + continue + year = int_or_none(item.get('year')) + if year is None: + continue + months = item.get('months') + if not isinstance(months, list): + continue + for month_dict in months: + if not isinstance(month_dict, dict) or not month_dict: + continue + month_number = int_or_none(list(month_dict.keys())[0]) + if month_number is None: + continue + entries.append(self.url_result( + '%s/%04d-%02d' % (base_url, year, month_number), + ie=TVNowAnnualIE.ie_key())) + elif navigation == 'season': + for item in items: + if not isinstance(item, dict): + continue + season_number = int_or_none(item.get('season')) + if season_number is None: + continue + entries.append(self.url_result( + '%s/staffel-%d' % (base_url, season_number), + ie=TVNowSeasonIE.ie_key())) + else: + raise ExtractorError('Unknown navigationType') - return self.playlist_result(entries, show_id, list_info.get('title')) + return self.playlist_result(entries, show_id) From b7acc83550598b9facf1b31d3d9a1c2823b29cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Jan 2019 00:55:39 +0700 Subject: [PATCH 0242/1201] [utils] Add language codes replaced in 1989 revision of ISO 639 to ISO639Utils (closes #18765) --- youtube_dl/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 62e769fd5..64b524d2f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2968,6 +2968,7 @@ class ISO639Utils(object): 'gv': 'glv', 'ha': 'hau', 'he': 'heb', + 'iw': 'heb', # Replaced by he in 1989 revision 'hi': 'hin', 'ho': 'hmo', 'hr': 'hrv', @@ -2977,6 +2978,7 @@ class ISO639Utils(object): 'hz': 'her', 'ia': 'ina', 'id': 'ind', + 'in': 'ind', # Replaced by id in 1989 revision 'ie': 'ile', 'ig': 'ibo', 'ii': 'iii', @@ -3091,6 +3093,7 @@ class ISO639Utils(object): 'wo': 'wol', 'xh': 'xho', 'yi': 'yid', + 'ji': 'yid', # Replaced by he yi 1989 revision 'yo': 'yor', 'za': 'zha', 'zh': 'zho', From 04fb6928da5e34fb3c7eaf50592a141400e00487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Jan 2019 00:57:24 +0700 Subject: [PATCH 0243/1201] [postprocessor/ffmpeg] Embed subtitles with non-standard language codes (refs #18765) --- youtube_dl/postprocessor/ffmpeg.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 757b496a1..efcd39d57 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -384,9 +384,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): opts += ['-c:s', 'mov_text'] for (i, lang) in enumerate(sub_langs): opts.extend(['-map', '%d:0' % (i + 1)]) - lang_code = ISO639Utils.short2long(lang) - if lang_code is not None: - opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) + lang_code = ISO639Utils.short2long(lang) or lang + opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) temp_filename = prepend_extension(filename, 'temp') self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename) From e9a50fba8603128dcb81fa1b59206a0cb106d540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Jan 2019 01:02:34 +0700 Subject: [PATCH 0244/1201] [utils] Fix typo --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 64b524d2f..d2d3c1a9f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3093,7 +3093,7 @@ class ISO639Utils(object): 'wo': 'wol', 'xh': 'xho', 'yi': 'yid', - 'ji': 'yid', # Replaced by he yi 1989 revision + 'ji': 'yid', # Replaced by yi in 1989 revision 'yo': 'yor', 'za': 'zha', 'zh': 'zho', From bcc334a3c66f2465cd63be284e389cc4c7a78203 Mon Sep 17 00:00:00 2001 From: 4rensiker <4rensics@gmx.de> Date: Tue, 8 Jan 2019 02:44:42 +0100 Subject: [PATCH 0245/1201] [dtube] Fix extraction (closes #18741) --- youtube_dl/extractor/dtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py index 5887887e1..20190c9cc 100644 --- a/youtube_dl/extractor/dtube.py +++ b/youtube_dl/extractor/dtube.py @@ -48,7 +48,7 @@ class DTubeIE(InfoExtractor): def canonical_url(h): if not h: return None - return 'https://ipfs.io/ipfs/' + h + return 'https://video.dtube.top/ipfs/' + h formats = [] for q in ('240', '480', '720', '1080', ''): From 0266854f63baaa9669b30ee49c5e7e9f353970dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 08:46:34 +0700 Subject: [PATCH 0246/1201] [dtube] Update test --- youtube_dl/extractor/dtube.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py index 20190c9cc..114d2dbe3 100644 --- a/youtube_dl/extractor/dtube.py +++ b/youtube_dl/extractor/dtube.py @@ -15,16 +15,16 @@ from ..utils import ( class DTubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P[0-9a-z.-]+)/(?P[0-9a-z]{8})' _TEST = { - 'url': 'https://d.tube/#!/v/benswann/zqd630em', - 'md5': 'a03eaa186618ffa7a3145945543a251e', + 'url': 'https://d.tube/#!/v/broncnutz/x380jtr1', + 'md5': '9f29088fa08d699a7565ee983f56a06e', 'info_dict': { - 'id': 'zqd630em', + 'id': 'x380jtr1', 'ext': 'mp4', - 'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom', - 'description': 'md5:700d164e066b87f9eac057949e4227c2', - 'uploader_id': 'benswann', - 'upload_date': '20180222', - 'timestamp': 1519328958, + 'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks', + 'description': 'md5:60be222088183be3a42f196f34235776', + 'uploader_id': 'broncnutz', + 'upload_date': '20190107', + 'timestamp': 1546854054, }, 'params': { 'format': '480p', From 8cb5c2181ac039e92a20247f266f48cee2411748 Mon Sep 17 00:00:00 2001 From: Awal Garg Date: Mon, 7 Jan 2019 17:41:12 +0530 Subject: [PATCH 0247/1201] [hungama] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/hungama.py | 32 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 youtube_dl/extractor/hungama.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3b1dfc451..5ff34216f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -469,6 +469,7 @@ from .hrti import ( ) from .huajiao import HuajiaoIE from .huffpost import HuffPostIE +from .hungama import HungamaIE from .hypem import HypemIE from .iconosquare import IconosquareIE from .ign import ( diff --git a/youtube_dl/extractor/hungama.py b/youtube_dl/extractor/hungama.py new file mode 100644 index 000000000..e51440299 --- /dev/null +++ b/youtube_dl/extractor/hungama.py @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class HungamaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)hungama\.com/song/[\w\d-]+/(?P[0-9]+)' + _TEST = { + 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/', + 'md5': '396fa7e8e7e67aa25da0edc4cac9b785', + 'info_dict': { + 'id': '2931166', + 'ext': 'mp4', + 'title': 'Kitni Haseen Zindagi', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + player_data = self._download_json('https://www.hungama.com/audio-player-data/track/%s?_country=IN' % video_id, video_id)[0] + title = player_data.get('song_name') or self._og_search_title(webpage) + track_data = self._download_json(player_data['file'], video_id) + media_url = track_data['response']['media_url'] + + return { + 'id': video_id, + 'title': title, + 'formats': self._extract_m3u8_formats(media_url, video_id, ext='mp4'), + } From 06b4b90c70636d8044b7a2f588503cadf8b202d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 09:09:49 +0700 Subject: [PATCH 0248/1201] [hungama] Fix code and extract more metadata (closes #18771) --- youtube_dl/extractor/hungama.py | 41 +++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/hungama.py b/youtube_dl/extractor/hungama.py index e51440299..d9ad1281a 100644 --- a/youtube_dl/extractor/hungama.py +++ b/youtube_dl/extractor/hungama.py @@ -2,31 +2,54 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import int_or_none class HungamaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)hungama\.com/song/[\w\d-]+/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P[0-9]+)' _TEST = { 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/', - 'md5': '396fa7e8e7e67aa25da0edc4cac9b785', + 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0', 'info_dict': { 'id': '2931166', 'ext': 'mp4', - 'title': 'Kitni Haseen Zindagi', + 'title': 'Lucky Ali - Kitni Haseen Zindagi', + 'track': 'Kitni Haseen Zindagi', + 'artist': 'Lucky Ali', + 'album': 'Aks', + 'release_year': 2000, } } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - player_data = self._download_json('https://www.hungama.com/audio-player-data/track/%s?_country=IN' % video_id, video_id)[0] - title = player_data.get('song_name') or self._og_search_title(webpage) - track_data = self._download_json(player_data['file'], video_id) - media_url = track_data['response']['media_url'] + data = self._download_json( + 'https://www.hungama.com/audio-player-data/track/%s' % video_id, + video_id, query={'_country': 'IN'})[0] + + track = data['song_name'] + artist = data.get('singer_name') + + m3u8_url = self._download_json( + data.get('file') or data['preview_link'], + video_id)['response']['media_url'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + title = '%s - %s' % (artist, track) if artist else track + thumbnail = data.get('img_src') or data.get('album_image') return { 'id': video_id, 'title': title, - 'formats': self._extract_m3u8_formats(media_url, video_id, ext='mp4'), + 'thumbnail': thumbnail, + 'track': track, + 'artist': artist, + 'album': data.get('album_name'), + 'release_year': int_or_none(data.get('date')), + 'formats': formats, } From 391256dc0ee5e6ac8d3022455de207b8b02a5b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 10:02:00 +0700 Subject: [PATCH 0249/1201] [extractor/common] Add support for movies in _json_ld --- youtube_dl/extractor/common.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e5f8136fc..f507400cc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1250,6 +1250,13 @@ class InfoExtractor(object): part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): info['series'] = unescapeHTML(part_of_series.get('name')) + elif item_type == 'Movie': + info.update({ + 'title': unescapeHTML(e.get('name')), + 'description': unescapeHTML(e.get('description')), + 'duration': parse_duration(e.get('duration')), + 'timestamp': unified_timestamp(e.get('dateCreated')), + }) elif item_type in ('Article', 'NewsArticle'): info.update({ 'timestamp': parse_iso8601(e.get('datePublished')), From 440863ade10c70bec272c1ffe17b63c877e46ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 10:02:49 +0700 Subject: [PATCH 0250/1201] [extractor/common] Use episode name as title in _json_ld --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f507400cc..9e7febcad 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1239,11 +1239,14 @@ class InfoExtractor(object): if expected_type is not None and expected_type != item_type: return info if item_type in ('TVEpisode', 'Episode'): + episode_name = unescapeHTML(e.get('name')) info.update({ - 'episode': unescapeHTML(e.get('name')), + 'episode': episode_name, 'episode_number': int_or_none(e.get('episodeNumber')), 'description': unescapeHTML(e.get('description')), }) + if not info.get('title') and episode_name: + info['title'] = episode_name part_of_season = e.get('partOfSeason') if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'): info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) From 2543938bbe393ceef8dca6a69b441d54df099107 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jan 2019 10:03:44 +0700 Subject: [PATCH 0251/1201] [hungama] Add support for videos (closes #17402) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/hungama.py | 78 +++++++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5ff34216f..ddeb70284 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -469,7 +469,10 @@ from .hrti import ( ) from .huajiao import HuajiaoIE from .huffpost import HuffPostIE -from .hungama import HungamaIE +from .hungama import ( + HungamaIE, + HungamaSongIE, +) from .hypem import HypemIE from .iconosquare import IconosquareIE from .ign import ( diff --git a/youtube_dl/extractor/hungama.py b/youtube_dl/extractor/hungama.py index d9ad1281a..3fdaac5b6 100644 --- a/youtube_dl/extractor/hungama.py +++ b/youtube_dl/extractor/hungama.py @@ -2,11 +2,73 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + urlencode_postdata, +) class HungamaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P[0-9]+)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?hungama\.com/ + (?: + (?:video|movie)/[^/]+/| + tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/ + ) + (?P\d+) + ''' + _TESTS = [{ + 'url': 'http://www.hungama.com/video/krishna-chants/39349649/', + 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0', + 'info_dict': { + 'id': '2931166', + 'ext': 'mp4', + 'title': 'Lucky Ali - Kitni Haseen Zindagi', + 'track': 'Kitni Haseen Zindagi', + 'artist': 'Lucky Ali', + 'album': 'Aks', + 'release_year': 2000, + } + }, { + 'url': 'https://www.hungama.com/movie/kahaani-2/44129919/', + 'only_matching': True, + }, { + 'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + info = self._search_json_ld(webpage, video_id) + + m3u8_url = self._download_json( + 'https://www.hungama.com/index.php', video_id, + data=urlencode_postdata({'content_id': video_id}), headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'X-Requested-With': 'XMLHttpRequest', + }, query={ + 'c': 'common', + 'm': 'get_video_mdn_url', + })['stream_url'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + info.update({ + 'id': video_id, + 'formats': formats, + }) + return info + + +class HungamaSongIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P\d+)' _TEST = { 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/', 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0', @@ -22,21 +84,21 @@ class HungamaIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) + audio_id = self._match_id(url) data = self._download_json( - 'https://www.hungama.com/audio-player-data/track/%s' % video_id, - video_id, query={'_country': 'IN'})[0] + 'https://www.hungama.com/audio-player-data/track/%s' % audio_id, + audio_id, query={'_country': 'IN'})[0] track = data['song_name'] artist = data.get('singer_name') m3u8_url = self._download_json( data.get('file') or data['preview_link'], - video_id)['response']['media_url'] + audio_id)['response']['media_url'] formats = self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls') self._sort_formats(formats) @@ -44,7 +106,7 @@ class HungamaIE(InfoExtractor): thumbnail = data.get('img_src') or data.get('album_image') return { - 'id': video_id, + 'id': audio_id, 'title': title, 'thumbnail': thumbnail, 'track': track, From 6089ff40e7cc7710e399db1be87fea103a190ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 9 Jan 2019 00:37:01 +0700 Subject: [PATCH 0252/1201] [youporn] Fix title and description extraction (closes #18748) --- youtube_dl/extractor/youporn.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index ea0bce784..d4eccb4b2 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -68,11 +68,9 @@ class YouPornIE(InfoExtractor): request.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(request, display_id) - title = self._search_regex( - [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P(?:(?!\1).)+)\1', - r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'], - webpage, 'title', group='title', - default=None) or self._og_search_title( + title = self._html_search_regex( + r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>', + webpage, 'title', default=None) or self._og_search_title( webpage, default=None) or self._html_search_meta( 'title', webpage, fatal=True) @@ -134,7 +132,11 @@ class YouPornIE(InfoExtractor): formats.append(f) self._sort_formats(formats) - description = self._og_search_description(webpage, default=None) + description = self._html_search_regex( + r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>', + webpage, 'description', + default=None) or self._og_search_description( + webpage, default=None) thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail') From 3c1089dba41f137dcc6c373daacdc19a24aef81c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 9 Jan 2019 14:23:26 +0100 Subject: [PATCH 0253/1201] [gaia] Add new extractor(#14605) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/gaia.py | 98 ++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 youtube_dl/extractor/gaia.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ddeb70284..c7d04d366 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -411,6 +411,7 @@ from .funk import ( from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE +from .gaia import GaiaIE from .gameinformer import GameInformerIE from .gameone import ( GameOneIE, diff --git a/youtube_dl/extractor/gaia.py b/youtube_dl/extractor/gaia.py new file mode 100644 index 000000000..f2eef3f4c --- /dev/null +++ b/youtube_dl/extractor/gaia.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + str_or_none, + strip_or_none, + try_get, +) + + +class GaiaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)' + _TESTS = [{ + 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature', + 'info_dict': { + 'id': '89356', + 'ext': 'mp4', + 'title': 'Connecting with Universal Consciousness', + 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f', + 'upload_date': '20151116', + 'timestamp': 1447707266, + 'duration': 936, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview', + 'info_dict': { + 'id': '89351', + 'ext': 'mp4', + 'title': 'Connecting with Universal Consciousness', + 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f', + 'upload_date': '20151116', + 'timestamp': 1447707266, + 'duration': 53, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + display_id, vtype = re.search(self._VALID_URL, url).groups() + node_id = self._download_json( + 'https://brooklyn.gaia.com/pathinfo', display_id, query={ + 'path': 'video/' + display_id, + })['id'] + node = self._download_json( + 'https://brooklyn.gaia.com/node/%d' % node_id, node_id) + vdata = node[vtype] + media_id = compat_str(vdata['nid']) + title = node['title'] + + media = self._download_json( + 'https://brooklyn.gaia.com/media/' + media_id, media_id) + formats = self._extract_m3u8_formats( + media['mediaUrls']['bcHLS'], media_id, 'mp4') + self._sort_formats(formats) + + subtitles = {} + text_tracks = media.get('textTracks', {}) + for key in ('captions', 'subtitles'): + for lang, sub_url in text_tracks.get(key, {}).items(): + subtitles.setdefault(lang, []).append({ + 'url': sub_url, + }) + + fivestar = node.get('fivestar', {}) + fields = node.get('fields', {}) + + def get_field_value(key, value_key='value'): + return try_get(fields, lambda x: x[key][0][value_key]) + + return { + 'id': media_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'description': strip_or_none(get_field_value('body') or get_field_value('teaser')), + 'timestamp': int_or_none(node.get('created')), + 'subtitles': subtitles, + 'duration': int_or_none(vdata.get('duration')), + 'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])), + 'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])), + 'comment_count': int_or_none(node.get('comment_count')), + 'series': try_get(node, lambda x: x['series']['title'], compat_str), + 'season_number': int_or_none(get_field_value('season')), + 'season_id': str_or_none(get_field_value('series_nid', 'nid')), + 'episode_number': int_or_none(get_field_value('episode')), + } From 65615be36874fe98d5a972546e8024cda3879c86 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 9 Jan 2019 19:17:58 +0100 Subject: [PATCH 0254/1201] [globo] set GLBID cookie manually(closes #17346) --- youtube_dl/extractor/globo.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index c2140c362..fb8f7679b 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -72,7 +72,7 @@ class GloboIE(InfoExtractor): return try: - self._download_json( + glb_id = (self._download_json( 'https://login.globo.com/api/authentication', None, data=json.dumps({ 'payload': { 'email': email, @@ -81,7 +81,9 @@ class GloboIE(InfoExtractor): }, }).encode(), headers={ 'Content-Type': 'application/json; charset=utf-8', - }) + }) or {}).get('glbId') + if glb_id: + self._set_cookie('.globo.com', 'GLBID', glb_id) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: resp = self._parse_json(e.cause.read(), None) From 4ad159c7b0ce476171aad1eaa8a36f7b9b1cce65 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 9 Jan 2019 20:39:48 +0100 Subject: [PATCH 0255/1201] [playplustv] Add new extractor(closes #18789) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/playplustv.py | 109 +++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 youtube_dl/extractor/playplustv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c7d04d366..7d7e4247a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -861,6 +861,7 @@ from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE from .playfm import PlayFMIE +from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE from .playtvak import PlaytvakIE from .playvid import PlayvidIE diff --git a/youtube_dl/extractor/playplustv.py b/youtube_dl/extractor/playplustv.py new file mode 100644 index 000000000..419c2974b --- /dev/null +++ b/youtube_dl/extractor/playplustv.py @@ -0,0 +1,109 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + PUTRequest, +) + + +class PlayPlusTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playplus\.tv/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' + _TEST = { + 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e', + 'md5': 'd078cb89d7ab6b9df37ce23c647aef72', + 'info_dict': { + 'id': 'db8d274a5163424e967f35a30ddafb8e', + 'ext': 'mp4', + 'title': 'Capítulo 179 - Final', + 'description': 'md5:01085d62d8033a1e34121d3c3cabc838', + 'timestamp': 1529992740, + 'upload_date': '20180626', + }, + 'skip': 'Requires account credential', + } + _NETRC_MACHINE = 'playplustv' + _GEO_COUNTRIES = ['BR'] + _token = None + _profile_id = None + + def _call_api(self, resource, video_id=None, query=None): + return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={ + 'Authorization': 'Bearer ' + self._token, + }, query=query) + + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + self.raise_login_required() + + req = PUTRequest( + 'https://api.playplus.tv/api/web/login', json.dumps({ + 'email': email, + 'password': password, + }).encode(), { + 'Content-Type': 'application/json; charset=utf-8', + }) + + try: + self._token = self._download_json(req, None)['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + raise ExtractorError(self._parse_json( + e.cause.read(), None)['errorMessage'], expected=True) + raise + + self._profile = self._call_api('Profiles')['list'][0]['_id'] + + def _real_extract(self, url): + project_id, media_id = re.match(self._VALID_URL, url).groups() + media = self._call_api( + 'Media', media_id, { + 'profileId': self._profile, + 'projectId': project_id, + 'mediaId': media_id, + })['obj'] + title = media['title'] + + formats = [] + for f in media.get('files', []): + f_url = f.get('url') + if not f_url: + continue + file_info = f.get('fileInfo') or {} + formats.append({ + 'url': f_url, + 'width': int_or_none(file_info.get('width')), + 'height': int_or_none(file_info.get('height')), + }) + self._sort_formats(formats) + + thumbnails = [] + for thumb in media.get('thumbs', []): + thumb_url = thumb.get('url') + if not thumb_url: + continue + thumbnails.append({ + 'url': thumb_url, + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + }) + + return { + 'id': media_id, + 'title': title, + 'formats': formats, + 'thumbnails': thumbnails, + 'description': clean_html(media.get('description')) or media.get('shortDescription'), + 'timestamp': int_or_none(media.get('publishDate'), 1000), + 'view_count': int_or_none(media.get('numberOfViews')), + 'comment_count': int_or_none(media.get('numberOfComments')), + 'tags': media.get('tags'), + } From 96c186e1fd8af4943eeaa8c858f9cb0f15e8b265 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Jan 2019 09:05:00 +0100 Subject: [PATCH 0256/1201] [fox] add support National Geographic(closes #17985)(closes #15333)(closes #14698) --- youtube_dl/extractor/extractors.py | 6 +- youtube_dl/extractor/fox.py | 93 +++++++------- youtube_dl/extractor/nationalgeographic.py | 135 --------------------- 3 files changed, 48 insertions(+), 186 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7d7e4247a..a01d99b85 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -687,11 +687,7 @@ from .myvi import ( MyviEmbedIE, ) from .myvidster import MyVidsterIE -from .nationalgeographic import ( - NationalGeographicVideoIE, - NationalGeographicIE, - NationalGeographicEpisodeGuideIE, -) +from .nationalgeographic import NationalGeographicVideoIE from .naver import NaverIE from .nba import NBAIE from .nbc import ( diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 11d6c9c32..b1c91f095 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals +# import json +# import uuid + from .adobepass import AdobePassIE -from .uplynk import UplynkPreplayIE -from ..compat import compat_str from ..utils import ( - HEADRequest, int_or_none, parse_age_limit, parse_duration, @@ -16,7 +16,7 @@ from ..utils import ( class FOXIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)' + _VALID_URL = r'https?://(?:www\.)?(?:fox\.com|nationalgeographic\.com/tv)/watch/(?P<id>[\da-fA-F]+)' _TESTS = [{ # clip 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', @@ -43,41 +43,47 @@ class FOXIE(AdobePassIE): # episode, geo-restricted, tv provided required 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', 'only_matching': True, + }, { + 'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/', + 'only_matching': True, }] + # _access_token = None + + # def _call_api(self, path, video_id, data=None): + # headers = { + # 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd', + # } + # if self._access_token: + # headers['Authorization'] = 'Bearer ' + self._access_token + # return self._download_json( + # 'https://api2.fox.com/v2.0/' + path, video_id, data=data, headers=headers) + + # def _real_initialize(self): + # self._access_token = self._call_api( + # 'login', None, json.dumps({ + # 'deviceId': compat_str(uuid.uuid4()), + # }).encode())['accessToken'] def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( - 'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id, + 'https://api.fox.com/fbc-content/v1_5/video/%s' % video_id, video_id, headers={ 'apikey': 'abdcbed02c124d393b39e818a4312055', 'Content-Type': 'application/json', 'Referer': url, }) + # video = self._call_api('vodplayer/' + video_id, video_id) title = video['name'] release_url = video['videoRelease']['url'] - - description = video.get('description') - duration = int_or_none(video.get('durationInSeconds')) or int_or_none( - video.get('duration')) or parse_duration(video.get('duration')) - timestamp = unified_timestamp(video.get('datePublished')) - rating = video.get('contentRating') - age_limit = parse_age_limit(rating) + # release_url = video['url'] data = try_get( video, lambda x: x['trackingData']['properties'], dict) or {} - creator = data.get('brand') or data.get('network') or video.get('network') - - series = video.get('seriesName') or data.get( - 'seriesName') or data.get('show') - season_number = int_or_none(video.get('seasonNumber')) - episode = video.get('name') - episode_number = int_or_none(video.get('episodeNumber')) - release_year = int_or_none(video.get('releaseYear')) - + rating = video.get('contentRating') if data.get('authRequired'): resource = self._get_mvpd_resource( 'fbc-fox', title, video.get('guid'), rating) @@ -86,6 +92,18 @@ class FOXIE(AdobePassIE): 'auth': self._extract_mvpd_auth( url, video_id, 'fbc-fox', resource) }) + m3u8_url = self._download_json(release_url, video_id)['playURL'] + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + duration = int_or_none(video.get('durationInSeconds')) or int_or_none( + video.get('duration')) or parse_duration(video.get('duration')) + timestamp = unified_timestamp(video.get('datePublished')) + creator = data.get('brand') or data.get('network') or video.get('network') + series = video.get('seriesName') or data.get( + 'seriesName') or data.get('show') subtitles = {} for doc_rel in video.get('documentReleases', []): @@ -98,36 +116,19 @@ class FOXIE(AdobePassIE): }] break - info = { + return { 'id': video_id, 'title': title, - 'description': description, + 'formats': formats, + 'description': video.get('description'), 'duration': duration, 'timestamp': timestamp, - 'age_limit': age_limit, + 'age_limit': parse_age_limit(rating), 'creator': creator, 'series': series, - 'season_number': season_number, - 'episode': episode, - 'episode_number': episode_number, - 'release_year': release_year, + 'season_number': int_or_none(video.get('seasonNumber')), + 'episode': video.get('name'), + 'episode_number': int_or_none(video.get('episodeNumber')), + 'release_year': int_or_none(video.get('releaseYear')), 'subtitles': subtitles, } - - urlh = self._request_webpage(HEADRequest(release_url), video_id) - video_url = compat_str(urlh.geturl()) - - if UplynkPreplayIE.suitable(video_url): - info.update({ - '_type': 'url_transparent', - 'url': video_url, - 'ie_key': UplynkPreplayIE.ie_key(), - }) - else: - m3u8_url = self._download_json(release_url, video_id)['playURL'] - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) - info['formats'] = formats - return info diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 4d2ee6408..165964ca0 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -1,15 +1,9 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from .adobepass import AdobePassIE -from .theplatform import ThePlatformIE from ..utils import ( smuggle_url, url_basename, - update_url_query, - get_element_by_class, ) @@ -64,132 +58,3 @@ class NationalGeographicVideoIE(InfoExtractor): {'force_smil_url': True}), 'id': guid, } - - -class NationalGeographicIE(ThePlatformIE, AdobePassIE): - IE_NAME = 'natgeo' - _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)' - - _TESTS = [ - { - 'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/', - 'md5': '518c9aa655686cf81493af5cc21e2a04', - 'info_dict': { - 'id': 'vKInpacll2pC', - 'ext': 'mp4', - 'title': 'Uncovering a Universal Knowledge', - 'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a', - 'timestamp': 1458680907, - 'upload_date': '20160322', - 'uploader': 'NEWA-FNG-NGTV', - }, - 'add_ie': ['ThePlatform'], - }, - { - 'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/', - 'md5': 'c4912f656b4cbe58f3e000c489360989', - 'info_dict': { - 'id': 'Pok5lWCkiEFA', - 'ext': 'mp4', - 'title': 'The Stunning Red Bird of Paradise', - 'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c', - 'timestamp': 1459362152, - 'upload_date': '20160330', - 'uploader': 'NEWA-FNG-NGTV', - }, - 'add_ie': ['ThePlatform'], - }, - { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/', - 'only_matching': True, - }, - { - 'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/', - 'only_matching': True, - }, - { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/', - 'only_matching': True, - }, - { - 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/', - 'only_matching': True, - } - ] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - release_url = self._search_regex( - r'video_auth_playlist_url\s*=\s*"([^"]+)"', - webpage, 'release url') - theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path') - video_id = theplatform_path.split('/')[-1] - query = { - 'mbr': 'true', - } - is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False) - if is_auth == 'auth': - auth_resource_id = self._search_regex( - r"video_auth_resourceId\s*=\s*'([^']+)'", - webpage, 'auth resource id') - query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id) - - formats = [] - subtitles = {} - for key, value in (('switch', 'http'), ('manifest', 'm3u')): - tp_query = query.copy() - tp_query.update({ - key: value, - }) - tp_formats, tp_subtitles = self._extract_theplatform_smil( - update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value) - formats.extend(tp_formats) - subtitles = self._merge_subtitles(subtitles, tp_subtitles) - self._sort_formats(formats) - - info = self._extract_theplatform_metadata(theplatform_path, display_id) - info.update({ - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - 'display_id': display_id, - }) - return info - - -class NationalGeographicEpisodeGuideIE(InfoExtractor): - IE_NAME = 'natgeo:episodeguide' - _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide' - _TESTS = [ - { - 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/', - 'info_dict': { - 'id': 'the-story-of-god-with-morgan-freeman-season-1', - 'title': 'The Story of God with Morgan Freeman - Season 1', - }, - 'playlist_mincount': 6, - }, - { - 'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2', - 'info_dict': { - 'id': 'underworld-inc-season-2', - 'title': 'Underworld, Inc. - Season 2', - }, - 'playlist_mincount': 7, - }, - ] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - show = get_element_by_class('show', webpage) - selected_season = self._search_regex( - r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>', - webpage, 'selected season') - entries = [ - self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic') - for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)] - return self.playlist_result( - entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')), - '%s - %s' % (show, selected_season)) From 7c072f00d66f3b3f44d301059443480a5c83d3c0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Jan 2019 10:50:18 +0100 Subject: [PATCH 0257/1201] [jwplatform] use JW Platform Delivery API V2 and add support for more urls --- youtube_dl/extractor/jwplatform.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 63d0dc998..d19a6a774 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -7,8 +7,8 @@ from .common import InfoExtractor class JWPlatformIE(InfoExtractor): - _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' - _TEST = { + _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' + _TESTS = [{ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325', 'info_dict': { @@ -19,7 +19,10 @@ class JWPlatformIE(InfoExtractor): 'upload_date': '20081127', 'timestamp': 1227796140, } - } + }, { + 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js', + 'only_matching': True, + }] @staticmethod def _extract_url(webpage): @@ -34,5 +37,5 @@ class JWPlatformIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id) + json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id) return self._parse_jwplayer_data(json_data, video_id) From 432aba1c5e68cf8efccb868107785a15617d5109 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Jan 2019 10:54:46 +0100 Subject: [PATCH 0258/1201] [outsidetv] Add new extractor(closes #18774) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/outsidetv.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 youtube_dl/extractor/outsidetv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a01d99b85..de38c6641 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -829,6 +829,7 @@ from .orf import ( ORFOE1IE, ORFIPTVIE, ) +from .outsidetv import OutsideTVIE from .packtpub import ( PacktPubIE, PacktPubCourseIE, diff --git a/youtube_dl/extractor/outsidetv.py b/youtube_dl/extractor/outsidetv.py new file mode 100644 index 000000000..c5333b08c --- /dev/null +++ b/youtube_dl/extractor/outsidetv.py @@ -0,0 +1,28 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class OutsideTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?outsidetv\.com/(?:[^/]+/)*?play/[a-zA-Z0-9]{8}/\d+/\d+/(?P<id>[a-zA-Z0-9]{8})' + _TESTS = [{ + 'url': 'http://www.outsidetv.com/category/snow/play/ZjQYboH6/1/10/Hdg0jukV/4', + 'md5': '192d968fedc10b2f70ec31865ffba0da', + 'info_dict': { + 'id': 'Hdg0jukV', + 'ext': 'mp4', + 'title': 'Home - Jackson Ep 1 | Arbor Snowboards', + 'description': 'md5:41a12e94f3db3ca253b04bb1e8d8f4cd', + 'upload_date': '20181225', + 'timestamp': 1545742800, + } + }, { + 'url': 'http://www.outsidetv.com/home/play/ZjQYboH6/1/10/Hdg0jukV/4', + 'only_matching': True, + }] + + def _real_extract(self, url): + jw_media_id = self._match_id(url) + return self.url_result( + 'jwplatform:' + jw_media_id, 'JWPlatform', jw_media_id) From c3e543893bfba7faa7c13e53fbe6b60f936b81f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Jan 2019 22:46:53 +0700 Subject: [PATCH 0259/1201] [youtube] Extract live HLS URL from player response (closes #18799) --- youtube_dl/extractor/youtube.py | 55 +++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 539ff6ff2..29773877e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1931,31 +1931,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'http_chunk_size': 10485760, } formats.append(dct) - elif video_info.get('hlsvp'): - manifest_url = video_info['hlsvp'][0] - formats = [] - m3u8_formats = self._extract_m3u8_formats( - manifest_url, video_id, 'mp4', fatal=False) - for a_format in m3u8_formats: - itag = self._search_regex( - r'/itag/(\d+)/', a_format['url'], 'itag', default=None) - if itag: - a_format['format_id'] = itag - if itag in self._formats: - dct = self._formats[itag].copy() - dct.update(a_format) - a_format = dct - a_format['player_url'] = player_url - # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming - a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' - formats.append(a_format) else: - error_message = clean_html(video_info.get('reason', [None])[0]) - if not error_message: - error_message = extract_unavailable_message() - if error_message: - raise ExtractorError(error_message, expected=True) - raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') + manifest_url = ( + url_or_none(try_get( + player_response, + lambda x: x['streamingData']['hlsManifestUrl'], + compat_str)) or + url_or_none(try_get( + video_info, lambda x: x['hlsvp'][0], compat_str))) + if manifest_url: + formats = [] + m3u8_formats = self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', fatal=False) + for a_format in m3u8_formats: + itag = self._search_regex( + r'/itag/(\d+)/', a_format['url'], 'itag', default=None) + if itag: + a_format['format_id'] = itag + if itag in self._formats: + dct = self._formats[itag].copy() + dct.update(a_format) + a_format = dct + a_format['player_url'] = player_url + # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming + a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' + formats.append(a_format) + else: + error_message = clean_html(video_info.get('reason', [None])[0]) + if not error_message: + error_message = extract_unavailable_message() + if error_message: + raise ExtractorError(error_message, expected=True) + raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info') # uploader video_uploader = try_get( From a4491dd55c5feaf02ac8969caafd637265cbeede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Jan 2019 23:23:19 +0700 Subject: [PATCH 0260/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 41d190a72..c7d090c6c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +version <unreleased> + +Core +* [extractor/common] Use episode name as title in _json_ld ++ [extractor/common] Add support for movies in _json_ld +* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes + (#18765) ++ [utils] Add language codes replaced in 1989 revision of ISO 639 + to ISO639Utils (#18765) + +Extractors +* [youtube] Extract live HLS URL from player response (#18799) ++ [outsidetv] Add support for outsidetv.com (#18774) +* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs ++ [fox] Add support National Geographic (#17985, #15333, #14698) ++ [playplustv] Add support for playplus.tv (#18789) +* [globo] Set GLBID cookie manually (#17346) ++ [gaia] Add support for gaia.com (#14605) +* [youporn] Fix title and description extraction (#18748) ++ [hungama] Add support for hungama.com (#17402, #18771) +* [dtube] Fix extraction (#18741) +* [tvnow] Fix and rework extractors and prepare for a switch to the new API + (#17245, #18499) +* [carambatv:page] Fix extraction (#18739) + + version 2019.01.02 Extractors From b64f6e690ff84052751c593a8826a40a63cb1d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Jan 2019 23:26:54 +0700 Subject: [PATCH 0261/1201] release 2019.01.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 11 ++++++++--- youtube_dl/version.py | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3986bbc8e..d6abdbcb7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.02 +[debug] youtube-dl version 2019.01.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c7d090c6c..3d60754c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.10 Core * [extractor/common] Use episode name as title in _json_ld diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e92064432..c01409419 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -320,6 +320,7 @@ - **Fusion** - **Fux** - **FXNetworks** + - **Gaia** - **GameInformer** - **GameOne** - **gameone:playlist** @@ -370,6 +371,8 @@ - **HRTiPlaylist** - **Huajiao**: 花椒直播 - **HuffPost**: Huffington Post + - **Hungama** + - **HungamaSong** - **Hypem** - **Iconosquare** - **ign.com** @@ -540,8 +543,6 @@ - **MyviEmbed** - **MyVisionTV** - **n-tv.de** - - **natgeo** - - **natgeo:episodeguide** - **natgeo:video** - **Naver** - **NBA** @@ -642,6 +643,7 @@ - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek - **OsnatelTV** + - **OutsideTV** - **PacktPub** - **PacktPubCourse** - **PandaTV**: 熊猫TV @@ -666,6 +668,7 @@ - **Pinkbike** - **Pladform** - **play.fm** + - **PlayPlusTV** - **PlaysTV** - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **Playvid** @@ -934,7 +937,9 @@ - **TVNet** - **TVNoe** - **TVNow** - - **TVNowList** + - **TVNowAnnual** + - **TVNowNew** + - **TVNowSeason** - **TVNowShow** - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 07a444706..5ba61f489 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.02' +__version__ = '2019.01.10' From c469e8808c723d6ba090c0b33c292622a3cb555e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Jan 2019 18:48:52 +0100 Subject: [PATCH 0262/1201] [playplustv] add support for playplus.com(#18789) --- youtube_dl/extractor/playplustv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playplustv.py b/youtube_dl/extractor/playplustv.py index 419c2974b..1e30ab23a 100644 --- a/youtube_dl/extractor/playplustv.py +++ b/youtube_dl/extractor/playplustv.py @@ -15,7 +15,7 @@ from ..utils import ( class PlayPlusTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?playplus\.tv/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' + _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' _TEST = { 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e', 'md5': 'd078cb89d7ab6b9df37ce23c647aef72', From a64646e417615173c9436887116402c9c4c9f6ee Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 11 Jan 2019 15:09:44 +0100 Subject: [PATCH 0263/1201] [postprocessor/ffmpeg] sanitize ffmpeg version for Ubuntu and Arch Linux systems(closes #18813) --- youtube_dl/postprocessor/ffmpeg.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index efcd39d57..173bdbe68 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -79,6 +79,19 @@ class FFmpegPostProcessor(PostProcessor): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] prefer_ffmpeg = True + def get_ffmpeg_version(path): + ver = get_exe_version(path, args=['-version']) + if ver: + regexs = [ + r'([0-9.]+)-0ubuntu0\.[0-9.]+$', # Ubuntu + r'n([0-9.]+)$', # Arch Linux + ] + for regex in regexs: + mobj = re.match(regex, ver) + if mobj: + ver = mobj.group(1) + return ver + self.basename = None self.probe_basename = None @@ -110,11 +123,10 @@ class FFmpegPostProcessor(PostProcessor): self._paths = dict( (p, os.path.join(location, p)) for p in programs) self._versions = dict( - (p, get_exe_version(self._paths[p], args=['-version'])) - for p in programs) + (p, get_ffmpeg_version(self._paths[p])) for p in programs) if self._versions is None: self._versions = dict( - (p, get_exe_version(p, args=['-version'])) for p in programs) + (p, get_ffmpeg_version(p)) for p in programs) self._paths = dict((p, p) for p in programs) if prefer_ffmpeg is False: From 5caa531a1a7e070e3bef8cc60c3d00c3f7ab1e6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 11 Jan 2019 23:47:23 +0700 Subject: [PATCH 0264/1201] [postprocessor/ffmpeg] PEP 8 --- youtube_dl/postprocessor/ffmpeg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 173bdbe68..a9fafa363 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -83,8 +83,8 @@ class FFmpegPostProcessor(PostProcessor): ver = get_exe_version(path, args=['-version']) if ver: regexs = [ - r'([0-9.]+)-0ubuntu0\.[0-9.]+$', # Ubuntu - r'n([0-9.]+)$', # Arch Linux + r'([0-9.]+)-0ubuntu0\.[0-9.]+$', # Ubuntu + r'n([0-9.]+)$', # Arch Linux ] for regex in regexs: mobj = re.match(regex, ver) From cbdc688c41876714f1d7e2f82ba9d11f6695448a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 12 Jan 2019 00:30:06 +0700 Subject: [PATCH 0265/1201] [postprocessor/ffmpeg] Relax ubuntu ffmpeg version regex --- youtube_dl/postprocessor/ffmpeg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index a9fafa363..39a905380 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -83,8 +83,9 @@ class FFmpegPostProcessor(PostProcessor): ver = get_exe_version(path, args=['-version']) if ver: regexs = [ - r'([0-9.]+)-0ubuntu0\.[0-9.]+$', # Ubuntu + r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1] r'n([0-9.]+)$', # Arch Linux + # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/ ] for regex in regexs: mobj = re.match(regex, ver) From 60a899bb7ed583b4c40289925c8127e746c683ea Mon Sep 17 00:00:00 2001 From: Atlas Sullivan <AtlasJan@gmx.com> Date: Fri, 11 Jan 2019 19:15:48 +0000 Subject: [PATCH 0266/1201] [README.md] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 901595444..70bcfaccf 100644 --- a/README.md +++ b/README.md @@ -496,7 +496,7 @@ The `-o` option allows users to indicate a template for the output file names. **tl;dr:** [navigate me to examples](#output-template-examples). -The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are: +The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are: - `id` (string): Video identifier - `title` (string): Video title From ed8db0a25c93f5611dfae81ee8f15cbc177bd0ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 12 Jan 2019 04:56:17 +0700 Subject: [PATCH 0267/1201] [wistia] Extend _VALID_URL (closes #18823) --- youtube_dl/extractor/wistia.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 01a51275e..fa142b974 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -12,7 +12,7 @@ from ..utils import ( class WistiaIE(InfoExtractor): - _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/iframe/)(?P<id>[a-z0-9]+)' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)' _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' @@ -38,6 +38,9 @@ class WistiaIE(InfoExtractor): }, { 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt', 'only_matching': True, + }, { + 'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json', + 'only_matching': True, }] @staticmethod From d65f6e734b5c00acbb396f4569907f7957e1cbef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 13 Jan 2019 03:57:31 +0700 Subject: [PATCH 0268/1201] [bitchute] Check formats (#18833) --- youtube_dl/extractor/bitchute.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py index aa034355a..4f39424f5 100644 --- a/youtube_dl/extractor/bitchute.py +++ b/youtube_dl/extractor/bitchute.py @@ -55,6 +55,7 @@ class BitChuteIE(InfoExtractor): formats = [ {'url': format_url} for format_url in orderedSet(format_urls)] + self._check_formats(formats, video_id) self._sort_formats(formats) description = self._html_search_regex( From f1ab3b7de7667c0382bf6eb6364ccab4260c0654 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 13 Jan 2019 10:01:26 +0100 Subject: [PATCH 0269/1201] [downloader/hls] fix uplynk ad skipping(closes #18824) --- youtube_dl/downloader/hls.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index fd304527e..4def8e2d5 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -75,10 +75,14 @@ class HlsFD(FragmentFD): fd.add_progress_hook(ph) return fd.real_download(filename, info_dict) - def is_ad_fragment(s): + def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) + def is_ad_fragment_end(s): + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or + s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + media_frags = 0 ad_frags = 0 ad_frag_next = False @@ -87,12 +91,13 @@ class HlsFD(FragmentFD): if not line: continue if line.startswith('#'): - if is_ad_fragment(line): - ad_frags += 1 + if is_ad_fragment_start(line): ad_frag_next = True + elif is_ad_fragment_end(line): + ad_frag_next = False continue if ad_frag_next: - ad_frag_next = False + ad_frags += 1 continue media_frags += 1 @@ -123,7 +128,6 @@ class HlsFD(FragmentFD): if line: if not line.startswith('#'): if ad_frag_next: - ad_frag_next = False continue frag_index += 1 if frag_index <= ctx['fragment_index']: @@ -196,8 +200,10 @@ class HlsFD(FragmentFD): 'start': sub_range_start, 'end': sub_range_start + int(splitted_byte_range[0]), } - elif is_ad_fragment(line): + elif is_ad_fragment_start(line): ad_frag_next = True + elif is_ad_fragment_end(line): + ad_frag_next = False self._finish_frag_download(ctx) From 3b983ee471305946709dcb83fa3799fc26a7db03 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 13 Jan 2019 15:46:54 +0100 Subject: [PATCH 0270/1201] [curiositystream] add support for non app urls --- youtube_dl/extractor/curiositystream.py | 56 +++++++++++++------------ 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py index 35b1e7a34..e4a7fca6c 100644 --- a/youtube_dl/extractor/curiositystream.py +++ b/youtube_dl/extractor/curiositystream.py @@ -46,8 +46,24 @@ class CuriosityStreamBaseIE(InfoExtractor): self._handle_errors(result) self._auth_token = result['message']['auth_token'] - def _extract_media_info(self, media): - video_id = compat_str(media['id']) + +class CuriosityStreamIE(CuriosityStreamBaseIE): + IE_NAME = 'curiositystream' + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)' + _TEST = { + 'url': 'https://app.curiositystream.com/video/2', + 'md5': '262bb2f257ff301115f1973540de8983', + 'info_dict': { + 'id': '2', + 'ext': 'mp4', + 'title': 'How Did You Develop The Internet?', + 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('media/' + video_id, video_id) title = media['title'] formats = [] @@ -114,38 +130,21 @@ class CuriosityStreamBaseIE(InfoExtractor): } -class CuriosityStreamIE(CuriosityStreamBaseIE): - IE_NAME = 'curiositystream' - _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)' - _TEST = { - 'url': 'https://app.curiositystream.com/video/2', - 'md5': '262bb2f257ff301115f1973540de8983', - 'info_dict': { - 'id': '2', - 'ext': 'mp4', - 'title': 'How Did You Develop The Internet?', - 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - media = self._call_api('media/' + video_id, video_id) - return self._extract_media_info(media) - - class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): IE_NAME = 'curiositystream:collection' - _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)' - _TEST = { + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)' + _TESTS = [{ 'url': 'https://app.curiositystream.com/collection/2', 'info_dict': { 'id': '2', 'title': 'Curious Minds: The Internet', 'description': 'How is the internet shaping our lives in the 21st Century?', }, - 'playlist_mincount': 12, - } + 'playlist_mincount': 17, + }, { + 'url': 'https://curiositystream.com/series/2', + 'only_matching': True, + }] def _real_extract(self, url): collection_id = self._match_id(url) @@ -153,7 +152,10 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): 'collections/' + collection_id, collection_id) entries = [] for media in collection.get('media', []): - entries.append(self._extract_media_info(media)) + media_id = compat_str(media.get('id')) + entries.append(self.url_result( + 'https://curiositystream.com/video/' + media_id, + CuriosityStreamIE.ie_key(), media_id)) return self.playlist_result( entries, collection_id, collection.get('title'), collection.get('description')) From 10026329c2534635876921da229f382098b8f8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 14 Jan 2019 23:23:51 +0700 Subject: [PATCH 0271/1201] [skylinewebcams] Fix extraction (closes #18853) --- youtube_dl/extractor/skylinewebcams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dl/extractor/skylinewebcams.py index 5b4aaac6f..b7f8ac736 100644 --- a/youtube_dl/extractor/skylinewebcams.py +++ b/youtube_dl/extractor/skylinewebcams.py @@ -26,7 +26,7 @@ class SkylineWebcamsIE(InfoExtractor): webpage = self._download_webpage(url, video_id) stream_url = self._search_regex( - r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage, + r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage, 'stream url', group='url') title = self._og_search_title(webpage) From 929ba3997b026fd9fafe15d447732d8ccdf367bb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Jan 2019 10:23:59 +0100 Subject: [PATCH 0272/1201] [funimation] fix extraction(closes #14089) --- youtube_dl/extractor/funimation.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 07d01caec..8bbedca26 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -1,6 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals +import random +import string + from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( @@ -87,7 +90,7 @@ class FunimationIE(InfoExtractor): video_id = title_data.get('id') or self._search_regex([ r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", - r'<iframe[^>]+src="/player/(\d+)"', + r'<iframe[^>]+src="/player/(\d+)', ], webpage, 'video_id', default=None) if not video_id: player_url = self._html_search_meta([ @@ -108,8 +111,10 @@ class FunimationIE(InfoExtractor): if self._TOKEN: headers['Authorization'] = 'Token %s' % self._TOKEN sources = self._download_json( - 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id, - video_id, headers=headers)['items'] + 'https://www.funimation.com/api/showexperience/%s/' % video_id, + video_id, headers=headers, query={ + 'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]), + })['items'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: error = self._parse_json(e.cause.read(), video_id)['errors'][0] From 561b456e2d10e60b74a1bee6154bb7fdb0bfaf60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 01:12:58 +0700 Subject: [PATCH 0273/1201] [youtube] Extract DASH formats from player response (closes #18804) --- youtube_dl/extractor/youtube.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 29773877e..cca149107 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1545,6 +1545,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if dash_mpd and dash_mpd[0] not in dash_mpds: dash_mpds.append(dash_mpd[0]) + def add_dash_mpd_pr(pl_response): + dash_mpd = url_or_none(try_get( + pl_response, lambda x: x['streamingData']['dashManifestUrl'], + compat_str)) + if dash_mpd and dash_mpd not in dash_mpds: + dash_mpds.append(dash_mpd) + is_live = None view_count = None @@ -1602,6 +1609,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if isinstance(pl_response, dict): player_response = pl_response if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): + add_dash_mpd_pr(player_response) # We also try looking in get_video_info since it may contain different dashmpd # URL that points to a DASH manifest with possibly different itag set (some itags # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH @@ -1633,6 +1641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): pl_response = get_video_info.get('player_response', [None])[0] if isinstance(pl_response, dict): player_response = pl_response + add_dash_mpd_pr(player_response) add_dash_mpd(get_video_info) if view_count is None: view_count = extract_view_count(get_video_info) From 2f483bc1c389709623117079439708783122b5ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 01:28:50 +0700 Subject: [PATCH 0274/1201] [youtube] Skip unsupported adaptive stream type (#18804) --- youtube_dl/extractor/youtube.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index cca149107..5e93b5329 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1827,6 +1827,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url_data = compat_parse_qs(url_data_str) if 'itag' not in url_data or 'url' not in url_data: continue + stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0])) + # Unsupported FORMAT_STREAM_TYPE_OTF + if stream_type == 3: + continue format_id = url_data['itag'][0] url = url_data['url'][0] From a16c7c033a161f310b69d444edc9dbf67cfc49ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 02:17:49 +0700 Subject: [PATCH 0275/1201] [test/helper] Add support for maxcount and count collection len test checkers --- test/helper.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/test/helper.py b/test/helper.py index aa9a1c9b2..e62aab11e 100644 --- a/test/helper.py +++ b/test/helper.py @@ -153,15 +153,27 @@ def expect_value(self, got, expected, field): isinstance(got, compat_str), 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got))) got = 'md5:' + md5(got) - elif isinstance(expected, compat_str) and expected.startswith('mincount:'): + elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected): self.assertTrue( isinstance(got, (list, dict)), 'Expected field %s to be a list or a dict, but it is of type %s' % ( field, type(got).__name__)) - expected_num = int(expected.partition(':')[2]) - assertGreaterEqual( + op, _, expected_num = expected.partition(':') + expected_num = int(expected_num) + if op == 'mincount': + assert_func = assertGreaterEqual + msg_tmpl = 'Expected %d items in field %s, but only got %d' + elif op == 'maxcount': + assert_func = assertLessEqual + msg_tmpl = 'Expected maximum %d items in field %s, but got %d' + elif op == 'count': + assert_func = assertEqual + msg_tmpl = 'Expected exactly %d items in field %s, but got %d' + else: + assert False + assert_func( self, len(got), expected_num, - 'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got))) + msg_tmpl % (expected_num, field, len(got))) return self.assertEqual( expected, got, @@ -237,6 +249,20 @@ def assertGreaterEqual(self, got, expected, msg=None): self.assertTrue(got >= expected, msg) +def assertLessEqual(self, got, expected, msg=None): + if not (got <= expected): + if msg is None: + msg = '%r not less than or equal to %r' % (got, expected) + self.assertTrue(got <= expected, msg) + + +def assertEqual(self, got, expected, msg=None): + if not (got == expected): + if msg is None: + msg = '%r not equal to %r' % (got, expected) + self.assertTrue(got == expected, msg) + + def expect_warnings(ydl, warnings_re): real_warning = ydl.report_warning From 4fe54c128a11d394874505af75aaa5a2276aa3ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 02:18:27 +0700 Subject: [PATCH 0276/1201] [youtube] Update tests and add a tests for #18804 --- youtube_dl/extractor/youtube.py | 57 +++++++++++++++++---------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5e93b5329..730935657 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -498,7 +498,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 'upload_date': '20121002', - 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], @@ -527,7 +526,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Icona Pop', 'uploader_id': 'IconaPop', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop', - 'license': 'Standard YouTube License', 'creator': 'Icona Pop', 'track': 'I Love It (feat. Charli XCX)', 'artist': 'Icona Pop', @@ -540,14 +538,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': '07FYdnEawAQ', 'ext': 'mp4', 'upload_date': '20130703', - 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', + 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)', 'alt_title': 'Tunnel Vision', - 'description': 'md5:64249768eec3bc4276236606ea996373', + 'description': 'md5:07dab3356cde4199048e4c7cd93471e1', 'duration': 419, 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', - 'license': 'Standard YouTube License', 'creator': 'Justin Timberlake', 'track': 'Tunnel Vision', 'artist': 'Justin Timberlake', @@ -566,7 +563,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'SET India', 'uploader_id': 'setindia', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia', - 'license': 'Standard YouTube License', 'age_limit': 18, } }, @@ -581,7 +577,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 'upload_date': '20121002', - 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], @@ -605,7 +600,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', 'description': '', 'uploader': '8KVIDEO', - 'license': 'Standard YouTube License', 'title': 'UHDTV TEST 8K VIDEO.mp4' }, 'params': { @@ -620,13 +614,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'IB3lcPjvWLA', 'ext': 'm4a', - 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson', - 'description': 'md5:1900ed86ee514927b9e00fbead6969a5', + 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson', + 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf', 'duration': 244, 'uploader': 'AfrojackVEVO', 'uploader_id': 'AfrojackVEVO', 'upload_date': '20131011', - 'license': 'Standard YouTube License', }, 'params': { 'youtube_include_dash_manifest': True, @@ -640,13 +633,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'nfWlot6h_JM', 'ext': 'm4a', 'title': 'Taylor Swift - Shake It Off', - 'alt_title': 'Shake It Off', - 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3', + 'description': 'md5:bec2185232c05479482cb5a9b82719bf', 'duration': 242, 'uploader': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO', 'upload_date': '20140818', - 'license': 'Standard YouTube License', 'creator': 'Taylor Swift', }, 'params': { @@ -662,10 +653,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'duration': 219, 'upload_date': '20100909', - 'uploader': 'TJ Kirk', + 'uploader': 'Amazing Atheist', 'uploader_id': 'TheAmazingAtheist', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', - 'license': 'Standard YouTube License', 'title': 'Burning Everyone\'s Koran', 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', } @@ -683,7 +673,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'WitcherGame', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 'upload_date': '20140605', - 'license': 'Standard YouTube License', 'age_limit': 18, }, }, @@ -692,7 +681,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU', 'info_dict': { 'id': '6kLq3WMV1nU', - 'ext': 'webm', + 'ext': 'mp4', 'title': 'Dedication To My Ex (Miss That) (Lyric Video)', 'description': 'md5:33765bb339e1b47e7e72b5490139bb41', 'duration': 246, @@ -700,7 +689,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'LloydVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO', 'upload_date': '20110629', - 'license': 'Standard YouTube License', 'age_limit': 18, }, }, @@ -718,7 +706,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'creator': 'deadmau5', 'description': 'md5:12c56784b8032162bb936a5f76d55360', 'uploader': 'deadmau5', - 'license': 'Standard YouTube License', 'title': 'Deadmau5 - Some Chords (HD)', 'alt_title': 'Some Chords', }, @@ -736,7 +723,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150827', 'uploader_id': 'olympic', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', - 'license': 'Standard YouTube License', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 'uploader': 'Olympic', 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', @@ -758,7 +744,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', 'uploader': '孫ᄋᄅ', - 'license': 'Standard YouTube License', 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人', }, }, @@ -792,7 +777,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'dorappi2000', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 'uploader': 'dorappi2000', - 'license': 'Standard YouTube License', 'formats': 'mincount:31', }, 'skip': 'not actual anymore', @@ -808,7 +792,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Airtek', 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.', 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ', - 'license': 'Standard YouTube License', 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015', }, 'params': { @@ -881,6 +864,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This video is not available.', }, { # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536) @@ -917,7 +901,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'IronSoulElf', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 'uploader': 'IronSoulElf', - 'license': 'Standard YouTube License', 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 'track': 'Dark Walk - Position Music', 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', @@ -1021,13 +1004,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'iqKdEhx-dD4', 'ext': 'mp4', 'title': 'Isolation - Mind Field (Ep 1)', - 'description': 'md5:25b78d2f64ae81719f5c96319889b736', + 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f', 'duration': 2085, 'upload_date': '20170118', 'uploader': 'Vsauce', 'uploader_id': 'Vsauce', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', - 'license': 'Standard YouTube License', 'series': 'Mind Field', 'season_number': 1, 'episode_number': 1, @@ -1053,7 +1035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'New Century Foundation', 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg', - 'license': 'Standard YouTube License', }, 'params': { 'skip_download': True, @@ -1081,6 +1062,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # DRM protected 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc', 'only_matching': True, + }, + { + # Video with unsupported adaptive stream type formats + 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U', + 'info_dict': { + 'id': 'Z4Vy8R84T1U', + 'ext': 'mp4', + 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'duration': 433, + 'upload_date': '20130923', + 'uploader': 'Amelia Putri Harwita', + 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q', + 'formats': 'maxcount:10', + }, + 'params': { + 'skip_download': True, + 'youtube_include_dash_manifest': False, + }, } ] From b0d73a745656443baf3e2134d9ca03bd3a4a934c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 02:20:10 +0700 Subject: [PATCH 0277/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3d60754c5..d25169074 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version <unreleased> + +Core ++ [test/helper] Add support for maxcount and count collection len checkers +* [downloader/hls] Fix uplynk ad skipping (#18824) +* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813) + +Extractors +* [youtube] Skip unsupported adaptive stream type (#18804) ++ [youtube] Extract DASH formats from player response (#18804) +* [funimation] Fix extraction (#14089) +* [skylinewebcams] Fix extraction (#18853) ++ [curiositystream] Add support for non app URLs ++ [bitchute] Check formats (#18833) +* [wistia] Extend URL regular expression (#18823) ++ [playplustv] Add support for playplus.com (#18789) + + version 2019.01.10 Core From bfc8eeea57251796d44f1e10b61d06760690bc25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Jan 2019 02:24:08 +0700 Subject: [PATCH 0278/1201] release 2019.01.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index d6abdbcb7..650f78511 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.10 +[debug] youtube-dl version 2019.01.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d25169074..13019bf2b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.16 Core + [test/helper] Add support for maxcount and count collection len checkers diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5ba61f489..c13f3a38a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.10' +__version__ = '2019.01.16' From fa4ac365f69cbd51e4c9801984ebea49a12825b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 17 Jan 2019 10:24:44 +0700 Subject: [PATCH 0279/1201] [youtube] Extend JS player signature function name regexes (closes #18890, closes #18891, closes #18893) --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 730935657..c8bf98b58 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1198,8 +1198,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._search_regex( (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') From f53cecd796dbb698abbde6ac2f7f973dba78f8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 17 Jan 2019 10:25:50 +0700 Subject: [PATCH 0280/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 13019bf2b..171034a75 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version <unreleased> + +Extractors +* [youtube] Extend JS player signature function name regular expressions + (#18890, #18891, #18893) + + version 2019.01.16 Core From 29639b363ddab7903ceae096912a0227c8017533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 17 Jan 2019 10:27:17 +0700 Subject: [PATCH 0281/1201] release 2019.01.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 650f78511..841bca914 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.16 +[debug] youtube-dl version 2019.01.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 171034a75..902301765 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.17 Extractors * [youtube] Extend JS player signature function name regular expressions diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c13f3a38a..ea3f62928 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.16' +__version__ = '2019.01.17' From 79fec976b0c250446ea9a9eb7323fb2045ee37fe Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Jan 2019 09:44:08 +0100 Subject: [PATCH 0282/1201] [vimeo] fix extraction for password protected player URLs(closes #18889) --- youtube_dl/extractor/vimeo.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 5e15f060b..fd37f919b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 import json import re import itertools @@ -392,6 +393,22 @@ class VimeoIE(VimeoBaseInfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://player.vimeo.com/video/68375962', + 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', + 'info_dict': { + 'id': '68375962', + 'ext': 'mp4', + 'title': 'youtube-dl password protected test video', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', + 'uploader_id': 'user18948128', + 'uploader': 'Jaime Marquínez Ferrándiz', + 'duration': 10, + }, + 'params': { + 'videopassword': 'youtube-dl', + }, + }, { 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', 'only_matching': True, @@ -452,7 +469,9 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword') if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') - data = urlencode_postdata({'password': password}) + data = urlencode_postdata({ + 'password': base64.b64encode(password.encode()), + }) pass_url = url + '/check-password' password_request = sanitized_Request(pass_url, data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') From e2dd132f054df5b6c09b7c274752a77d8ba44f8d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Jan 2019 23:56:37 +0100 Subject: [PATCH 0283/1201] [cartoonnetwork] fix extraction(closes #15664)(closes #17224) --- youtube_dl/extractor/cartoonnetwork.py | 56 +++++++++++++++++--------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 6aeebd7b3..48b33617f 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -1,20 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .turner import TurnerBaseIE +from ..utils import int_or_none class CartoonNetworkIE(TurnerBaseIE): _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html' _TEST = { - 'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html', + 'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html', 'info_dict': { - 'id': '8a250ab04ed07e6c014ef3f1e2f9016c', + 'id': '6e3375097f63874ebccec7ef677c1c3845fa850e', 'ext': 'mp4', - 'title': 'Starfire the Cat Lady', - 'description': 'Robin decides to become a cat so that Starfire will finally love him.', + 'title': 'How to Draw Upgrade', + 'description': 'md5:2061d83776db7e8be4879684eefe8c0f', }, 'params': { # m3u8 download @@ -25,18 +24,39 @@ class CartoonNetworkIE(TurnerBaseIE): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups() - query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id - return self._extract_cvp_info( - 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', - 'tokenizer_src': 'https://token.vgtf.net/token/token_mobile', - }, - }, { + + def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False): + metadata_re = '' + if content_re: + metadata_re = r'|video_metadata\.content_' + content_re + return self._search_regex( + r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re), + webpage, name, fatal=fatal) + + media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True) + title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True) + + info = self._extract_ngtv_info( + media_id, {'networkId': 'cartoonnetwork'}, { 'url': url, 'site_name': 'CartoonNetwork', - 'auth_required': self._search_regex( - r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);', - webpage, 'auth required', default='false') == 'true', + 'auth_required': find_field('authType', 'auth type') != 'unauth', }) + + series = find_field( + 'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage) + info.update({ + 'id': media_id, + 'display_id': display_id, + 'title': title, + 'description': self._html_search_meta('description', webpage), + 'series': series, + 'episode': title, + }) + + for field in ('season', 'episode'): + field_name = field + 'Number' + info[field + '_number'] = int_or_none(find_field( + field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage)) + + return info From 2bfc1d9d68dec097fd8093dc0284dd0cd64beb2e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 19 Jan 2019 21:25:15 +0100 Subject: [PATCH 0284/1201] [extractor/common] imporove HLS video only format detection(closes #18923) --- youtube_dl/extractor/common.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9e7febcad..af621b74b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1596,6 +1596,7 @@ class InfoExtractor(object): # References: # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21 # 2. https://github.com/rg3/youtube-dl/issues/12211 + # 3. https://github.com/rg3/youtube-dl/issues/18923 # We should try extracting formats only from master playlists [1, 4.3.4], # i.e. playlists that describe available qualities. On the other hand @@ -1667,11 +1668,16 @@ class InfoExtractor(object): rendition = stream_group[0] return rendition.get('NAME') or stream_group_id + # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF inorder to have the + # chance to detect video only formats when EXT-X-STREAM-INF tags + # precede EXT-X-MEDIA tags in HLS manifest such as [3]. + for line in m3u8_doc.splitlines(): + if line.startswith('#EXT-X-MEDIA:'): + extract_media(line) + for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_stream_inf = parse_m3u8_attributes(line) - elif line.startswith('#EXT-X-MEDIA:'): - extract_media(line) elif line.startswith('#') or not line.strip(): continue else: From f28363ad1fb45b4450ae6f09ff9aff8f93227e40 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 19 Jan 2019 21:25:53 +0100 Subject: [PATCH 0285/1201] [ted] correct acodec for http formats(#18923) --- youtube_dl/extractor/ted.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index d3e4205f5..645942dfd 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -265,6 +265,8 @@ class TEDIE(InfoExtractor): 'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'protocol': 'http', }) + if f.get('acodec') == 'none': + del f['acodec'] formats.append(f) audio_download = talk_info.get('audioDownload') From 379306ef55b64c966392c072b17a450831fec252 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 19 Jan 2019 21:35:02 +0100 Subject: [PATCH 0286/1201] [extractor/common] fix typo --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index af621b74b..6e36e6778 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1668,7 +1668,7 @@ class InfoExtractor(object): rendition = stream_group[0] return rendition.get('NAME') or stream_group_id - # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF inorder to have the + # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the # chance to detect video only formats when EXT-X-STREAM-INF tags # precede EXT-X-MEDIA tags in HLS manifest such as [3]. for line in m3u8_doc.splitlines(): From 2cc779f497ae20d6e0e28fc546a25723cfea631a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 13:48:09 +0700 Subject: [PATCH 0287/1201] [YoutubeDL] Add negation support for string comparisons in format selection expressions (closes #18600, closes #18805) --- README.md | 3 ++- test/test_YoutubeDL.py | 46 +++++++++++++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 9 +++++--- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 70bcfaccf..886696015 100644 --- a/README.md +++ b/README.md @@ -667,13 +667,14 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, ` - `asr`: Audio sampling rate in Hertz - `fps`: Frame rate -Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields: +Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields: - `ext`: File extension - `acodec`: Name of the audio codec in use - `vcodec`: Name of the video codec in use - `container`: Name of the container format - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`) - `format_id`: A short description of the format +Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f0f5a8470..df8994b84 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -239,6 +239,52 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot') + def test_format_selection_string_ops(self): + formats = [ + {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + # equals (=) + ydl = YDL({'format': '[format_id=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not equal (!=) + ydl = YDL({'format': '[format_id!=abc-cba]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # starts with (^=) + ydl = YDL({'format': '[format_id^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not start with (!^=) + ydl = YDL({'format': '[format_id!^=abc-cba]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # ends with ($=) + ydl = YDL({'format': '[format_id$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not end with (!$=) + ydl = YDL({'format': '[format_id!$=abc-cba]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # contains (*=) + ydl = YDL({'format': '[format_id*=-]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not contain (!*=) + ydl = YDL({'format': '[format_id!*=-]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + def test_youtube_format_selection(self): order = [ '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13', diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4493fd0e1..a827414dc 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1063,21 +1063,24 @@ class YoutubeDL(object): if not m: STR_OPERATORS = { '=': operator.eq, - '!=': operator.ne, '^=': lambda attr, value: attr.startswith(value), '$=': lambda attr, value: attr.endswith(value), '*=': lambda attr, value: value in attr, } str_operator_rex = re.compile(r'''(?x) \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id) - \s*(?P<op>%s)(?P<none_inclusive>\s*\?)? + \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)? \s*(?P<value>[a-zA-Z0-9._-]+) \s*$ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) m = str_operator_rex.search(filter_spec) if m: comparison_value = m.group('value') - op = STR_OPERATORS[m.group('op')] + str_op = STR_OPERATORS[m.group('op')] + if m.group('negation'): + op = lambda attr, value: not str_op + else: + op = str_op if not m: raise ValueError('Invalid filter specification %r' % filter_spec) From 4e58d9fabbfef80b2ecc0d20959bdf9dd3705e73 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 20 Jan 2019 14:23:35 +0700 Subject: [PATCH 0288/1201] [README.md] Fix formatting --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 886696015..4ba982907 100644 --- a/README.md +++ b/README.md @@ -674,6 +674,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends - `container`: Name of the container format - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`) - `format_id`: A short description of the format + Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. From fc746c3fdd7d40935a11e72b5cb69a8aee840e94 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 20 Jan 2019 09:04:51 +0100 Subject: [PATCH 0289/1201] [test/test_InfoExtractor] add test for #18923 --- test/test_InfoExtractor.py | 59 ++++++++++++++++++++++++++++++- test/testdata/m3u8/ted_18923.m3u8 | 28 +++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 test/testdata/m3u8/ted_18923.m3u8 diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 06be72616..75fa0bbb7 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -497,7 +497,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'width': 1280, 'height': 720, }] - ) + ), + ( + # https://github.com/rg3/youtube-dl/issues/18923 + # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa + 'ted_18923', + 'http://hls.ted.com/talks/31241.m3u8', + [{ + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '600k-Audio', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '68', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '163', + 'acodec': 'none', + 'width': 320, + 'height': 180, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '481', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '769', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '984', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1255', + 'acodec': 'none', + 'width': 640, + 'height': 360, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1693', + 'acodec': 'none', + 'width': 853, + 'height': 480, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '2462', + 'acodec': 'none', + 'width': 1280, + 'height': 720, + }] + ), ] for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: diff --git a/test/testdata/m3u8/ted_18923.m3u8 b/test/testdata/m3u8/ted_18923.m3u8 new file mode 100644 index 000000000..52a27118b --- /dev/null +++ b/test/testdata/m3u8/ted_18923.m3u8 @@ -0,0 +1,28 @@ +#EXTM3U +#EXT-X-VERSION:4 +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360 +/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180 +/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288 +/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288 +/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288 +/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480 +/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720 +/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b +#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES +/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b + +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b" + +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400 From 15870747f02effba9376fd1a1500c32d36d1b811 Mon Sep 17 00:00:00 2001 From: aviperes <avipr24@gmail.com> Date: Sun, 20 Jan 2019 10:15:01 +0200 Subject: [PATCH 0290/1201] [odnoklassniki] Detect paid videos --- youtube_dl/extractor/odnoklassniki.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 190d8af4d..114b93c07 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -115,6 +115,10 @@ class OdnoklassnikiIE(InfoExtractor): }, { 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#', 'only_matching': True, + }, { + # Paid video + 'url': 'https://ok.ru/video/954886983203', + 'only_matching': True, }] def _real_extract(self, url): @@ -244,6 +248,11 @@ class OdnoklassnikiIE(InfoExtractor): 'ext': 'flv', }) + if not formats: + payment_info = metadata.get('paymentInfo') + if payment_info: + raise ExtractorError('This video is paid, subscribe to download it', expected=True) + self._sort_formats(formats) info['formats'] = formats From 31fbedc06a349bc555ab934588544f75734e3a55 Mon Sep 17 00:00:00 2001 From: jhwgh1968 <jhwgh1968@users.noreply.github.com> Date: Sun, 20 Jan 2019 09:10:46 +0000 Subject: [PATCH 0291/1201] [instagram] Add base extractor for playlists and tag extractor --- youtube_dl/extractor/extractors.py | 6 +- youtube_dl/extractor/instagram.py | 136 +++++++++++++++++++++-------- 2 files changed, 105 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de38c6641..24361def4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -494,7 +494,11 @@ from .ina import InaIE from .inc import IncIE from .indavideo import IndavideoEmbedIE from .infoq import InfoQIE -from .instagram import InstagramIE, InstagramUserIE +from .instagram import ( + InstagramIE, + InstagramUserIE, + InstagramTagIE, +) from .internazionale import InternazionaleIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 7e0e838f0..ffd87b55f 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -227,44 +227,37 @@ class InstagramIE(InfoExtractor): } -class InstagramUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])' - IE_DESC = 'Instagram user profile' - IE_NAME = 'instagram:user' - _TEST = { - 'url': 'https://instagram.com/porsche', - 'info_dict': { - 'id': 'porsche', - 'title': 'porsche', - }, - 'playlist_count': 5, - 'params': { - 'extract_flat': True, - 'skip_download': True, - 'playlistend': 5, - } - } +class InstagramPlaylistIE(InfoExtractor): + # A superclass for handling any kind of query based on GraphQL which + # results in a playlist. - _gis_tmpl = None + _gis_tmpl = None # used to cache GIS request type - def _entries(self, data): + def _parse_graphql(self, webpage, item_id): + # Reads a webpage and returns its GraphQL data. + return self._parse_json( + self._search_regex( + r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'), + item_id) + + def _extract_graphql(self, data, url): + # Parses GraphQL queries containing videos and generates a playlist. def get_count(suffix): return int_or_none(try_get( node, lambda x: x['edge_media_' + suffix]['count'])) - uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id'] + uploader_id = self._match_id(url) csrf_token = data['config']['csrf_token'] rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8' - self._set_cookie('instagram.com', 'ig_pr', '1') - cursor = '' for page_num in itertools.count(1): - variables = json.dumps({ - 'id': uploader_id, + variables = { 'first': 12, 'after': cursor, - }) + } + variables.update(self._query_vars_for(data)) + variables = json.dumps(variables) if self._gis_tmpl: gis_tmpls = [self._gis_tmpl] @@ -276,21 +269,26 @@ class InstagramUserIE(InfoExtractor): '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']), ] + # try all of the ways to generate a GIS query, and not only use the + # first one that works, but cache it for future requests for gis_tmpl in gis_tmpls: try: - media = self._download_json( + json_data = self._download_json( 'https://www.instagram.com/graphql/query/', uploader_id, 'Downloading JSON page %d' % page_num, headers={ 'X-Requested-With': 'XMLHttpRequest', 'X-Instagram-GIS': hashlib.md5( ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(), }, query={ - 'query_hash': '42323d64886122307be10013ad2dcc44', + 'query_hash': self._QUERY_HASH, 'variables': variables, - })['data']['user']['edge_owner_to_timeline_media'] + }) + media = self._parse_timeline_from(json_data) self._gis_tmpl = gis_tmpl break except ExtractorError as e: + # if it's an error caused by a bad query, and there are + # more GIS templates to try, ignore it and keep trying if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if gis_tmpl != gis_tmpls[-1]: continue @@ -348,14 +346,80 @@ class InstagramUserIE(InfoExtractor): break def _real_extract(self, url): - username = self._match_id(url) + user_or_tag = self._match_id(url) + webpage = self._download_webpage(url, user_or_tag) + data = self._parse_graphql(webpage, user_or_tag) - webpage = self._download_webpage(url, username) - - data = self._parse_json( - self._search_regex( - r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'), - username) + self._set_cookie('instagram.com', 'ig_pr', '1') return self.playlist_result( - self._entries(data), username, username) + self._extract_graphql(data, url), user_or_tag, user_or_tag) + + +class InstagramUserIE(InstagramPlaylistIE): + _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])' + IE_DESC = 'Instagram user profile' + IE_NAME = 'instagram:user' + _TEST = { + 'url': 'https://instagram.com/porsche', + 'info_dict': { + 'id': 'porsche', + 'title': 'porsche', + }, + 'playlist_count': 5, + 'params': { + 'extract_flat': True, + 'skip_download': True, + 'playlistend': 5, + } + } + + _QUERY_HASH = '42323d64886122307be10013ad2dcc44', + + @staticmethod + def _parse_timeline_from(data): + # extracts the media timeline data from a GraphQL result + return data['data']['user']['edge_owner_to_timeline_media'] + + @staticmethod + def _query_vars_for(data): + # returns a dictionary of variables to add to the timeline query based + # on the GraphQL of the original page + return { + 'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id'] + } + + +class InstagramTagIE(InstagramPlaylistIE): + _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)' + IE_DESC = 'Instagram hashtag search' + IE_NAME = 'instagram:tag' + _TEST = { + 'url': 'https://instagram.com/explore/tags/lolcats', + 'info_dict': { + 'id': 'lolcats', + 'title': 'lolcats', + }, + 'playlist_count': 50, + 'params': { + 'extract_flat': True, + 'skip_download': True, + 'playlistend': 50, + } + } + + _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314', + + @staticmethod + def _parse_timeline_from(data): + # extracts the media timeline data from a GraphQL result + return data['data']['hashtag']['edge_hashtag_to_media'] + + @staticmethod + def _query_vars_for(data): + # returns a dictionary of variables to add to the timeline query based + # on the GraphQL of the original page + return { + 'tag_name': + data['entry_data']['TagPage'][0]['graphql']['hashtag']['name'] + } From 6ca3fa898cd066422daea3d5be53efdae22187d8 Mon Sep 17 00:00:00 2001 From: yonaikerlol <lawlietrs7@gmail.com> Date: Sun, 20 Jan 2019 05:24:21 -0400 Subject: [PATCH 0292/1201] [streamango] Add support for fruithosts.net --- youtube_dl/extractor/streamango.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py index fcaa5ac0b..efb259f96 100644 --- a/youtube_dl/extractor/streamango.py +++ b/youtube_dl/extractor/streamango.py @@ -14,7 +14,7 @@ from ..utils import ( class StreamangoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', 'md5': 'e992787515a182f55e38fc97588d802a', @@ -38,6 +38,9 @@ class StreamangoIE(InfoExtractor): }, { 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4', 'only_matching': True, + }, { + 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', + 'only_matching': True, }] def _real_extract(self, url): From 289ef490f77cb35c43d98b9d2ea4cf529c24895e Mon Sep 17 00:00:00 2001 From: Anthony Fok <foka@debian.org> Date: Sun, 30 Dec 2018 02:44:40 -0700 Subject: [PATCH 0293/1201] [hketv] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/hketv.py | 185 +++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 youtube_dl/extractor/hketv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24361def4..574a47e6d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -452,6 +452,7 @@ from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE from .hgtv import HGTVComShowIE +from .hketv import HKETVIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE diff --git a/youtube_dl/extractor/hketv.py b/youtube_dl/extractor/hketv.py new file mode 100644 index 000000000..b5790cdee --- /dev/null +++ b/youtube_dl/extractor/hketv.py @@ -0,0 +1,185 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + merge_dicts, + str_or_none, + str_to_int, + try_get, + unified_strdate, + urlencode_postdata, + urljoin, +) + + +class HKETVIE(InfoExtractor): + IE_NAME = 'hketv' + IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['HK'] + _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.hkedcity.net/etv/resource/2932360618', + 'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7', + 'info_dict': { + 'id': '2932360618', + 'ext': 'mp4', + 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)', + 'description': '本節目輯錄了「閱讀滿Fun嘉年華」和「二○一八響應世界閱讀日――悅愛閱讀・愈讀愈愛」的活動花絮,並由學者、作家、演藝界人士等,分享培養子女閱讀興趣和習慣的方法,以及呼籲大家一同分享閱讀的樂趣。', + 'upload_date': '20181024', + 'duration': 900, + 'subtitles': { + 'en': [{ + 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=en', + 'ext': 'srt', + }], + 'zh-Hant': [{ + 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=qmt', + 'ext': 'srt', + }], + } + }, + }, { + 'url': 'https://www.hkedcity.net/etv/resource/972641418', + 'md5': '1ed494c1c6cf7866a8290edad9b07dc9', + 'info_dict': { + 'id': '972641418', + 'ext': 'mp4', + 'title': '衣冠楚楚 (天使系列之一)', + 'description': '天國仙境,有兩位可愛的天使小姐妹。她們對幾千年來天使衣著一成不變頗有不滿。她們下望人世間:只見人們穿著七彩繽紛、款式各異的服裝,漂亮極了。天使姐妹決定下凡考察衣著,以設計天使新裝。 下到人間,姐妹試穿各式各樣的衣著,引發連串奇特有趣的情節:她們穿著校服在街上閒逛時,被女警誤認為逃學而送回學校,到校後又被體育老師誤認為是新同學,匆匆忙忙換上運動服後在操場上大顯神通。她們穿著護士服在醫院散步時,又被誤認為當班護士,而投入追尋失蹤病童、治病救人的工作中去。姐妹倆還到過玩具店,與布娃娃們談論衣著。她們也去過服裝設計學校,被當成小模特兒而試穿各式服裝。最令姐妹倆興奮的是一場盛大的民族服裝表演會。身穿盛裝的十二個民族的少女在台上翩翩起舞,各種服飾七彩繽紛、美不勝收。姐妹們情不自禁地穿上民族服裝,小天使變成了少數民族姑娘……最後天使姐妹回到天上,對於天使究竟穿甚麼樣的衣服好,她們還是拿不定主意。 節目通過天使姐妹的奇特經歷,反復示範各式衣服鞋襪的正確讀音及談論衣著時的常用句式,並以盛大的民族服裝表演活動,帶出有關服裝的文化知識。內容豐富而饒有趣味。', + 'upload_date': '20070109', + 'duration': 907, + 'subtitles': {}, + }, + }] + + _CC_LANGS = { + '中文(繁體中文)': 'zh-Hant', + '中文(简体中文)': 'zh-Hans', + 'English': 'en', + 'Bahasa Indonesia': 'id', + '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi', + '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne', + 'Tagalog': 'tl', + '\u0e44\u0e17\u0e22': 'th', + '\u0627\u0631\u062f\u0648': 'ur', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._html_search_meta('ed_title', webpage, fatal=True) + + file_id = self._html_search_regex(r'post_var\["file_id"\]\s*=\s*(.+?);', webpage, 'file ID') + curr_url = self._html_search_regex(r'post_var\["curr_url"\]\s*=\s*"(.+?)";', webpage, 'curr URL') + data = { + 'action': 'get_info', + 'curr_url': curr_url, + 'file_id': file_id, + 'video_url': file_id, + } + _APPS_BASE_URL = 'https://apps.hkedcity.net' + handler_url = _APPS_BASE_URL + '/media/play/handler.php' + + response = self._download_json( + handler_url, video_id, + data=urlencode_postdata(data), + headers=merge_dicts({'Content-Type': 'application/x-www-form-urlencoded'}, + self.geo_verification_headers())) + + result = response['result'] + + formats = [] + subtitles = {} + + if response.get('success') and response.get('access'): + width = int_or_none(result.get('width')) + height = int_or_none(result.get('height')) + + playlist0 = try_get(result, lambda x: x['playlist'][0], dict) + fmts = playlist0.get('sources') + for fmt in fmts: + file_path = fmt.get('file') + if file_path: + file_url = urljoin(_APPS_BASE_URL, file_path) + # If we ever wanted to provide the final resolved URL that + # does not require cookies, albeit with a shorter lifespan: + # urlh = self._downloader.urlopen(file_url) + # resolved_url = urlh.geturl() + + label = fmt.get('label') + w = None + h = None + if label == 'HD': + h = 720 + elif label == 'SD': + h = 360 + if h: + if width and height: + w = h * width // height + else: + w = h * 4 // 3 + + formats.append({ + 'format_id': label, + 'ext': fmt.get('type'), + 'url': file_url, + 'width': w, + 'height': h, + }) + + tracks = playlist0.get('tracks', []) + for track in tracks: + if not isinstance(track, dict): + continue + track_kind = str_or_none(track.get('kind')) + if not track_kind or not isinstance(track_kind, compat_str): + continue + if track_kind.lower() not in ('captions', 'subtitles'): + continue + track_url = urljoin(_APPS_BASE_URL, track.get('file')) + if not track_url: + continue + track_label = track.get('label') + subtitles.setdefault(self._CC_LANGS.get(track_label, track_label), []).append({ + 'url': self._proto_relative_url(track_url), + 'ext': 'srt', + }) + + else: + error = clean_html(response.get('access_err_msg')) + if 'Video streaming is not available in your country' in error: + self.raise_geo_restricted(msg=error, countries=self._GEO_COUNTRIES) + else: + raise ExtractorError(error) + + # Likes + emotion = self._download_json( + 'https://emocounter.hkedcity.net/handler.php', + video_id, + data=urlencode_postdata({ + 'action': 'get_emotion', + 'data[bucket_id]': 'etv', + 'data[identifier]': video_id, + }), + headers={'Content-Type': 'application/x-www-form-urlencoded'}, + fatal=False) + like_count = int_or_none(try_get(emotion, lambda x: x['data']['emotion_data'][0]['count'])) + + return { + 'id': video_id, + 'title': title, + 'description': self._html_search_meta('description', webpage, fatal=False), + 'upload_date': unified_strdate(self._html_search_meta('ed_date', webpage, fatal=False), day_first=False), + 'duration': int_or_none(result.get('length')), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnail': urljoin(_APPS_BASE_URL, result.get('image')), + 'view_count': str_to_int(result.get('view_count')), + 'like_count': like_count, + } From 73c19aaa9f74e9383a7aaf0dfb3c608727d5b6b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 17:42:25 +0700 Subject: [PATCH 0294/1201] [hketv] Improve and simplify (closes #18696) --- youtube_dl/extractor/hketv.py | 180 ++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 87 deletions(-) diff --git a/youtube_dl/extractor/hketv.py b/youtube_dl/extractor/hketv.py index b5790cdee..b57927fc1 100644 --- a/youtube_dl/extractor/hketv.py +++ b/youtube_dl/extractor/hketv.py @@ -8,8 +8,8 @@ from ..utils import ( ExtractorError, int_or_none, merge_dicts, + parse_count, str_or_none, - str_to_int, try_get, unified_strdate, urlencode_postdata, @@ -30,20 +30,12 @@ class HKETVIE(InfoExtractor): 'id': '2932360618', 'ext': 'mp4', 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)', - 'description': '本節目輯錄了「閱讀滿Fun嘉年華」和「二○一八響應世界閱讀日――悅愛閱讀・愈讀愈愛」的活動花絮,並由學者、作家、演藝界人士等,分享培養子女閱讀興趣和習慣的方法,以及呼籲大家一同分享閱讀的樂趣。', + 'description': 'md5:d5286d05219ef50e0613311cbe96e560', 'upload_date': '20181024', 'duration': 900, - 'subtitles': { - 'en': [{ - 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=en', - 'ext': 'srt', - }], - 'zh-Hant': [{ - 'url': 'https://apps.hkedcity.net/media/mediaplayer/caption.php?f=74395&lang=qmt', - 'ext': 'srt', - }], - } + 'subtitles': 'count:2', }, + 'skip': 'Geo restricted to HK', }, { 'url': 'https://www.hkedcity.net/etv/resource/972641418', 'md5': '1ed494c1c6cf7866a8290edad9b07dc9', @@ -51,11 +43,15 @@ class HKETVIE(InfoExtractor): 'id': '972641418', 'ext': 'mp4', 'title': '衣冠楚楚 (天使系列之一)', - 'description': '天國仙境,有兩位可愛的天使小姐妹。她們對幾千年來天使衣著一成不變頗有不滿。她們下望人世間:只見人們穿著七彩繽紛、款式各異的服裝,漂亮極了。天使姐妹決定下凡考察衣著,以設計天使新裝。 下到人間,姐妹試穿各式各樣的衣著,引發連串奇特有趣的情節:她們穿著校服在街上閒逛時,被女警誤認為逃學而送回學校,到校後又被體育老師誤認為是新同學,匆匆忙忙換上運動服後在操場上大顯神通。她們穿著護士服在醫院散步時,又被誤認為當班護士,而投入追尋失蹤病童、治病救人的工作中去。姐妹倆還到過玩具店,與布娃娃們談論衣著。她們也去過服裝設計學校,被當成小模特兒而試穿各式服裝。最令姐妹倆興奮的是一場盛大的民族服裝表演會。身穿盛裝的十二個民族的少女在台上翩翩起舞,各種服飾七彩繽紛、美不勝收。姐妹們情不自禁地穿上民族服裝,小天使變成了少數民族姑娘……最後天使姐妹回到天上,對於天使究竟穿甚麼樣的衣服好,她們還是拿不定主意。 節目通過天使姐妹的奇特經歷,反復示範各式衣服鞋襪的正確讀音及談論衣著時的常用句式,並以盛大的民族服裝表演活動,帶出有關服裝的文化知識。內容豐富而饒有趣味。', + 'description': 'md5:10bb3d659421e74f58e5db5691627b0f', 'upload_date': '20070109', 'duration': 907, 'subtitles': {}, }, + 'params': { + 'geo_verification_proxy': '<HK proxy here>', + }, + 'skip': 'Geo restricted to HK', }] _CC_LANGS = { @@ -69,117 +65,127 @@ class HKETVIE(InfoExtractor): '\u0e44\u0e17\u0e22': 'th', '\u0627\u0631\u062f\u0648': 'ur', } + _FORMAT_HEIGHTS = { + 'SD': 360, + 'HD': 720, + } + _APPS_BASE_URL = 'https://apps.hkedcity.net' def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_meta('ed_title', webpage, fatal=True) - file_id = self._html_search_regex(r'post_var\["file_id"\]\s*=\s*(.+?);', webpage, 'file ID') - curr_url = self._html_search_regex(r'post_var\["curr_url"\]\s*=\s*"(.+?)";', webpage, 'curr URL') + title = ( + self._html_search_meta( + ('ed_title', 'search.ed_title'), webpage, default=None) or + self._search_regex( + r'data-favorite_title_(?:eng|chi)=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'title', default=None, group='url') or + self._html_search_regex( + r'<h1>([^<]+)</h1>', webpage, 'title', default=None) or + self._og_search_title(webpage) + ) + + file_id = self._search_regex( + r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);', + webpage, 'file ID') + curr_url = self._search_regex( + r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";', + webpage, 'curr URL') data = { 'action': 'get_info', 'curr_url': curr_url, 'file_id': file_id, 'video_url': file_id, } - _APPS_BASE_URL = 'https://apps.hkedcity.net' - handler_url = _APPS_BASE_URL + '/media/play/handler.php' response = self._download_json( - handler_url, video_id, + self._APPS_BASE_URL + '/media/play/handler.php', video_id, data=urlencode_postdata(data), - headers=merge_dicts({'Content-Type': 'application/x-www-form-urlencoded'}, - self.geo_verification_headers())) + headers=merge_dicts({ + 'Content-Type': 'application/x-www-form-urlencoded'}, + self.geo_verification_headers())) result = response['result'] + if not response.get('success') or not response.get('access'): + error = clean_html(response.get('access_err_msg')) + if 'Video streaming is not available in your country' in error: + self.raise_geo_restricted( + msg=error, countries=self._GEO_COUNTRIES) + else: + raise ExtractorError(error, expected=True) + formats = [] + + width = int_or_none(result.get('width')) + height = int_or_none(result.get('height')) + + playlist0 = result['playlist'][0] + for fmt in playlist0['sources']: + file_url = urljoin(self._APPS_BASE_URL, fmt.get('file')) + if not file_url: + continue + # If we ever wanted to provide the final resolved URL that + # does not require cookies, albeit with a shorter lifespan: + # urlh = self._downloader.urlopen(file_url) + # resolved_url = urlh.geturl() + label = fmt.get('label') + h = self._FORMAT_HEIGHTS.get(label) + w = h * width // height if h and width and height else None + formats.append({ + 'format_id': label, + 'ext': fmt.get('type'), + 'url': file_url, + 'width': w, + 'height': h, + }) + self._sort_formats(formats) + subtitles = {} - - if response.get('success') and response.get('access'): - width = int_or_none(result.get('width')) - height = int_or_none(result.get('height')) - - playlist0 = try_get(result, lambda x: x['playlist'][0], dict) - fmts = playlist0.get('sources') - for fmt in fmts: - file_path = fmt.get('file') - if file_path: - file_url = urljoin(_APPS_BASE_URL, file_path) - # If we ever wanted to provide the final resolved URL that - # does not require cookies, albeit with a shorter lifespan: - # urlh = self._downloader.urlopen(file_url) - # resolved_url = urlh.geturl() - - label = fmt.get('label') - w = None - h = None - if label == 'HD': - h = 720 - elif label == 'SD': - h = 360 - if h: - if width and height: - w = h * width // height - else: - w = h * 4 // 3 - - formats.append({ - 'format_id': label, - 'ext': fmt.get('type'), - 'url': file_url, - 'width': w, - 'height': h, - }) - - tracks = playlist0.get('tracks', []) - for track in tracks: - if not isinstance(track, dict): - continue - track_kind = str_or_none(track.get('kind')) - if not track_kind or not isinstance(track_kind, compat_str): - continue - if track_kind.lower() not in ('captions', 'subtitles'): - continue - track_url = urljoin(_APPS_BASE_URL, track.get('file')) - if not track_url: - continue - track_label = track.get('label') - subtitles.setdefault(self._CC_LANGS.get(track_label, track_label), []).append({ + tracks = try_get(playlist0, lambda x: x['tracks'], list) or [] + for track in tracks: + if not isinstance(track, dict): + continue + track_kind = str_or_none(track.get('kind')) + if not track_kind or not isinstance(track_kind, compat_str): + continue + if track_kind.lower() not in ('captions', 'subtitles'): + continue + track_url = urljoin(self._APPS_BASE_URL, track.get('file')) + if not track_url: + continue + track_label = track.get('label') + subtitles.setdefault(self._CC_LANGS.get( + track_label, track_label), []).append({ 'url': self._proto_relative_url(track_url), 'ext': 'srt', }) - else: - error = clean_html(response.get('access_err_msg')) - if 'Video streaming is not available in your country' in error: - self.raise_geo_restricted(msg=error, countries=self._GEO_COUNTRIES) - else: - raise ExtractorError(error) - # Likes emotion = self._download_json( - 'https://emocounter.hkedcity.net/handler.php', - video_id, + 'https://emocounter.hkedcity.net/handler.php', video_id, data=urlencode_postdata({ 'action': 'get_emotion', 'data[bucket_id]': 'etv', 'data[identifier]': video_id, }), headers={'Content-Type': 'application/x-www-form-urlencoded'}, - fatal=False) - like_count = int_or_none(try_get(emotion, lambda x: x['data']['emotion_data'][0]['count'])) + fatal=False) or {} + like_count = int_or_none(try_get( + emotion, lambda x: x['data']['emotion_data'][0]['count'])) return { 'id': video_id, 'title': title, - 'description': self._html_search_meta('description', webpage, fatal=False), - 'upload_date': unified_strdate(self._html_search_meta('ed_date', webpage, fatal=False), day_first=False), + 'description': self._html_search_meta( + 'description', webpage, fatal=False), + 'upload_date': unified_strdate(self._html_search_meta( + 'ed_date', webpage, fatal=False), day_first=False), 'duration': int_or_none(result.get('length')), 'formats': formats, 'subtitles': subtitles, - 'thumbnail': urljoin(_APPS_BASE_URL, result.get('image')), - 'view_count': str_to_int(result.get('view_count')), + 'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')), + 'view_count': parse_count(result.get('view_count')), 'like_count': like_count, } From a1a460759815414c6194bc921ac77a5533b6e02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 18:21:31 +0700 Subject: [PATCH 0295/1201] [vimeo] Fix video password verification for videos protected by Referer HTTP header --- youtube_dl/extractor/vimeo.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index fd37f919b..6215b3258 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -435,6 +435,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/160743502/abd0e13fb4', 'only_matching': True, } + # https://gettingthingsdone.com/workflowmap/ + # vimeo embed with check-password page protected by Referer header ] @staticmethod @@ -465,20 +467,22 @@ class VimeoIE(VimeoBaseInfoExtractor): urls = VimeoIE._extract_urls(url, webpage) return urls[0] if urls else None - def _verify_player_video_password(self, url, video_id): + def _verify_player_video_password(self, url, video_id, headers): password = self._downloader.params.get('videopassword') if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') data = urlencode_postdata({ 'password': base64.b64encode(password.encode()), }) - pass_url = url + '/check-password' - password_request = sanitized_Request(pass_url, data) - password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - password_request.add_header('Referer', url) - return self._download_json( - password_request, video_id, - 'Verifying the password', 'Wrong password') + headers = merge_dicts(headers, { + 'Content-Type': 'application/x-www-form-urlencoded', + }) + checked = self._download_json( + url + '/check-password', video_id, + 'Verifying the password', data=data, headers=headers) + if checked is False: + raise ExtractorError('Wrong video password', expected=True) + return checked def _real_initialize(self): self._login() @@ -591,7 +595,7 @@ class VimeoIE(VimeoBaseInfoExtractor): cause=e) else: if config.get('view') == 4: - config = self._verify_player_video_password(redirect_url, video_id) + config = self._verify_player_video_password(redirect_url, video_id, headers) vod = config.get('video', {}).get('vod', {}) From 29cfcb43da8ac60e6c2eddad095a41c800d4d95a Mon Sep 17 00:00:00 2001 From: Alexandre Huot <alexandre.huot@usherbrooke.ca> Date: Sun, 20 Jan 2019 06:33:09 -0500 Subject: [PATCH 0296/1201] [radiocanada] Relax DRM check --- youtube_dl/extractor/radiocanada.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index b952e59b4..302f67d96 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -49,6 +49,16 @@ class RadioCanadaIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, + { + # with protectionType but not actually DRM protected + 'url': 'radiocanada:toutv:140872', + 'info_dict': { + 'id': '140872', + 'title': 'Épisode 1', + 'series': 'District 31', + }, + 'only_matching': True, } ] @@ -67,8 +77,10 @@ class RadioCanadaIE(InfoExtractor): el = find_xpath_attr(metadata, './/Meta', 'name', name) return el.text if el is not None else None + # protectionType does not necessarily mean the video is DRM protected (see + # https://github.com/rg3/youtube-dl/pull/18609). if get_meta('protectionType'): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_warning('This video is probably DRM protected.') device_types = ['ipad'] if not smuggled_data: From 6945b9e78f38284eb4e440b7badea2fc60b66c2f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 20 Jan 2019 13:31:41 +0100 Subject: [PATCH 0297/1201] [extractor/common] improve jwplayer relative url handling(closes #18892) --- youtube_dl/extractor/common.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6e36e6778..95456b291 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2630,7 +2630,7 @@ class InfoExtractor(object): 'id': this_video_id, 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')), 'description': video_data.get('description'), - 'thumbnail': self._proto_relative_url(video_data.get('image')), + 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))), 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, @@ -2657,12 +2657,9 @@ class InfoExtractor(object): for source in jwplayer_sources_data: if not isinstance(source, dict): continue - source_url = self._proto_relative_url(source.get('file')) - if not source_url: - continue - if base_url: - source_url = compat_urlparse.urljoin(base_url, source_url) - if source_url in urls: + source_url = urljoin( + base_url, self._proto_relative_url(source.get('file'))) + if not source_url or source_url in urls: continue urls.append(source_url) source_type = source.get('type') or '' From fad4ceb53404227f471af2f3544c4c14a5df4acb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 20:21:24 +0700 Subject: [PATCH 0298/1201] [utils] Fix urljoin for paths with non-http(s) schemes --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9e28e008f..409482c3b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -507,6 +507,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(urljoin('http://foo.de/', ''), None) self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de') + self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de') def test_url_or_none(self): self.assertEqual(url_or_none(None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d2d3c1a9f..d0cb65814 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1868,7 +1868,7 @@ def urljoin(base, path): path = path.decode('utf-8') if not isinstance(path, compat_str) or not path: return None - if re.match(r'^(?:https?:)?//', path): + if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path): return path if isinstance(base, bytes): base = base.decode('utf-8') From 07f9febc4b86a9cd819329f3a7daafdbe9455f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jan 2019 22:07:01 +0700 Subject: [PATCH 0299/1201] [tnaflix] Pass Referer in metadata request (closes #18925) --- youtube_dl/extractor/tnaflix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 6798ef4c3..b3573c6e0 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -96,7 +96,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor): cfg_xml = self._download_xml( cfg_url, display_id, 'Downloading metadata', - transform_source=fix_xml_ampersands) + transform_source=fix_xml_ampersands, headers={'Referer': url}) formats = [] From 19d6991312405f5af108af28b3721966720fc72d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Jan 2019 03:03:53 +0700 Subject: [PATCH 0300/1201] [videomore] Improve extraction and fix season extractor (closes #18908) --- youtube_dl/extractor/videomore.py | 96 ++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 9b56630de..e3eda3327 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -4,8 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, + orderedSet, + parse_duration, + str_or_none, + unified_strdate, + url_or_none, xpath_element, xpath_text, ) @@ -13,7 +19,19 @@ from ..utils import ( class VideomoreIE(InfoExtractor): IE_NAME = 'videomore' - _VALID_URL = r'videomore:(?P<sid>\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P<id>\d+)(?:[/?#&]|\.(?:xml|json)|$)' + _VALID_URL = r'''(?x) + videomore:(?P<sid>\d+)$| + https?://(?:player\.)?videomore\.ru/ + (?: + (?: + embed| + [^/]+/[^/]+ + )/| + [^/]*\?.*?\btrack_id= + ) + (?P<id>\d+) + (?:[/?#&]|\.(?:xml|json)|$) + ''' _TESTS = [{ 'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617', 'md5': '44455a346edc0d509ac5b5a5b531dc35', @@ -79,6 +97,9 @@ class VideomoreIE(InfoExtractor): }, { 'url': 'videomore:367617', 'only_matching': True, + }, { + 'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=', + 'only_matching': True, }] @staticmethod @@ -136,7 +157,7 @@ class VideomoreIE(InfoExtractor): class VideomoreVideoIE(InfoExtractor): IE_NAME = 'videomore:video' - _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)[/?#&]*$' + _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)(?:/*|[?#&].*?)$' _TESTS = [{ # single video with og:video:iframe 'url': 'http://videomore.ru/elki_3', @@ -176,6 +197,9 @@ class VideomoreVideoIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so', + 'only_matching': True, }] @classmethod @@ -196,13 +220,16 @@ class VideomoreVideoIE(InfoExtractor): r'track-id=["\'](\d+)', r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id') video_url = 'videomore:%s' % video_id + else: + video_id = None - return self.url_result(video_url, VideomoreIE.ie_key()) + return self.url_result( + video_url, ie=VideomoreIE.ie_key(), video_id=video_id) class VideomoreSeasonIE(InfoExtractor): IE_NAME = 'videomore:season' - _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)[/?#&]*$' + _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$' _TESTS = [{ 'url': 'http://videomore.ru/molodezhka/sezon_promo', 'info_dict': { @@ -210,8 +237,16 @@ class VideomoreSeasonIE(InfoExtractor): 'title': 'Молодежка Промо', }, 'playlist_mincount': 12, + }, { + 'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so', + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url)) + else super(VideomoreSeasonIE, cls).suitable(url)) + def _real_extract(self, url): display_id = self._match_id(url) @@ -219,9 +254,54 @@ class VideomoreSeasonIE(InfoExtractor): title = self._og_search_title(webpage) - entries = [ - self.url_result(item) for item in re.findall( - r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"' - % display_id, webpage)] + data = self._parse_json( + self._html_search_regex( + r'\bclass=["\']seasons-tracks["\'][^>]+\bdata-custom-data=(["\'])(?P<value>{.+?})\1', + webpage, 'data', default='{}', group='value'), + display_id, fatal=False) + + entries = [] + + if data: + episodes = data.get('episodes') + if isinstance(episodes, list): + for ep in episodes: + if not isinstance(ep, dict): + continue + ep_id = int_or_none(ep.get('id')) + ep_url = url_or_none(ep.get('url')) + if ep_id: + e = { + 'url': 'videomore:%s' % ep_id, + 'id': compat_str(ep_id), + } + elif ep_url: + e = {'url': ep_url} + else: + continue + e.update({ + '_type': 'url', + 'ie_key': VideomoreIE.ie_key(), + 'title': str_or_none(ep.get('title')), + 'thumbnail': url_or_none(ep.get('image')), + 'duration': parse_duration(ep.get('duration')), + 'episode_number': int_or_none(ep.get('number')), + 'upload_date': unified_strdate(ep.get('date')), + }) + entries.append(e) + + if not entries: + entries = [ + self.url_result( + 'videomore:%s' % video_id, ie=VideomoreIE.ie_key(), + video_id=video_id) + for video_id in orderedSet(re.findall( + r':(?:id|key)=["\'](\d+)["\']', webpage))] + + if not entries: + entries = [ + self.url_result(item) for item in re.findall( + r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"' + % display_id, webpage)] return self.playlist_result(entries, display_id, title) From 4b85f0f9db9329ef1774a66c3e2fd4da558a5201 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 22 Jan 2019 14:38:29 +0100 Subject: [PATCH 0301/1201] [vrv] add support for authentication(closes #14307) --- youtube_dl/extractor/vrv.py | 95 ++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 483a3be3a..014513051 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -11,10 +11,12 @@ import time from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_urllib_parse_urlencode, compat_urllib_parse, ) from ..utils import ( + ExtractorError, float_or_none, int_or_none, ) @@ -24,29 +26,41 @@ class VRVBaseIE(InfoExtractor): _API_DOMAIN = None _API_PARAMS = {} _CMS_SIGNING = {} + _TOKEN = None + _TOKEN_SECRET = '' def _call_api(self, path, video_id, note, data=None): + # https://tools.ietf.org/html/rfc5849#section-3 base_url = self._API_DOMAIN + '/core/' + path - encoded_query = compat_urllib_parse_urlencode({ + query = { 'oauth_consumer_key': self._API_PARAMS['oAuthKey'], 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'oauth_signature_method': 'HMAC-SHA1', 'oauth_timestamp': int(time.time()), - 'oauth_version': '1.0', - }) + } + if self._TOKEN: + query['oauth_token'] = self._TOKEN + encoded_query = compat_urllib_parse_urlencode(query) headers = self.geo_verification_headers() if data: data = json.dumps(data).encode() headers['Content-Type'] = 'application/json' - method = 'POST' if data else 'GET' - base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')]) + base_string = '&'.join([ + 'POST' if data else 'GET', + compat_urllib_parse.quote(base_url, ''), + compat_urllib_parse.quote(encoded_query, '')]) oauth_signature = base64.b64encode(hmac.new( - (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'), + (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'), base_string.encode(), hashlib.sha1).digest()).decode() encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '') - return self._download_json( - '?'.join([base_url, encoded_query]), video_id, - note='Downloading %s JSON metadata' % note, headers=headers, data=data) + try: + return self._download_json( + '?'.join([base_url, encoded_query]), video_id, + note='Downloading %s JSON metadata' % note, headers=headers, data=data) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True) + raise def _call_cms(self, path, video_id, note): if not self._CMS_SIGNING: @@ -55,19 +69,22 @@ class VRVBaseIE(InfoExtractor): self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) - def _set_api_params(self, webpage, video_id): - if not self._API_PARAMS: - self._API_PARAMS = self._parse_json(self._search_regex( - r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>', - webpage, 'api config'), video_id)['cxApiParams'] - self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') - def _get_cms_resource(self, resource_key, video_id): return self._call_api( 'cms_resource', video_id, 'resource path', data={ 'resource_key': resource_key, })['__links__']['cms_resource']['href'] + def _real_initialize(self): + webpage = self._download_webpage( + 'https://vrv.co/', None, headers=self.geo_verification_headers()) + self._API_PARAMS = self._parse_json(self._search_regex( + [ + r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)', + r'window\.__APP_CONFIG__\s*=\s*({.+})' + ], webpage, 'app config'), None)['cxApiParams'] + self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') + class VRVIE(VRVBaseIE): IE_NAME = 'vrv' @@ -86,6 +103,22 @@ class VRVIE(VRVBaseIE): 'skip_download': True, }, }] + _NETRC_MACHINE = 'vrv' + + def _real_initialize(self): + super(VRVIE, self)._real_initialize() + + email, password = self._get_login_info() + if email is None: + return + + token_credentials = self._call_api( + 'authenticate/by:credentials', None, 'Token Credentials', data={ + 'email': email, + 'password': password, + }) + self._TOKEN = token_credentials['oauth_token'] + self._TOKEN_SECRET = token_credentials['oauth_token_secret'] def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): if not url or stream_format not in ('hls', 'dash'): @@ -116,28 +149,16 @@ class VRVIE(VRVBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, - headers=self.geo_verification_headers()) - media_resource = self._parse_json(self._search_regex( - [ - r'window\.__INITIAL_STATE__\s*=\s*({.+?})(?:</script>|;)', - r'window\.__INITIAL_STATE__\s*=\s*({.+})' - ], webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {} - video_data = media_resource.get('json') - if not video_data: - self._set_api_params(webpage, video_id) - episode_path = self._get_cms_resource( - 'cms:/episodes/' + video_id, video_id) - video_data = self._call_cms(episode_path, video_id, 'video') + episode_path = self._get_cms_resource( + 'cms:/episodes/' + video_id, video_id) + video_data = self._call_cms(episode_path, video_id, 'video') title = video_data['title'] - streams_json = media_resource.get('streams', {}).get('json', {}) - if not streams_json: - self._set_api_params(webpage, video_id) - streams_path = video_data['__links__']['streams']['href'] - streams_json = self._call_cms(streams_path, video_id, 'streams') + streams_path = video_data['__links__'].get('streams', {}).get('href') + if not streams_path: + self.raise_login_required() + streams_json = self._call_cms(streams_path, video_id, 'streams') audio_locale = streams_json.get('audio_locale') formats = [] @@ -202,11 +223,7 @@ class VRVSeriesIE(VRVBaseIE): def _real_extract(self, url): series_id = self._match_id(url) - webpage = self._download_webpage( - url, series_id, - headers=self.geo_verification_headers()) - self._set_api_params(webpage, series_id) seasons_path = self._get_cms_resource( 'cms:/seasons?series_id=' + series_id, series_id) seasons_data = self._call_cms(seasons_path, series_id, 'seasons') From 503b604a316837b9dd6ef32045e4e9bbfb6a1363 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 22 Jan 2019 18:21:37 +0100 Subject: [PATCH 0302/1201] [vrv] fix oauth signing for python 2(#14307) --- youtube_dl/extractor/vrv.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 014513051..6c060ae76 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -32,14 +32,14 @@ class VRVBaseIE(InfoExtractor): def _call_api(self, path, video_id, note, data=None): # https://tools.ietf.org/html/rfc5849#section-3 base_url = self._API_DOMAIN + '/core/' + path - query = { - 'oauth_consumer_key': self._API_PARAMS['oAuthKey'], - 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), - 'oauth_signature_method': 'HMAC-SHA1', - 'oauth_timestamp': int(time.time()), - } + query = [ + ('oauth_consumer_key', self._API_PARAMS['oAuthKey']), + ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])), + ('oauth_signature_method', 'HMAC-SHA1'), + ('oauth_timestamp', int(time.time())), + ] if self._TOKEN: - query['oauth_token'] = self._TOKEN + query.append(('oauth_token', self._TOKEN)) encoded_query = compat_urllib_parse_urlencode(query) headers = self.geo_verification_headers() if data: From 278d061a0c5eae20963c0a6df4b9b13fd1537186 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 03:51:29 +0700 Subject: [PATCH 0303/1201] [pornhub] Bypass scrape detection (closes #5930) --- youtube_dl/extractor/pornhub.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index e377de196..f5f3e6593 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -10,7 +10,9 @@ from .common import InfoExtractor from ..compat import ( compat_HTTPError, compat_str, + compat_urllib_request, ) +from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, int_or_none, @@ -126,6 +128,26 @@ class PornHubIE(InfoExtractor): 'only_matching': True, }] + def _download_webpage_handle(self, *args, **kwargs): + def dl(*args, **kwargs): + return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs) + + webpage, urlh = dl(*args, **kwargs) + + if any(re.search(p, webpage) for p in ( + r'<body\b[^>]+\bonload=["\']go\(\)', + r'document\.cookie\s*=\s*["\']RNKEY=', + r'document\.location\.reload\(true\)')): + url_or_request = args[0] + url = (url_or_request.get_full_url() + if isinstance(url_or_request, compat_urllib_request.Request) + else url_or_request) + phantom = PhantomJSwrapper(self, required_version='2.0') + phantom.get(url, html=webpage) + webpage, urlh = dl(*args, **kwargs) + + return webpage, urlh + @staticmethod def _extract_urls(webpage): return re.findall( From 6510a3aa971c00525969040ad654249c0c73f125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 03:55:41 +0700 Subject: [PATCH 0304/1201] [crunchyroll] Extend _VALID_URL (closes #18955) --- youtube_dl/extractor/crunchyroll.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 4a68d092b..5e2cbe41d 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -144,7 +144,7 @@ class CrunchyrollBaseIE(InfoExtractor): class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): IE_NAME = 'crunchyroll' - _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)' + _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { @@ -269,6 +269,9 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): }, { 'url': 'http://www.crunchyroll.com/media-723735', 'only_matching': True, + }, { + 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921', + 'only_matching': True, }] _FORMAT_IDS = { From 71a1f61700789fb0d61fc6ad9681b6f0899d2f51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 04:12:06 +0700 Subject: [PATCH 0305/1201] [pornhub] Apply scrape detection bypass for all extractors --- youtube_dl/extractor/pornhub.py | 46 +++++++++++++++++---------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index f5f3e6593..be93d5d48 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -24,7 +24,29 @@ from ..utils import ( ) -class PornHubIE(InfoExtractor): +class PornHubBaseIE(InfoExtractor): + def _download_webpage_handle(self, *args, **kwargs): + def dl(*args, **kwargs): + return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) + + webpage, urlh = dl(*args, **kwargs) + + if any(re.search(p, webpage) for p in ( + r'<body\b[^>]+\bonload=["\']go\(\)', + r'document\.cookie\s*=\s*["\']RNKEY=', + r'document\.location\.reload\(true\)')): + url_or_request = args[0] + url = (url_or_request.get_full_url() + if isinstance(url_or_request, compat_urllib_request.Request) + else url_or_request) + phantom = PhantomJSwrapper(self, required_version='2.0') + phantom.get(url, html=webpage) + webpage, urlh = dl(*args, **kwargs) + + return webpage, urlh + + +class PornHubIE(PornHubBaseIE): IE_DESC = 'PornHub and Thumbzilla' _VALID_URL = r'''(?x) https?:// @@ -128,26 +150,6 @@ class PornHubIE(InfoExtractor): 'only_matching': True, }] - def _download_webpage_handle(self, *args, **kwargs): - def dl(*args, **kwargs): - return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs) - - webpage, urlh = dl(*args, **kwargs) - - if any(re.search(p, webpage) for p in ( - r'<body\b[^>]+\bonload=["\']go\(\)', - r'document\.cookie\s*=\s*["\']RNKEY=', - r'document\.location\.reload\(true\)')): - url_or_request = args[0] - url = (url_or_request.get_full_url() - if isinstance(url_or_request, compat_urllib_request.Request) - else url_or_request) - phantom = PhantomJSwrapper(self, required_version='2.0') - phantom.get(url, html=webpage) - webpage, urlh = dl(*args, **kwargs) - - return webpage, urlh - @staticmethod def _extract_urls(webpage): return re.findall( @@ -329,7 +331,7 @@ class PornHubIE(InfoExtractor): } -class PornHubPlaylistBaseIE(InfoExtractor): +class PornHubPlaylistBaseIE(PornHubBaseIE): def _extract_entries(self, webpage, host): # Only process container div with main playlist content skipping # drop-down menu that uses similar pattern for videos (see From 0670bdd8f2bca39fdeadc63e1cd53b5d5b3e638c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 04:43:55 +0700 Subject: [PATCH 0306/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ChangeLog b/ChangeLog index 902301765..687796a0e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +version <unreleased> + +Core +* [utils] Fix urljoin for paths with non-http(s) schemes +* [extractor/common] Improve jwplayer relative URL handling (#18892) ++ [YoutubeDL] Add negation support for string comparisons in format selection + expressions (#18600, #18805) +* [extractor/common] Improve HLS video-only format detection (#18923) + +Extractors +* [crunchyroll] Extend URL regular expression (#18955) +* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722, + #17197, #18338 #18842, #18899) ++ [vrv] Add support for authentication (#14307) +* [videomore:season] Fix extraction +* [videomore] Improve extraction (#18908) ++ [tnaflix] Pass Referer in metadata request (#18925) +* [radiocanada] Relax DRM check (#18608, #18609) +* [vimeo] Fix video password verification for videos protected by + Referer HTTP header ++ [hketv] Add support for hkedcity.net (#18696) ++ [streamango] Add support for fruithosts.net (#18710) ++ [instagram] Add support for tags (#18757) ++ [odnoklassniki] Detect paid videos (#18876) +* [ted] Correct acodec for HTTP formats (#18923) +* [cartoonnetwork] Fix extraction (#15664, #17224) +* [vimeo] Fix extraction for password protected player URLs (#18889) + + version 2019.01.17 Extractors From 435e382423f860aca82a58d7c3db58cbfa242b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 23 Jan 2019 04:46:55 +0700 Subject: [PATCH 0307/1201] release 2019.01.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 841bca914..db3ebaeed 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.17** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.17 +[debug] youtube-dl version 2019.01.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 687796a0e..55ad44315 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.23 Core * [utils] Fix urljoin for paths with non-http(s) schemes diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c01409419..d759d0273 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -361,6 +361,7 @@ - **hitbox** - **hitbox:live** - **HitRecord** + - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau - **HornBunny** - **HotNewHipHop** - **hotstar** @@ -386,6 +387,7 @@ - **IndavideoEmbed** - **InfoQ** - **Instagram** + - **instagram:tag**: Instagram hashtag search - **instagram:user**: Instagram user profile - **Internazionale** - **InternetVideoArchive** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ea3f62928..d77949eed 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.17' +__version__ = '2019.01.23' From e118a8794ffe5a3a414afd489726f34d753b0b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Jan 2019 01:34:41 +0700 Subject: [PATCH 0308/1201] [YoutubeDL] Fix typo in string negation implementation and add more tests (closes #18961) --- test/test_YoutubeDL.py | 30 +++++++++++++++++++++++++++--- youtube_dl/YoutubeDL.py | 2 +- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index df8994b84..1d7452744 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -242,6 +242,7 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection_string_ops(self): formats = [ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, + {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL}, ] info_dict = _make_result(formats) @@ -253,6 +254,11 @@ class TestFormatSelection(unittest.TestCase): # does not equal (!=) ydl = YDL({'format': '[format_id!=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # starts with (^=) @@ -262,7 +268,12 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], 'abc-cba') # does not start with (!^=) - ydl = YDL({'format': '[format_id!^=abc-cba]'}) + ydl = YDL({'format': '[format_id!^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # ends with ($=) @@ -272,16 +283,29 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], 'abc-cba') # does not end with (!$=) - ydl = YDL({'format': '[format_id!$=abc-cba]'}) + ydl = YDL({'format': '[format_id!$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # contains (*=) - ydl = YDL({'format': '[format_id*=-]'}) + ydl = YDL({'format': '[format_id*=bc-cb]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not contain (!*=) + ydl = YDL({'format': '[format_id!*=bc-cb]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + ydl = YDL({'format': '[format_id!*=-]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a827414dc..80ed8d7e5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1078,7 +1078,7 @@ class YoutubeDL(object): comparison_value = m.group('value') str_op = STR_OPERATORS[m.group('op')] if m.group('negation'): - op = lambda attr, value: not str_op + op = lambda attr, value: not str_op(attr, value) else: op = str_op From 7d311586eda9eae9430da3f6d18932d79127daa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Jan 2019 01:44:09 +0700 Subject: [PATCH 0309/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 55ad44315..a28c6e951 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Core +* [YoutubeDL] Fix negation for string operators in format selection (#18961) + + version 2019.01.23 Core From a1e171233d86a064865353cc820969c10cb1f251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Jan 2019 01:46:23 +0700 Subject: [PATCH 0310/1201] release 2019.01.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index db3ebaeed..63aefe013 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.23 +[debug] youtube-dl version 2019.01.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a28c6e951..1fda747bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.24 Core * [YoutubeDL] Fix negation for string operators in format selection (#18961) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d77949eed..18c1f8d4c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.23' +__version__ = '2019.01.24' From 9713d1d1e0a7eff5c1b9873a2f4f054111a568ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Jan 2019 02:30:12 +0700 Subject: [PATCH 0311/1201] [openload] Add support for oload.club (closes #18969) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index cf51e4770..b713e78b8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? (?: openload\.(?:co|io|link)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club) ) )/ (?:f|embed)/ @@ -334,6 +334,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.fun/f/gb6G1H4sHXY', 'only_matching': True, + }, { + 'url': 'https://oload.club/f/Nr1L-aZ2dbQ', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 118afcf52ff23726e5f0c436083710f5c63230fa Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 23 Jan 2019 22:16:21 +0100 Subject: [PATCH 0312/1201] [go] fix adobe pass requests for Disney Now(closes #18901) --- youtube_dl/extractor/go.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index ec9dd6e3a..206d89e82 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -25,15 +25,15 @@ class GoIE(AdobePassIE): }, 'watchdisneychannel': { 'brand': '004', - 'requestor_id': 'Disney', + 'resource_id': 'Disney', }, 'watchdisneyjunior': { 'brand': '008', - 'requestor_id': 'DisneyJunior', + 'resource_id': 'DisneyJunior', }, 'watchdisneyxd': { 'brand': '009', - 'requestor_id': 'DisneyXD', + 'resource_id': 'DisneyXD', } } _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ @@ -130,8 +130,8 @@ class GoIE(AdobePassIE): 'device': '001', } if video_data.get('accesslevel') == '1': - requestor_id = site_info['requestor_id'] - resource = self._get_mvpd_resource( + requestor_id = site_info.get('requestor_id', 'DisneyChannels') + resource = site_info.get('resource_id') or self._get_mvpd_resource( requestor_id, title, video_id, None) auth = self._extract_mvpd_auth( url, video_id, requestor_id, resource) From eb35b163adf61f8ff0ee6c504e98bc94db16e705 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 24 Jan 2019 20:23:04 +0100 Subject: [PATCH 0313/1201] [postprocessor/ffmpeg] fallback to ffmpeg/avconv for audio codec detection(closes #681) --- youtube_dl/postprocessor/ffmpeg.py | 53 +++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 39a905380..b952b0970 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -9,9 +9,6 @@ import re from .common import AudioConversionError, PostProcessor -from ..compat import ( - compat_subprocess_get_DEVNULL, -) from ..utils import ( encodeArgument, encodeFilename, @@ -165,27 +162,45 @@ class FFmpegPostProcessor(PostProcessor): return self._paths[self.probe_basename] def get_audio_codec(self, path): - if not self.probe_available: - raise PostProcessingError('ffprobe or avprobe not found. Please install one.') + if not self.probe_available and not self.available: + raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. Please install one.') try: - cmd = [ - encodeFilename(self.probe_executable, True), - encodeArgument('-show_streams'), - encodeFilename(self._ffmpeg_filename_argument(path), True)] + if self.probe_available: + cmd = [ + encodeFilename(self.probe_executable, True), + encodeArgument('-show_streams')] + else: + cmd = [ + encodeFilename(self.executable, True), + encodeArgument('-i')] + cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) - handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) - output = handle.communicate()[0] - if handle.wait() != 0: + self._downloader.to_screen( + '[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + handle = subprocess.Popen( + cmd, stderr=subprocess.PIPE, + stdout=subprocess.PIPE, stdin=subprocess.PIPE) + stdout_data, stderr_data = handle.communicate() + expected_ret = 0 if self.probe_available else 1 + if handle.wait() != expected_ret: return None except (IOError, OSError): return None - audio_codec = None - for line in output.decode('ascii', 'ignore').split('\n'): - if line.startswith('codec_name='): - audio_codec = line.split('=')[1].strip() - elif line.strip() == 'codec_type=audio' and audio_codec is not None: - return audio_codec + output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore') + if self.probe_available: + audio_codec = None + for line in output.split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + else: + # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME + mobj = re.search( + r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)', + output) + if mobj: + return mobj.group(1) return None def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): From 0eba178fce80923515f4a9ac411e46648a19d78c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Jan 2019 04:04:58 +0700 Subject: [PATCH 0314/1201] [nhk] Extend _VALID_URL (closes #18968) --- youtube_dl/extractor/nhk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 5c8cd76dc..d4acbcc3e 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -5,8 +5,8 @@ from ..utils import ExtractorError class NhkVodIE(InfoExtractor): - _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>[^/]+/[^/?#&]+)' - _TEST = { + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/(?:vod|ondemand)/(?P<id>[^/]+/[^/?#&]+)' + _TESTS = [{ # Videos available only for a limited period of time. Visit # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815', @@ -19,7 +19,10 @@ class NhkVodIE(InfoExtractor): 'episode': 'The Kimono as Global Fashion', }, 'skip': 'Videos available only for a limited period of time', - } + }, { + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', + 'only_matching': True, + }] _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k' def _real_extract(self, url): From 1602a240a7742f9e0a02b3f0effd215d00d859f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Jan 2019 04:16:49 +0700 Subject: [PATCH 0315/1201] [drtv] Fix extraction (closes #18989) --- youtube_dl/extractor/drtv.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index f757745ba..8d63ca433 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -77,10 +77,9 @@ class DRTVIE(InfoExtractor): r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), webpage, 'video id') - programcard = self._download_json( - 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, - video_id, 'Downloading video JSON') - data = programcard['Data'][0] + data = self._download_json( + 'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id, + video_id, 'Downloading video JSON', query={'expanded': 'true'}) title = remove_end(self._og_search_title( webpage, default=None), ' | TV | DR') or data['Title'] @@ -97,7 +96,7 @@ class DRTVIE(InfoExtractor): formats = [] subtitles = {} - for asset in data['Assets']: + for asset in [data['PrimaryAsset']]: kind = asset.get('Kind') if kind == 'Image': thumbnail = asset.get('Uri') From ae18d58297c16a300a89693360efd19be4a97e92 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 25 Jan 2019 11:01:27 +0100 Subject: [PATCH 0316/1201] [usatoday] fix extraction for videos with custom brightcove partner id(closes #18990) --- youtube_dl/extractor/usatoday.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/usatoday.py b/youtube_dl/extractor/usatoday.py index e5678dc78..b2103448d 100644 --- a/youtube_dl/extractor/usatoday.py +++ b/youtube_dl/extractor/usatoday.py @@ -3,21 +3,23 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, get_element_by_attribute, parse_duration, + try_get, update_url_query, - ExtractorError, ) from ..compat import compat_str class USATodayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)' - _TEST = { + _TESTS = [{ + # Brightcove Partner ID = 29906170001 'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/', - 'md5': '4d40974481fa3475f8bccfd20c5361f8', + 'md5': '033587d2529dc3411a1ab3644c3b8827', 'info_dict': { - 'id': '81729424', + 'id': '4799374959001', 'ext': 'mp4', 'title': 'US, France warn Syrian regime ahead of new peace talks', 'timestamp': 1457891045, @@ -25,8 +27,20 @@ class USATodayIE(InfoExtractor): 'uploader_id': '29906170001', 'upload_date': '20160313', } - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/29906170001/38a9eecc-bdd8-42a3-ba14-95397e48b3f8_default/index.html?videoId=%s' + }, { + # ui-video-data[asset_metadata][items][brightcoveaccount] = 28911775001 + 'url': 'https://www.usatoday.com/story/tech/science/2018/08/21/yellowstone-supervolcano-eruption-stop-worrying-its-blow/973633002/', + 'info_dict': { + 'id': '5824495846001', + 'ext': 'mp4', + 'title': 'Yellowstone more likely to crack rather than explode', + 'timestamp': 1534790612, + 'description': 'md5:3715e7927639a4f16b474e9391687c62', + 'uploader_id': '28911775001', + 'upload_date': '20180820', + } + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' def _real_extract(self, url): display_id = self._match_id(url) @@ -35,10 +49,11 @@ class USATodayIE(InfoExtractor): if not ui_video_data: raise ExtractorError('no video on the webpage', expected=True) video_data = self._parse_json(ui_video_data, display_id) + item = try_get(video_data, lambda x: x['asset_metadata']['items'], dict) or {} return { '_type': 'url_transparent', - 'url': self.BRIGHTCOVE_URL_TEMPLATE % video_data['brightcove_id'], + 'url': self.BRIGHTCOVE_URL_TEMPLATE % (item.get('brightcoveaccount', '29906170001'), item.get('brightcoveid') or video_data['brightcove_id']), 'id': compat_str(video_data['id']), 'title': video_data['title'], 'thumbnail': video_data.get('thumbnail'), From 252abb1e8b881aa9d3942c436711ac33235b37cd Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sat, 26 Jan 2019 15:29:19 +0700 Subject: [PATCH 0317/1201] [README.md] Mention more convenience extraction functions --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ba982907..c1572f771 100644 --- a/README.md +++ b/README.md @@ -1213,7 +1213,7 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` -### Use safe conversion functions +### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. @@ -1221,6 +1221,8 @@ Use `url_or_none` for safe URL processing. Use `try_get` for safe metadata extraction from parsed JSON. +Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. + Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples From 845333acf6280761d19f91b3e018c418d922a0de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 04:14:54 +0700 Subject: [PATCH 0318/1201] [wakanim] Add extractor (closes #14374) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/wakanim.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/wakanim.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 574a47e6d..2ffcffa9e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1373,6 +1373,7 @@ from .vuclip import VuClipIE from .vvvvid import VVVVIDIE from .vyborymos import VyboryMosIE from .vzaar import VzaarIE +from .wakanim import WakanimIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/wakanim.py b/youtube_dl/extractor/wakanim.py new file mode 100644 index 000000000..1d588bdd6 --- /dev/null +++ b/youtube_dl/extractor/wakanim.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + merge_dicts, + urljoin, +) + + +class WakanimIE(InfoExtractor): + _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)' + _TEST = { + 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu', + 'info_dict': { + 'id': '2997', + 'ext': 'mp4', + 'title': 'Episode 02', + 'description': 'md5:2927701ea2f7e901de8bfa8d39b2852d', + 'series': 'The Asterisk War (OmU.)', + 'season_number': 1, + 'episode': 'Episode 02', + 'episode_number': 2, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + m3u8_url = urljoin(url, self._search_regex( + r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url', + group='url')) + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + info = self._search_json_ld(webpage, video_id, default={}) + + title = self._search_regex( + (r'<h1[^>]+\bclass=["\']episode_h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<span[^>]+\bclass=["\']episode_title["\'][^>]*>(?P<title>[^<]+)'), + webpage, 'title', default=None, group='title') + + return merge_dicts(info, { + 'id': video_id, + 'title': title, + 'formats': formats, + }) From 458fd30f56785d514862dcb8a604a329d8e29ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 04:36:58 +0700 Subject: [PATCH 0319/1201] [extractor/common] Extract season in _json_ld --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 95456b291..c4ea2882f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1249,7 +1249,10 @@ class InfoExtractor(object): info['title'] = episode_name part_of_season = e.get('partOfSeason') if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'): - info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) + info.update({ + 'season': unescapeHTML(part_of_season.get('name')), + 'season_number': int_or_none(part_of_season.get('seasonNumber')), + }) part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): info['series'] = unescapeHTML(part_of_series.get('name')) From 30cd1a5f3920d7485225a5d57b6ce41be4cde672 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 26 Jan 2019 22:52:54 +0100 Subject: [PATCH 0320/1201] [wakanim] detect DRM protected videos --- youtube_dl/extractor/wakanim.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wakanim.py b/youtube_dl/extractor/wakanim.py index 1d588bdd6..f9a2395d9 100644 --- a/youtube_dl/extractor/wakanim.py +++ b/youtube_dl/extractor/wakanim.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, merge_dicts, urljoin, ) @@ -10,7 +11,7 @@ from ..utils import ( class WakanimIE(InfoExtractor): _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu', 'info_dict': { 'id': '2997', @@ -26,7 +27,11 @@ class WakanimIE(InfoExtractor): 'format': 'bestvideo', 'skip_download': True, }, - } + }, { + # DRM Protected + 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -36,6 +41,12 @@ class WakanimIE(InfoExtractor): m3u8_url = urljoin(url, self._search_regex( r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url', group='url')) + # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls + encryption = self._search_regex( + r'encryption%3D(c(?:enc|bc(?:s-aapl)?))', + m3u8_url, 'encryption', default=None) + if encryption and encryption in ('cenc', 'cbcs-aapl'): + raise ExtractorError('This video is DRM protected.', expected=True) formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', From 1fcc91663bc84a599cc613ff1fa0e4bc15f42a9e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 27 Jan 2019 10:53:38 +0100 Subject: [PATCH 0321/1201] [vice] fix extraction for locked videos(closes #16248) --- youtube_dl/extractor/vice.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 538258617..8fdfd743d 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -94,7 +94,6 @@ class ViceIE(AdobePassIE): 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1', 'only_matching': True, }] - _PREPLAY_HOST = 'vms.vice' @staticmethod def _extract_urls(webpage): @@ -158,9 +157,8 @@ class ViceIE(AdobePassIE): }) try: - host = 'www.viceland' if is_locked else self._PREPLAY_HOST preplay = self._download_json( - 'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id), + 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id), video_id, query=query) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401): From bf8ebc9cfe1ae2b62baf2116f84748db03c4df7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 21:25:43 +0700 Subject: [PATCH 0322/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog b/ChangeLog index 1fda747bb..9d5c25273 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +version <unreleased> + +Core ++ [extractor/common] Extract season in _json_ld +* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection + (#681) + +Extractors +* [vice] Fix extraction for locked videos (#16248) ++ [wakanim] Detect DRM protected videos ++ [wakanim] Add support for wakanim.tv (#14374) +* [usatoday] Fix extraction for videos with custom brightcove partner id + (#18990) +* [drtv] Fix extraction (#18989) +* [nhk] Extend URL regular expression (#18968) +* [go] Fix Adobe Pass requests for Disney Now (#18901) ++ [openload] Add support for oload.club (#18969) + + version 2019.01.24 Core From e71be6ee9f239308765443d49d91358fa306e48a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Jan 2019 21:28:09 +0700 Subject: [PATCH 0323/1201] release 2019.01.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 4 +++- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 63aefe013..f529e3f4b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.24 +[debug] youtube-dl version 2019.01.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a71b045d0..6c1739860 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -339,7 +339,7 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` -### Use safe conversion functions +### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. @@ -347,6 +347,8 @@ Use `url_or_none` for safe URL processing. Use `try_get` for safe metadata extraction from parsed JSON. +Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. + Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples diff --git a/ChangeLog b/ChangeLog index 9d5c25273..d94fe36ec 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.27 Core + [extractor/common] Extract season in _json_ld diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d759d0273..6377bf815 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1069,6 +1069,7 @@ - **VVVVID** - **VyboryMos** - **Vzaar** + - **Wakanim** - **Walla** - **WalyTV** - **washingtonpost** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 18c1f8d4c..ec89cfc64 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.24' +__version__ = '2019.01.27' From 2b3afe6b0fee226bb1e61027d815df088cb40757 Mon Sep 17 00:00:00 2001 From: Andrew Udvare <audvare@gmail.com> Date: Sun, 27 Jan 2019 22:24:37 -0500 Subject: [PATCH 0324/1201] [postprocessor/ffmpeg] Disable "Last message repeated" messages which cause non-zero exit status --- youtube_dl/postprocessor/ffmpeg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index b952b0970..8ef03f43b 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -218,6 +218,7 @@ class FFmpegPostProcessor(PostProcessor): encodeFilename(self._ffmpeg_filename_argument(path), True) ]) cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + + ['-loglevel', 'repeat+info'] + files_cmd + [encodeArgument(o) for o in opts] + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) From 7f903dd8bfbc9c2de129d5b0be23ef62d8ca3df3 Mon Sep 17 00:00:00 2001 From: Tatsh <Tatsh@users.noreply.github.com> Date: Mon, 28 Jan 2019 10:57:14 -0500 Subject: [PATCH 0325/1201] [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding subtitles (closes #19042) Related issue: https://trac.ffmpeg.org/ticket/6016 --- youtube_dl/postprocessor/ffmpeg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index b952b0970..fff2021ff 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -407,6 +407,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', + # Don't copy Apple TV chapters track, bin_data (see #19042, #19024, + # https://trac.ffmpeg.org/ticket/6016) + '-map', '-0:d', ] if information['ext'] == 'mp4': opts += ['-c:s', 'mov_text'] From 61ff92e11ea876532697451b1ed727f42274b109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Jan 2019 01:59:56 +0700 Subject: [PATCH 0326/1201] [postprocessor/ffmpeg] Wrap loglevel args in encodeArgument --- youtube_dl/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 33dbcad9f..88b9ae9be 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -218,7 +218,7 @@ class FFmpegPostProcessor(PostProcessor): encodeFilename(self._ffmpeg_filename_argument(path), True) ]) cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + - ['-loglevel', 'repeat+info'] + + [encodeArgument('-loglevel'), encodeArgument('repeat+info')] + files_cmd + [encodeArgument(o) for o in opts] + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) From a81daba2311cb4d6c5bc7e62b47438a78aa5c10f Mon Sep 17 00:00:00 2001 From: Alexander Seiler <seileralex@gmail.com> Date: Mon, 28 Jan 2019 20:20:46 +0100 Subject: [PATCH 0327/1201] [zattoo] Add support for tv.salt.ch --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/zattoo.py | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2ffcffa9e..9d776ff45 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1497,6 +1497,7 @@ from .zattoo import ( QuantumTVIE, QuicklineIE, QuicklineLiveIE, + SaltTVIE, SAKTVIE, VTXTVIE, WalyTVIE, diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index 896276301..ee514666b 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -420,3 +420,14 @@ class EinsUndEinsTVIE(ZattooIE): 'url': 'https://www.1und1.tv/watch/abc/123-abc', 'only_matching': True, }] + + +class SaltTVIE(ZattooIE): + _NETRC_MACHINE = 'salttv' + _HOST = 'tv.salt.ch' + _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + + _TESTS = [{ + 'url': 'https://tv.salt.ch/watch/abc/123-abc', + 'only_matching': True, + }] From 41c2c254d3c30afde395e8abbe0ced2c53485a78 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Jan 2019 22:39:08 +0100 Subject: [PATCH 0328/1201] [fox] fix extraction for free videos(#19060) --- youtube_dl/extractor/fox.py | 46 ++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index b1c91f095..2d6c97ec9 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -1,10 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -# import json -# import uuid +import json +import uuid from .adobepass import AdobePassIE +from ..compat import compat_str from ..utils import ( int_or_none, parse_age_limit, @@ -47,38 +48,31 @@ class FOXIE(AdobePassIE): 'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/', 'only_matching': True, }] - # _access_token = None + _access_token = None - # def _call_api(self, path, video_id, data=None): - # headers = { - # 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd', - # } - # if self._access_token: - # headers['Authorization'] = 'Bearer ' + self._access_token - # return self._download_json( - # 'https://api2.fox.com/v2.0/' + path, video_id, data=data, headers=headers) + def _call_api(self, path, video_id, data=None): + headers = { + 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd', + } + if self._access_token: + headers['Authorization'] = 'Bearer ' + self._access_token + return self._download_json( + 'https://api2.fox.com/v2.0/' + path, + video_id, data=data, headers=headers) - # def _real_initialize(self): - # self._access_token = self._call_api( - # 'login', None, json.dumps({ - # 'deviceId': compat_str(uuid.uuid4()), - # }).encode())['accessToken'] + def _real_initialize(self): + self._access_token = self._call_api( + 'login', None, json.dumps({ + 'deviceId': compat_str(uuid.uuid4()), + }).encode())['accessToken'] def _real_extract(self, url): video_id = self._match_id(url) - video = self._download_json( - 'https://api.fox.com/fbc-content/v1_5/video/%s' % video_id, - video_id, headers={ - 'apikey': 'abdcbed02c124d393b39e818a4312055', - 'Content-Type': 'application/json', - 'Referer': url, - }) - # video = self._call_api('vodplayer/' + video_id, video_id) + video = self._call_api('vodplayer/' + video_id, video_id) title = video['name'] - release_url = video['videoRelease']['url'] - # release_url = video['url'] + release_url = video['url'] data = try_get( video, lambda x: x['trackingData']['properties'], dict) or {} From 6df196f32e68ec22bd854c4d779b9d94e04e63b2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 29 Jan 2019 00:31:49 +0100 Subject: [PATCH 0329/1201] [fox] add support for locked videos using cookies(closes #19060) --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/fox.py | 50 +++++++++++----------- youtube_dl/extractor/nationalgeographic.py | 22 ++++++++++ 3 files changed, 51 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9d776ff45..b40be42e6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -692,7 +692,10 @@ from .myvi import ( MyviEmbedIE, ) from .myvidster import MyVidsterIE -from .nationalgeographic import NationalGeographicVideoIE +from .nationalgeographic import ( + NationalGeographicVideoIE, + NationalGeographicTVIE, +) from .naver import NaverIE from .nba import NBAIE from .nbc import ( diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 2d6c97ec9..568656542 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -5,19 +5,23 @@ import json import uuid from .adobepass import AdobePassIE -from ..compat import compat_str +from ..compat import ( + compat_HTTPError, + compat_str, + compat_urllib_parse_unquote, +) from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_duration, try_get, unified_timestamp, - update_url_query, ) class FOXIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?(?:fox\.com|nationalgeographic\.com/tv)/watch/(?P<id>[\da-fA-F]+)' + _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)' _TESTS = [{ # clip 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', @@ -32,6 +36,7 @@ class FOXIE(AdobePassIE): 'upload_date': '20170901', 'creator': 'FOX', 'series': 'Gotham', + 'age_limit': 14, }, 'params': { 'skip_download': True, @@ -44,15 +49,14 @@ class FOXIE(AdobePassIE): # episode, geo-restricted, tv provided required 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', 'only_matching': True, - }, { - 'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/', - 'only_matching': True, }] + _HOME_PAGE_URL = 'https://www.fox.com/' + _API_KEY = 'abdcbed02c124d393b39e818a4312055' _access_token = None def _call_api(self, path, video_id, data=None): headers = { - 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd', + 'X-Api-Key': self._API_KEY, } if self._access_token: headers['Authorization'] = 'Bearer ' + self._access_token @@ -61,10 +65,16 @@ class FOXIE(AdobePassIE): video_id, data=data, headers=headers) def _real_initialize(self): - self._access_token = self._call_api( - 'login', None, json.dumps({ - 'deviceId': compat_str(uuid.uuid4()), - }).encode())['accessToken'] + if not self._access_token: + mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth') + if mvpd_auth: + self._access_token = (self._parse_json(compat_urllib_parse_unquote( + mvpd_auth.value), None, fatal=False) or {}).get('accessToken') + if not self._access_token: + self._access_token = self._call_api( + 'login', None, json.dumps({ + 'deviceId': compat_str(uuid.uuid4()), + }).encode())['accessToken'] def _real_extract(self, url): video_id = self._match_id(url) @@ -73,25 +83,15 @@ class FOXIE(AdobePassIE): title = video['name'] release_url = video['url'] - - data = try_get( - video, lambda x: x['trackingData']['properties'], dict) or {} - - rating = video.get('contentRating') - if data.get('authRequired'): - resource = self._get_mvpd_resource( - 'fbc-fox', title, video.get('guid'), rating) - release_url = update_url_query( - release_url, { - 'auth': self._extract_mvpd_auth( - url, video_id, 'fbc-fox', resource) - }) m3u8_url = self._download_json(release_url, video_id)['playURL'] formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') self._sort_formats(formats) + data = try_get( + video, lambda x: x['trackingData']['properties'], dict) or {} + duration = int_or_none(video.get('durationInSeconds')) or int_or_none( video.get('duration')) or parse_duration(video.get('duration')) timestamp = unified_timestamp(video.get('datePublished')) @@ -117,7 +117,7 @@ class FOXIE(AdobePassIE): 'description': video.get('description'), 'duration': duration, 'timestamp': timestamp, - 'age_limit': parse_age_limit(rating), + 'age_limit': parse_age_limit(video.get('contentRating')), 'creator': creator, 'series': series, 'season_number': int_or_none(video.get('seasonNumber')), diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 165964ca0..ee12e2b47 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .fox import FOXIE from ..utils import ( smuggle_url, url_basename, @@ -58,3 +59,24 @@ class NationalGeographicVideoIE(InfoExtractor): {'force_smil_url': True}), 'id': guid, } + + +class NationalGeographicTVIE(FOXIE): + _VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)' + _TESTS = [{ + 'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/', + 'info_dict': { + 'id': '6a875e6e734b479beda26438c9f21138', + 'ext': 'mp4', + 'title': 'Why Nat Geo? Valley of the Boom', + 'description': 'The lives of prominent figures in the tech world, including their friendships, rivalries, victories and failures.', + 'timestamp': 1542662458, + 'upload_date': '20181119', + 'age_limit': 14, + }, + 'params': { + 'skip_download': True, + }, + }] + _HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/' + _API_KEY = '238bb0a0c2aba67922c48709ce0c06fd' From a2d821d7112fb1423f99ddf309a843c80cc3be2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Jan 2019 23:33:09 +0700 Subject: [PATCH 0330/1201] [drtv] Improve extraction (closes #19039) + Add support for EncryptedUri videos + Extract more metadata * Fix subtitles extraction --- youtube_dl/extractor/drtv.py | 133 +++++++++++++++++++++++++++-------- 1 file changed, 102 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 8d63ca433..c5f211128 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -1,15 +1,25 @@ # coding: utf-8 from __future__ import unicode_literals +import binascii +import hashlib +import re + + from .common import InfoExtractor +from ..aes import aes_cbc_decrypt +from ..compat import compat_urllib_parse_unquote from ..utils import ( + bytes_to_intlist, ExtractorError, int_or_none, + intlist_to_bytes, float_or_none, mimetype2ext, - parse_iso8601, - remove_end, + str_or_none, + unified_timestamp, update_url_query, + url_or_none, ) @@ -20,23 +30,31 @@ class DRTVIE(InfoExtractor): IE_NAME = 'drtv' _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', - 'md5': '7ae17b4e18eb5d29212f424a7511c184', + 'md5': '25e659cccc9a2ed956110a299fdf5983', 'info_dict': { 'id': 'klassen-darlig-taber-10', 'ext': 'mp4', 'title': 'Klassen - Dårlig taber (10)', 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', - 'timestamp': 1471991907, - 'upload_date': '20160823', + 'timestamp': 1539085800, + 'upload_date': '20181009', 'duration': 606.84, + 'series': 'Klassen', + 'season': 'Klassen I', + 'season_number': 1, + 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b', + 'episode': 'Episode 10', + 'episode_number': 10, + 'release_year': 2016, }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { # embed 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', 'info_dict': { - 'id': 'christiania-pusher-street-ryddes-drdkrjpo', + 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6', 'ext': 'mp4', - 'title': 'LIVE Christianias rydning af Pusher Street er i gang', + 'title': 'christiania pusher street ryddes drdkrjpo', 'description': 'md5:2a71898b15057e9b97334f61d04e6eb5', 'timestamp': 1472800279, 'upload_date': '20160902', @@ -45,17 +63,18 @@ class DRTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { # with SignLanguage formats 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', 'info_dict': { 'id': 'historien-om-danmark-stenalder', 'ext': 'mp4', - 'title': 'Historien om Danmark: Stenalder (1)', + 'title': 'Historien om Danmark: Stenalder', 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a', - 'timestamp': 1490401996, - 'upload_date': '20170325', - 'duration': 3502.04, + 'timestamp': 1546628400, + 'upload_date': '20190104', + 'duration': 3502.56, 'formats': 'mincount:20', }, 'params': { @@ -74,19 +93,26 @@ class DRTVIE(InfoExtractor): video_id = self._search_regex( (r'data-(?:material-identifier|episode-slug)="([^"]+)"', - r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), - webpage, 'video id') + r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), + webpage, 'video id', default=None) + + if not video_id: + video_id = compat_urllib_parse_unquote(self._search_regex( + r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)', + webpage, 'urn')) data = self._download_json( 'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id, video_id, 'Downloading video JSON', query={'expanded': 'true'}) - title = remove_end(self._og_search_title( - webpage, default=None), ' | TV | DR') or data['Title'] + title = str_or_none(data.get('Title')) or re.sub( + r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '', + self._og_search_title(webpage)) description = self._og_search_description( webpage, default=None) or data.get('Description') - timestamp = parse_iso8601(data.get('CreatedTime')) + timestamp = unified_timestamp( + data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime')) thumbnail = None duration = None @@ -96,16 +122,51 @@ class DRTVIE(InfoExtractor): formats = [] subtitles = {} - for asset in [data['PrimaryAsset']]: + assets = [] + primary_asset = data.get('PrimaryAsset') + if isinstance(primary_asset, dict): + assets.append(primary_asset) + secondary_assets = data.get('SecondaryAssets') + if isinstance(secondary_assets, list): + for secondary_asset in secondary_assets: + if isinstance(secondary_asset, dict): + assets.append(secondary_asset) + + def hex_to_bytes(hex): + return binascii.a2b_hex(hex.encode('ascii')) + + def decrypt_uri(e): + n = int(e[2:10], 16) + a = e[10 + n:] + data = bytes_to_intlist(hex_to_bytes(e[10:10 + n])) + key = bytes_to_intlist(hashlib.sha256( + ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest()) + iv = bytes_to_intlist(hex_to_bytes(a)) + decrypted = aes_cbc_decrypt(data, key, iv) + return intlist_to_bytes( + decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0] + + for asset in assets: kind = asset.get('Kind') if kind == 'Image': - thumbnail = asset.get('Uri') + thumbnail = url_or_none(asset.get('Uri')) elif kind in ('VideoResource', 'AudioResource'): duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) restricted_to_denmark = asset.get('RestrictedToDenmark') asset_target = asset.get('Target') for link in asset.get('Links', []): uri = link.get('Uri') + if not uri: + encrypted_uri = link.get('EncryptedUri') + if not encrypted_uri: + continue + try: + uri = decrypt_uri(encrypted_uri) + except Exception: + self.report_warning( + 'Unable to decrypt EncryptedUri', video_id) + continue + uri = url_or_none(uri) if not uri: continue target = link.get('Target') @@ -139,19 +200,22 @@ class DRTVIE(InfoExtractor): 'vcodec': 'none' if kind == 'AudioResource' else None, 'preference': preference, }) - subtitles_list = asset.get('SubtitlesList') - if isinstance(subtitles_list, list): - LANGS = { - 'Danish': 'da', - } - for subs in subtitles_list: - if not subs.get('Uri'): - continue - lang = subs.get('Language') or 'da' - subtitles.setdefault(LANGS.get(lang, lang), []).append({ - 'url': subs['Uri'], - 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' - }) + subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist') + if isinstance(subtitles_list, list): + LANGS = { + 'Danish': 'da', + } + for subs in subtitles_list: + if not isinstance(subs, dict): + continue + sub_uri = url_or_none(subs.get('Uri')) + if not sub_uri: + continue + lang = subs.get('Language') or 'da' + subtitles.setdefault(LANGS.get(lang, lang), []).append({ + 'url': sub_uri, + 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' + }) if not formats and restricted_to_denmark: self.raise_geo_restricted( @@ -169,6 +233,13 @@ class DRTVIE(InfoExtractor): 'duration': duration, 'formats': formats, 'subtitles': subtitles, + 'series': str_or_none(data.get('SeriesTitle')), + 'season': str_or_none(data.get('SeasonTitle')), + 'season_number': int_or_none(data.get('SeasonNumber')), + 'season_id': str_or_none(data.get('SeasonUrn')), + 'episode': str_or_none(data.get('EpisodeTitle')), + 'episode_number': int_or_none(data.get('EpisodeNumber')), + 'release_year': int_or_none(data.get('ProductionYear')), } From 41cff90c41006b30213c7f676bd3920a1612b717 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Mon, 28 Jan 2019 19:42:49 -0400 Subject: [PATCH 0331/1201] [yourporn] Fix extraction and extract duration (closes #18815, closes #18852) change cdn to cdn4 for the video_url --- youtube_dl/extractor/yourporn.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index c8dc29bd8..01e5f0c0e 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import urljoin +from ..utils import ( + parse_duration, + urljoin +) class YourPornIE(InfoExtractor): @@ -27,17 +30,21 @@ class YourPornIE(InfoExtractor): self._search_regex( r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info', group='data'), - video_id)[video_id]).replace('/cdn/', '/cdn3/') + video_id)[video_id]).replace('/cdn/', '/cdn4/') title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', default=None) or self._og_search_description(webpage)).strip() + thumbnail = self._og_search_thumbnail(webpage) + duration = parse_duration(self._search_regex(r'duration:[^0-9]*([0-9:]+)', + webpage, 'duration', default=None)) return { 'id': video_id, 'url': video_url, 'title': title, + 'duration': duration, 'thumbnail': thumbnail, 'age_limit': 18 } From 9868f1ab1853484d7a6c38cd6fa0d94a11914cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Jan 2019 23:56:42 +0700 Subject: [PATCH 0332/1201] [yourporn] Improve (closes #19061) --- youtube_dl/extractor/yourporn.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index 01e5f0c0e..2c63f9752 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( parse_duration, - urljoin + urljoin, ) @@ -17,7 +17,11 @@ class YourPornIE(InfoExtractor): 'ext': 'mp4', 'title': 'md5:c9f43630bd968267672651ba905a7d35', 'thumbnail': r're:^https?://.*\.jpg$', - 'age_limit': 18 + 'duration': 165, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, }, } @@ -35,16 +39,16 @@ class YourPornIE(InfoExtractor): title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', default=None) or self._og_search_description(webpage)).strip() - thumbnail = self._og_search_thumbnail(webpage) + duration = parse_duration(self._search_regex( + r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', + default=None)) - duration = parse_duration(self._search_regex(r'duration:[^0-9]*([0-9:]+)', - webpage, 'duration', default=None)) return { 'id': video_id, 'url': video_url, 'title': title, - 'duration': duration, 'thumbnail': thumbnail, - 'age_limit': 18 + 'duration': duration, + 'age_limit': 18, } From 5496754ae4c9097f37cfd9b307261cbbca438260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 00:03:19 +0700 Subject: [PATCH 0333/1201] [fox] Remove unused imports --- youtube_dl/extractor/fox.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 568656542..0ffceeb7c 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -6,12 +6,10 @@ import uuid from .adobepass import AdobePassIE from ..compat import ( - compat_HTTPError, compat_str, compat_urllib_parse_unquote, ) from ..utils import ( - ExtractorError, int_or_none, parse_age_limit, parse_duration, From ca01e5f9039dd6c0d5abff5c7139f82c5d1dfba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 00:05:32 +0700 Subject: [PATCH 0334/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index d94fe36ec..8f5343b23 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version <unreleased> + +Core +* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding + subtitles (#19024, #19042) +* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025) + +Extractors +* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061) +* [drtv] Improve extraction (#19039) + + Add support for EncryptedUri videos + + Extract more metadata + * Fix subtitles extraction ++ [fox] Add support for locked videos using cookies (#19060) +* [fox] Fix extraction for free videos (#19060) ++ [zattoo] Add support for tv.salt.ch (#19059) + + version 2019.01.27 Core From 1063b4c7073ce056f694b1690dd5d5a1a06fb347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 00:08:39 +0700 Subject: [PATCH 0335/1201] release 2019.01.30 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f529e3f4b..3944a4a38 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.27 +[debug] youtube-dl version 2019.01.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8f5343b23..745fffeaa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.30 Core * [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6377bf815..2918520c3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -546,6 +546,7 @@ - **MyVisionTV** - **n-tv.de** - **natgeo:video** + - **NationalGeographicTV** - **Naver** - **NBA** - **NBC** @@ -776,6 +777,7 @@ - **safari:api** - **safari:course**: safaribooksonline.com online courses - **SAKTV** + - **SaltTV** - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ec89cfc64..97818d0c7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.27' +__version__ = '2019.01.30' From ce52c7c111602f41d7f9c498f2915fd255ba2eab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 06:15:23 +0700 Subject: [PATCH 0336/1201] [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (closes #19067) --- youtube_dl/postprocessor/ffmpeg.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 88b9ae9be..5bcb00ac0 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -217,11 +217,13 @@ class FFmpegPostProcessor(PostProcessor): encodeArgument('-i'), encodeFilename(self._ffmpeg_filename_argument(path), True) ]) - cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + - [encodeArgument('-loglevel'), encodeArgument('repeat+info')] + - files_cmd + - [encodeArgument(o) for o in opts] + - [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) + cmd = [encodeFilename(self.executable, True), encodeArgument('-y')] + # avconv does not have repeat option + if self.basename == 'ffmpeg': + cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] + cmd += (files_cmd + + [encodeArgument(o) for o in opts] + + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) if self._downloader.params.get('verbose', False): self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd)) From c2a0fe2ea7422c437a27c8fac57c7e865517354b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 06:17:25 +0700 Subject: [PATCH 0337/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 745fffeaa..e6de6ca03 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Core +* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067) + + version 2019.01.30 Core From 7b0f9df23d9842ddb2a545a0ceaf594daa0e12ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Jan 2019 06:19:36 +0700 Subject: [PATCH 0338/1201] release 2019.01.30.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3944a4a38..423a08e4d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.30 +[debug] youtube-dl version 2019.01.30.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e6de6ca03..4872cd9fc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.01.30.1 Core * [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 97818d0c7..be3bbdd73 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.30' +__version__ = '2019.01.30.1' From 645c4885cf38ecb244412dffda2760f4c0e72033 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 30 Jan 2019 14:43:44 +0100 Subject: [PATCH 0339/1201] [crackle] authorize media detail request(closes #16931) --- youtube_dl/extractor/crackle.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index f73ef6b63..49bf3a4f9 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -1,7 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals, division +import hashlib +import hmac import re +import time from .common import InfoExtractor from ..compat import compat_HTTPError @@ -74,13 +77,16 @@ class CrackleIE(InfoExtractor): for country in countries: try: + # Authorization generation algorithm is reverse engineered from: + # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js + media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country) + timestamp = time.strftime('%Y%m%d%H%M', time.gmtime()) + h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper() media = self._download_json( - 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' - % (video_id, country), video_id, - 'Downloading media JSON as %s' % country, - 'Unable to download media JSON', query={ - 'disableProtocols': 'true', - 'format': 'json' + media_detail_url, video_id, 'Downloading media JSON as %s' % country, + 'Unable to download media JSON', headers={ + 'Accept': 'application/json', + 'Authorization': '|'.join([h, timestamp, '117', '1']), }) except ExtractorError as e: # 401 means geo restriction, trying next country From 15e832ff2a1bee42b299d3498439cf789c16fffa Mon Sep 17 00:00:00 2001 From: Batuhan's Unmaintained Account <batuhanosmantaskaya@gmail.com> Date: Wed, 30 Jan 2019 19:39:02 +0300 Subject: [PATCH 0340/1201] [openload] Add support for oload.info --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b713e78b8..747aa298a 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? (?: openload\.(?:co|io|link)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info) ) )/ (?:f|embed)/ @@ -337,6 +337,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.club/f/Nr1L-aZ2dbQ', 'only_matching': True, + }, { + 'url': 'https://oload.info/f/5NEAbI2BDSk', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 9613e14a92429046b162145dfd40dee5795ca409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 31 Jan 2019 00:15:45 +0700 Subject: [PATCH 0341/1201] [openload] Add support for openload.pw and oload.pw (closes #18930) --- youtube_dl/extractor/openload.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 747aa298a..a2ae25272 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -248,8 +248,8 @@ class OpenloadIE(InfoExtractor): (?P<host> (?:www\.)? (?: - openload\.(?:co|io|link)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info) + openload\.(?:co|io|link|pw)| + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw) ) )/ (?:f|embed)/ @@ -340,6 +340,12 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.info/f/5NEAbI2BDSk', 'only_matching': True, + }, { + 'url': 'https://openload.pw/f/WyKgK8s94N0', + 'only_matching': True, + }, { + 'url': 'https://oload.pw/f/WyKgK8s94N0', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 49fe4175ae165527a0b06b8d97cdc85d83041fef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Feb 2019 01:48:10 +0700 Subject: [PATCH 0342/1201] [drtv] Improve preference (closes #19079) --- youtube_dl/extractor/drtv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index c5f211128..0c7e350f0 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -171,10 +171,13 @@ class DRTVIE(InfoExtractor): continue target = link.get('Target') format_id = target or '' - preference = None - if asset_target in ('SpokenSubtitles', 'SignLanguage'): + if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'): preference = -1 format_id += '-%s' % asset_target + elif asset_target == 'Default': + preference = 1 + else: + preference = None if target == 'HDS': f4m_formats = self._extract_f4m_formats( uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', From 3ef2da2d21061bd44df0b0a0d27e82a365209662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Feb 2019 04:00:29 +0700 Subject: [PATCH 0343/1201] [soundcloud] Fix paged playlists extraction, add support for albums and update client id --- youtube_dl/extractor/soundcloud.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 81c81c8d5..030840fd8 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -34,7 +34,7 @@ class SoundcloudIE(InfoExtractor): (?:(?:(?:www\.|m\.)?soundcloud\.com/ (?!stations/track) (?P<uploader>[\w\d-]+)/ - (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) + (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P<title>[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) @@ -157,7 +157,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb' + _CLIENT_ID = 'NmW1FlPaiL94ueEu7oziOWjYEzZzQDcK' @staticmethod def _extract_urls(webpage): @@ -368,7 +368,6 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): - _API_BASE = 'https://api.soundcloud.com' _API_V2_BASE = 'https://api-v2.soundcloud.com' def _extract_playlist(self, base_url, playlist_id, playlist_title): @@ -389,8 +388,12 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): next_href, playlist_id, 'Downloading track page %s' % (i + 1)) collection = response['collection'] - if not collection: - break + + if not isinstance(collection, list): + collection = [] + + # Empty collection may be returned, in this case we proceed + # straight to next_href def resolve_permalink_url(candidates): for cand in candidates: @@ -429,7 +432,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): (?:(?:www|m)\.)?soundcloud\.com/ (?P<user>[^/]+) (?:/ - (?P<rsrc>tracks|sets|reposts|likes|spotlight) + (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight) )? /?(?:[?#].*)?$ ''' @@ -476,13 +479,17 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): 'title': 'Grynpyret (Spotlight)', }, 'playlist_mincount': 1, + }, { + 'url': 'https://soundcloud.com/soft-cell-official/albums', + 'only_matching': True, }] _BASE_URL_MAP = { - 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE, + 'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, } @@ -490,6 +497,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): _TITLE_MAP = { 'all': 'All', 'tracks': 'Tracks', + 'albums': 'Albums', 'sets': 'Playlists', 'reposts': 'Reposts', 'likes': 'Likes', From b6423e6ca215e1583e013cf7b2c1faf8d3dcace7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Feb 2019 04:11:32 +0700 Subject: [PATCH 0344/1201] [soundcloud:user] Update tests --- youtube_dl/extractor/soundcloud.py | 48 ++++++++++++++++-------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 030840fd8..13463ae4f 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -438,40 +438,47 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): ''' IE_NAME = 'soundcloud:user' _TESTS = [{ - 'url': 'https://soundcloud.com/the-akashic-chronicler', + 'url': 'https://soundcloud.com/soft-cell-official', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (All)', + 'id': '207965082', + 'title': 'Soft Cell (All)', }, - 'playlist_mincount': 74, + 'playlist_mincount': 28, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', + 'url': 'https://soundcloud.com/soft-cell-official/tracks', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Tracks)', + 'id': '207965082', + 'title': 'Soft Cell (Tracks)', }, - 'playlist_mincount': 37, + 'playlist_mincount': 27, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/sets', + 'url': 'https://soundcloud.com/soft-cell-official/albums', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Playlists)', + 'id': '207965082', + 'title': 'Soft Cell (Albums)', + }, + 'playlist_mincount': 1, + }, { + 'url': 'https://soundcloud.com/jcv246/sets', + 'info_dict': { + 'id': '12982173', + 'title': 'Jordi / cv (Playlists)', }, 'playlist_mincount': 2, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/reposts', + 'url': 'https://soundcloud.com/jcv246/reposts', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Reposts)', + 'id': '12982173', + 'title': 'Jordi / cv (Reposts)', }, - 'playlist_mincount': 7, + 'playlist_mincount': 6, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/likes', + 'url': 'https://soundcloud.com/clalberg/likes', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Likes)', + 'id': '11817582', + 'title': 'clalberg (Likes)', }, - 'playlist_mincount': 321, + 'playlist_mincount': 5, }, { 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { @@ -479,9 +486,6 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): 'title': 'Grynpyret (Spotlight)', }, 'playlist_mincount': 1, - }, { - 'url': 'https://soundcloud.com/soft-cell-official/albums', - 'only_matching': True, }] _BASE_URL_MAP = { From e9fef7ee4e666b60bc7a757391f16e2be76f6cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Feb 2019 05:44:31 +0700 Subject: [PATCH 0345/1201] [YoutubeDL] Fallback to ie_key of matching extractor while making download archive id when no explicit ie_key is provided (#19022) --- youtube_dl/YoutubeDL.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 80ed8d7e5..c168415ce 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -2060,15 +2060,21 @@ class YoutubeDL(object): self.report_warning('Unable to remove downloaded original file') def _make_archive_id(self, info_dict): + video_id = info_dict.get('id') + if not video_id: + return # Future-proof against any change in case # and backwards compatibility with prior versions - extractor = info_dict.get('extractor_key') + extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist if extractor is None: - if 'id' in info_dict: - extractor = info_dict.get('ie_key') # key in a playlist - if extractor is None: - return None # Incomplete video information - return extractor.lower() + ' ' + info_dict['id'] + # Try to find matching extractor for the URL and take its ie_key + for ie in self._ies: + if ie.suitable(info_dict['url']): + extractor = ie.ie_key() + break + else: + return + return extractor.lower() + ' ' + video_id def in_download_archive(self, info_dict): fn = self.params.get('download_archive') @@ -2076,7 +2082,7 @@ class YoutubeDL(object): return False vid_id = self._make_archive_id(info_dict) - if vid_id is None: + if not vid_id: return False # Incomplete video information try: From b9bc1cff721b6f63e733c6ababeec45b92f1484b Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Fri, 1 Feb 2019 19:04:00 -0400 Subject: [PATCH 0346/1201] [drtuber] Extract duration --- youtube_dl/extractor/drtuber.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 5c41c8022..2baea585b 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -4,7 +4,9 @@ import re from .common import InfoExtractor from ..utils import ( + int_or_none, NO_DEFAULT, + parse_duration, str_to_int, ) @@ -65,6 +67,9 @@ class DrTuberIE(InfoExtractor): }) self._sort_formats(formats) + duration = int_or_none(video_data.get('duration')) or parse_duration( + video_data.get('duration_format')) + title = self._html_search_regex( (r'<h1[^>]+class=["\']title[^>]+>([^<]+)', r'<title>([^<]+)\s*@\s+DrTuber', @@ -103,4 +108,5 @@ class DrTuberIE(InfoExtractor): 'comment_count': comment_count, 'categories': categories, 'age_limit': self._rta_search(webpage), + 'duration': duration, } From 6cc6e0c34d0f67747be7bac91690820f47b26acb Mon Sep 17 00:00:00 2001 From: Cory Hall <corydantehall@gmail.com> Date: Thu, 31 Jan 2019 20:51:37 -0500 Subject: [PATCH 0347/1201] [soundcloud:pagedplaylist] Add ie and title to entries (#19022) rel: https://github.com/rg3/youtube-dl/issues/19022 --- youtube_dl/extractor/soundcloud.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 13463ae4f..1c8d3c53b 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -395,18 +395,20 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): # Empty collection may be returned, in this case we proceed # straight to next_href - def resolve_permalink_url(candidates): - for cand in candidates: + def append_url_result(entries, item): + for cand in (item, item.get('track'), item.get('playlist')): if isinstance(cand, dict): permalink_url = cand.get('permalink_url') - entry_id = self._extract_id(cand) if permalink_url and permalink_url.startswith('http'): - return permalink_url, entry_id + return entries.append( + self.url_result( + permalink_url, + ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + video_id=self._extract_id(cand), + video_title=cand.get('title'))) for e in collection: - permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) - if permalink_url: - entries.append(self.url_result(permalink_url, video_id=entry_id)) + append_url_result(entries, e) next_href = response.get('next_href') if not next_href: From 7c5307f4c4e91ef6551d70cd844b93fbdc5c3cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Feb 2019 23:40:06 +0700 Subject: [PATCH 0348/1201] [soundcloud:pagedplaylist] Improve (closes #19086) --- youtube_dl/extractor/soundcloud.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1c8d3c53b..5536e7851 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -18,6 +18,7 @@ from ..utils import ( int_or_none, unified_strdate, update_url_query, + url_or_none, ) @@ -395,20 +396,23 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): # Empty collection may be returned, in this case we proceed # straight to next_href - def append_url_result(entries, item): - for cand in (item, item.get('track'), item.get('playlist')): - if isinstance(cand, dict): - permalink_url = cand.get('permalink_url') - if permalink_url and permalink_url.startswith('http'): - return entries.append( - self.url_result( - permalink_url, - ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, - video_id=self._extract_id(cand), - video_title=cand.get('title'))) + def resolve_entry(candidates): + for cand in candidates: + if not isinstance(cand, dict): + continue + permalink_url = url_or_none(cand.get('permalink_url')) + if not permalink_url: + continue + return self.url_result( + permalink_url, + ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + video_id=self._extract_id(cand), + video_title=cand.get('title')) for e in collection: - append_url_result(entries, e) + entry = resolve_entry((e, e.get('track'), e.get('playlist'))) + if entry: + entries.append(entry) next_href = response.get('next_href') if not next_href: From 0efcb5a2fe0c3024d3e5affe74b3d0d416413ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Feb 2019 00:33:45 +0700 Subject: [PATCH 0349/1201] [vporn] Remove extractor (closes #16276) Handled by generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/vporn.py | 123 ----------------------------- 2 files changed, 124 deletions(-) delete mode 100644 youtube_dl/extractor/vporn.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b40be42e6..693c16e49 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1362,7 +1362,6 @@ from .voxmedia import ( VoxMediaVolumeIE, VoxMediaIE, ) -from .vporn import VpornIE from .vrt import VRTIE from .vrak import VrakIE from .vrv import ( diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py deleted file mode 100644 index 858ac9e71..000000000 --- a/youtube_dl/extractor/vporn.py +++ /dev/null @@ -1,123 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_duration, - str_to_int, - urljoin, -) - - -class VpornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)' - _TESTS = [ - { - 'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/', - 'md5': 'facf37c1b86546fa0208058546842c55', - 'info_dict': { - 'id': '497944', - 'display_id': 'violet-on-her-th-birthday', - 'ext': 'mp4', - 'title': 'Violet on her 19th birthday', - 'description': 'Violet dances in front of the camera which is sure to get you horny.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'kileyGrope', - 'categories': ['Masturbation', 'Teen'], - 'duration': 393, - 'age_limit': 18, - 'view_count': int, - }, - 'skip': 'video removed', - }, - { - 'url': 'http://www.vporn.com/female/hana-shower/523564/', - 'md5': 'ced35a4656198a1664cf2cda1575a25f', - 'info_dict': { - 'id': '523564', - 'display_id': 'hana-shower', - 'ext': 'mp4', - 'title': 'Hana Shower', - 'description': 'Hana showers at the bathroom.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Hmmmmm', - 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'], - 'duration': 588, - 'age_limit': 18, - 'view_count': int, - } - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!' - if errmsg in webpage: - raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) - - title = self._html_search_regex( - r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() - description = self._html_search_regex( - r'class="(?:descr|description_txt)">(.*?)</div>', - webpage, 'description', fatal=False) - thumbnail = urljoin('http://www.vporn.com', self._html_search_regex( - r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', - default=None)) - - uploader = self._html_search_regex( - r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>', - webpage, 'uploader', fatal=False) - - categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage) - - duration = parse_duration(self._search_regex( - r'Runtime:\s*</span>\s*(\d+ min \d+ sec)', - webpage, 'duration', fatal=False)) - - view_count = str_to_int(self._search_regex( - r'class="views">([\d,\.]+) [Vv]iews<', - webpage, 'view count', fatal=False)) - comment_count = str_to_int(self._html_search_regex( - r"'Comments \(([\d,\.]+)\)'", - webpage, 'comment count', default=None)) - - formats = [] - - for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage): - video_url = video[1] - fmt = { - 'url': video_url, - 'format_id': video[0], - } - m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url) - if m: - fmt.update({ - 'width': int(m.group('width')), - 'height': int(m.group('height')), - 'vbr': int(m.group('vbr')), - }) - formats.append(fmt) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'categories': categories, - 'duration': duration, - 'view_count': view_count, - 'comment_count': comment_count, - 'age_limit': 18, - 'formats': formats, - } From eecf788b90fa4d49567c714f5a613fdd2b6e2507 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 3 Feb 2019 09:10:09 +0100 Subject: [PATCH 0350/1201] [teachable] add support for courses.workitdaily.com (closes #18871) --- youtube_dl/extractor/teachable.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index 47ac95ee8..c1a9deafe 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -27,6 +27,7 @@ class TeachableBaseIE(InfoExtractor): 'market.saleshacker.com': 'saleshacker', 'learnability.org': 'learnability', 'edurila.com': 'edurila', + 'courses.workitdaily.com': 'workitdaily', } _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys())) From 07fbfef1c7e36b25dd7098be73ab76b87378a015 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 3 Feb 2019 12:10:41 +0100 Subject: [PATCH 0351/1201] [radiocanada] switch to the new media requests(closes #19115) --- youtube_dl/extractor/radiocanada.py | 133 ++++++++-------------------- 1 file changed, 39 insertions(+), 94 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 302f67d96..58e294892 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -5,14 +5,10 @@ import re from .common import InfoExtractor from ..utils import ( - xpath_text, - find_xpath_attr, determine_ext, + ExtractorError, int_or_none, unified_strdate, - xpath_element, - ExtractorError, - determine_protocol, unsmuggle_url, ) @@ -61,107 +57,53 @@ class RadioCanadaIE(InfoExtractor): 'only_matching': True, } ] + _GEO_COUNTRIES = ['CA'] + + def _call_api(self, path, video_id, app_code, query): + query.update({ + 'appCode': app_code, + 'idMedia': video_id, + 'output': 'json', + }) + return self._download_json( + 'https://services.radio-canada.ca/media/' + path, video_id, headers={ + 'Authorization': 'Client-Key 773aea60-0e80-41bb-9c7f-e6d7c3ad17fb' + }, query=query) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) app_code, video_id = re.match(self._VALID_URL, url).groups() - metadata = self._download_xml( - 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', - video_id, note='Downloading metadata XML', query={ - 'appCode': app_code, - 'idMedia': video_id, - }) + metas = self._call_api('meta/v1/index.ashx', video_id, app_code, {})['Metas'] def get_meta(name): - el = find_xpath_attr(metadata, './/Meta', 'name', name) - return el.text if el is not None else None + for meta in metas: + if meta.get('name') == name: + text = meta.get('text') + if text: + return text # protectionType does not necessarily mean the video is DRM protected (see # https://github.com/rg3/youtube-dl/pull/18609). if get_meta('protectionType'): self.report_warning('This video is probably DRM protected.') - device_types = ['ipad'] - if not smuggled_data: - device_types.append('flash') - device_types.append('android') - - formats = [] - error = None - # TODO: extract f4m formats - # f4m formats can be extracted using flashhd device_type but they produce unplayable file - for device_type in device_types: - validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' - query = { - 'appCode': app_code, - 'idMedia': video_id, - 'connectionType': 'broadband', - 'multibitrate': 'true', - 'deviceType': device_type, - } - if smuggled_data: - validation_url = 'https://services.radio-canada.ca/media/validation/v2/' - query.update(smuggled_data) - else: - query.update({ - # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction - 'paysJ391wsHjbOJwvCs26toz': 'CA', - 'bypasslock': 'NZt5K62gRqfc', - }) - v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False) - v_url = xpath_text(v_data, 'url') - if not v_url: - continue - if v_url == 'null': - error = xpath_text(v_data, 'message') - continue - ext = determine_ext(v_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - v_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - v_url, video_id, f4m_id='hds', fatal=False)) - else: - ext = determine_ext(v_url) - bitrates = xpath_element(v_data, 'bitrates') - for url_e in bitrates.findall('url'): - tbr = int_or_none(url_e.get('bitrate')) - if not tbr: - continue - f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url) - protocol = determine_protocol({'url': f_url}) - f = { - 'format_id': '%s-%d' % (protocol, tbr), - 'url': f_url, - 'ext': 'flv' if protocol == 'rtmp' else ext, - 'protocol': protocol, - 'width': int_or_none(url_e.get('width')), - 'height': int_or_none(url_e.get('height')), - 'tbr': tbr, - } - mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url) - if mobj: - f.update({ - 'url': mobj.group('url') + mobj.group('auth'), - 'play_path': mobj.group('playpath'), - }) - formats.append(f) - if protocol == 'rtsp': - base_url = self._search_regex( - r'rtsp://([^?]+)', f_url, 'base url', default=None) - if base_url: - base_url = 'http://' + base_url - formats.extend(self._extract_m3u8_formats( - base_url + '/playlist.m3u8', video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - base_url + '/manifest.f4m', video_id, - f4m_id='hds', fatal=False)) - if not formats and error: + query = { + 'connectionType': 'hd', + 'deviceType': 'ipad', + 'multibitrate': 'true', + } + if smuggled_data: + query.update(smuggled_data) + v_data = self._call_api('validation/v2/', video_id, app_code, query) + v_url = v_data.get('url') + if not v_url: + error = v_data['message'] + if error == "Le contenu sélectionné n'est pas disponible dans votre pays": + raise self.raise_geo_restricted(error, self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, error), expected=True) + formats = self._extract_m3u8_formats(v_url, video_id, 'mp4') self._sort_formats(formats) subtitles = {} @@ -189,8 +131,8 @@ class RadioCanadaIE(InfoExtractor): class RadioCanadaAudioVideoIE(InfoExtractor): 'radiocanada:audiovideo' - _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 'info_dict': { 'id': '7527184', @@ -203,7 +145,10 @@ class RadioCanadaAudioVideoIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - } + }, { + 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam', + 'only_matching': True, + }] def _real_extract(self, url): return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) From 70c3ee13671798d7e3b80ea9be863ef73bd08653 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Mon, 4 Feb 2019 13:06:04 -0400 Subject: [PATCH 0352/1201] [pornhd] Extract like count --- youtube_dl/extractor/pornhd.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index b52879c7a..a079cd32a 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -23,6 +23,7 @@ class PornHdIE(InfoExtractor): 'description': 'md5:3748420395e03e31ac96857a8f125b2b', 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, + 'like_count': int, 'age_limit': 18, } }, { @@ -37,6 +38,7 @@ class PornHdIE(InfoExtractor): 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, + 'like_count': int, 'age_limit': 18, }, 'skip': 'Not available anymore', @@ -85,6 +87,11 @@ class PornHdIE(InfoExtractor): r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage, 'thumbnail', fatal=False, group='url') + like_count = int_or_none(self._search_regex( + (r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes', + r'class=["\']save-count["\'][^>]*>\s*(\d+)'), + webpage, 'like count', fatal=False)) + return { 'id': video_id, 'display_id': display_id, @@ -92,6 +99,7 @@ class PornHdIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'view_count': view_count, + 'like_count': like_count, 'formats': formats, 'age_limit': 18, } From 48fb963b2f9495922a4acf751608167cbc273693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Feb 2019 00:07:37 +0700 Subject: [PATCH 0353/1201] [pornhd] Fix formats extraction --- youtube_dl/extractor/pornhd.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index a079cd32a..27d65d4b9 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -4,9 +4,11 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, int_or_none, js_to_json, + urljoin, ) @@ -14,7 +16,7 @@ class PornHdIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' _TESTS = [{ 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', - 'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5', + 'md5': '87f1540746c1d32ec7a2305c12b96b25', 'info_dict': { 'id': '9864', 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', @@ -67,12 +69,14 @@ class PornHdIE(InfoExtractor): formats = [] for format_id, video_url in sources.items(): + video_url = urljoin(url, video_url) if not video_url: continue height = int_or_none(self._search_regex( r'^(\d+)[pP]', format_id, 'height', default=None)) formats.append({ 'url': video_url, + 'ext': determine_ext(video_url, 'mp4'), 'format_id': format_id, 'height': height, }) From d2d970d07ec82f648b62bff8b15ac0b57d0d0496 Mon Sep 17 00:00:00 2001 From: JChris246 <chris.401@live.com> Date: Mon, 4 Feb 2019 13:33:54 -0400 Subject: [PATCH 0354/1201] [pornhub] Fix tags and categories extraction (closes #13720) --- youtube_dl/extractor/pornhub.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index be93d5d48..428324ef0 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -16,7 +16,6 @@ from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, int_or_none, - js_to_json, orderedSet, remove_quotes, str_to_int, @@ -303,14 +302,17 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - page_params = self._parse_json(self._search_regex( - r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', - webpage, 'page parameters', group='data', default='{}'), - video_id, transform_source=js_to_json, fatal=False) - tags = categories = None - if page_params: - tags = page_params.get('tags', '').split(',') - categories = page_params.get('categories', '').split(',') + def _get_items(class_name): + div = self._search_regex( + r'<div class="' + class_name + '">([\S\s]+?)</div>', + webpage, class_name, default=None) + if div: + return [a for a in re.findall(r'<a href=[^>]+>([^<]+)', div)] + else: + return None + + categories = _get_items('categoriesWrapper') + tags = _get_items('tagsWrapper') return { 'id': video_id, From 5dda1edef93d94c9a49672f905df0c49c75c5739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Feb 2019 23:06:55 +0700 Subject: [PATCH 0355/1201] [pornhub] Improve and simplify (closes #19135) --- youtube_dl/extractor/pornhub.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 428324ef0..641083da7 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -302,17 +302,12 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - def _get_items(class_name): + def extract_list(meta_key): div = self._search_regex( - r'<div class="' + class_name + '">([\S\s]+?)</div>', - webpage, class_name, default=None) + r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>' + % meta_key, webpage, meta_key, default=None) if div: - return [a for a in re.findall(r'<a href=[^>]+>([^<]+)', div)] - else: - return None - - categories = _get_items('categoriesWrapper') - tags = _get_items('tagsWrapper') + return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div) return { 'id': video_id, @@ -327,8 +322,8 @@ class PornHubIE(PornHubBaseIE): 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, - 'tags': tags, - 'categories': categories, + 'tags': extract_list('tags'), + 'categories': extract_list('categories'), 'subtitles': subtitles, } From 8fecc7353df35f6cac305c04a4e203fb2bbb4827 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Feb 2019 13:59:12 +0100 Subject: [PATCH 0356/1201] [toutv] fix authentication(closes #16398)(closes #18700) --- youtube_dl/extractor/radiocanada.py | 47 +++++++++++++++++--------- youtube_dl/extractor/toutv.py | 51 +++++++++-------------------- 2 files changed, 47 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 58e294892..dd95f99f2 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -4,12 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( determine_ext, ExtractorError, int_or_none, unified_strdate, - unsmuggle_url, ) @@ -58,23 +58,35 @@ class RadioCanadaIE(InfoExtractor): } ] _GEO_COUNTRIES = ['CA'] + _access_token = None + _claims = None - def _call_api(self, path, video_id, app_code, query): + def _call_api(self, path, video_id=None, app_code=None, query=None): + if not query: + query = {} query.update({ - 'appCode': app_code, - 'idMedia': video_id, + 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb', 'output': 'json', }) - return self._download_json( - 'https://services.radio-canada.ca/media/' + path, video_id, headers={ - 'Authorization': 'Client-Key 773aea60-0e80-41bb-9c7f-e6d7c3ad17fb' - }, query=query) + if video_id: + query.update({ + 'appCode': app_code, + 'idMedia': video_id, + }) + if self._access_token: + query['access_token'] = self._access_token + try: + return self._download_json( + 'https://services.radio-canada.ca/media/' + path, video_id, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422): + data = self._parse_json(e.cause.read().decode(), None) + error = data.get('error_description') or data['errorMessage']['text'] + raise ExtractorError(error, expected=True) + raise - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - app_code, video_id = re.match(self._VALID_URL, url).groups() - - metas = self._call_api('meta/v1/index.ashx', video_id, app_code, {})['Metas'] + def _extract_info(self, app_code, video_id): + metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas'] def get_meta(name): for meta in metas: @@ -93,14 +105,16 @@ class RadioCanadaIE(InfoExtractor): 'deviceType': 'ipad', 'multibitrate': 'true', } - if smuggled_data: - query.update(smuggled_data) + if self._claims: + query['claims'] = self._claims v_data = self._call_api('validation/v2/', video_id, app_code, query) v_url = v_data.get('url') if not v_url: error = v_data['message'] if error == "Le contenu sélectionné n'est pas disponible dans votre pays": raise self.raise_geo_restricted(error, self._GEO_COUNTRIES) + if error == 'Le contenu sélectionné est disponible seulement en premium': + self.raise_login_required(error) raise ExtractorError( '%s said: %s' % (self.IE_NAME, error), expected=True) formats = self._extract_m3u8_formats(v_url, video_id, 'mp4') @@ -128,6 +142,9 @@ class RadioCanadaIE(InfoExtractor): 'formats': formats, } + def _real_extract(self, url): + return self._extract_info(*re.match(self._VALID_URL, url).groups()) + class RadioCanadaAudioVideoIE(InfoExtractor): 'radiocanada:audiovideo' diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 2e7876cc5..f1ab91cf2 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -3,22 +3,19 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .radiocanada import RadioCanadaIE from ..utils import ( - int_or_none, - js_to_json, - urlencode_postdata, extract_attributes, - smuggle_url, + int_or_none, + merge_dicts, + urlencode_postdata, ) -class TouTvIE(InfoExtractor): +class TouTvIE(RadioCanadaIE): _NETRC_MACHINE = 'toutv' IE_NAME = 'tou.tv' _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)' - _access_token = None - _claims = None _TESTS = [{ 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', @@ -46,18 +43,14 @@ class TouTvIE(InfoExtractor): email, password = self._get_login_info() if email is None: return - state = 'http://ici.tou.tv/' - webpage = self._download_webpage(state, None, 'Downloading homepage') - toutvlogin = self._parse_json(self._search_regex( - r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) - authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize' login_webpage = self._download_webpage( - authorize_url, None, 'Downloading login page', query={ - 'client_id': toutvlogin['clientId'], - 'redirect_uri': 'https://ici.tou.tv/login/loginCallback', + 'https://services.radio-canada.ca/auth/oauth/v2/authorize', + None, 'Downloading login page', query={ + 'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36', + 'redirect_uri': 'https://ici.tou.tv/logincallback', 'response_type': 'token', - 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', - 'state': state, + 'scope': 'id.write media-validation.read', + 'state': '/', }) def extract_form_url_and_data(wp, default_form_url, form_spec_re=''): @@ -86,12 +79,7 @@ class TouTvIE(InfoExtractor): self._access_token = self._search_regex( r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', urlh.geturl(), 'access token') - self._claims = self._download_json( - 'https://services.radio-canada.ca/media/validation/v2/getClaims', - None, 'Extracting Claims', query={ - 'token': self._access_token, - 'access_token': self._access_token, - })['claims'] + self._claims = self._call_api('validation/v2/getClaims')['claims'] def _real_extract(self, url): path = self._match_id(url) @@ -102,19 +90,10 @@ class TouTvIE(InfoExtractor): self.report_warning('This video is probably DRM protected.', path) video_id = metadata['IdMedia'] details = metadata['Details'] - title = details['OriginalTitle'] - video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id) - if self._access_token and self._claims: - video_url = smuggle_url(video_url, { - 'access_token': self._access_token, - 'claims': self._claims, - }) - return { - '_type': 'url_transparent', - 'url': video_url, + return merge_dicts({ 'id': video_id, - 'title': title, + 'title': details.get('OriginalTitle'), 'thumbnail': details.get('ImageUrl'), 'duration': int_or_none(details.get('LengthInSeconds')), - } + }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id)) From 241c5d7d384dcb01a62702274cfbead01f537145 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Feb 2019 19:38:10 +0100 Subject: [PATCH 0357/1201] [trutv] fix extraction(closes #17336) --- youtube_dl/extractor/trutv.py | 84 +++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py index 3a5782525..ce892c8c5 100644 --- a/youtube_dl/extractor/trutv.py +++ b/youtube_dl/extractor/trutv.py @@ -4,44 +4,72 @@ from __future__ import unicode_literals import re from .turner import TurnerBaseIE +from ..utils import ( + int_or_none, + parse_iso8601, +) class TruTVIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))' + _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))' _TEST = { - 'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html', - 'md5': '2cdc844f317579fed1a7251b087ff417', + 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html', 'info_dict': { - 'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets', + 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1', 'ext': 'mp4', - 'title': 'You Won\'t Believe These Sports Bets', - 'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.', - 'upload_date': '20130305', - } + 'title': 'Sunlight-Activated Flower', + 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.", + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): - path, video_id = re.match(self._VALID_URL, url).groups() - auth_required = False - if path: - data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path + series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups() + + if video_id: + path = 'episode' + display_id = video_id else: - webpage = self._download_webpage(url, video_id) - video_id = self._search_regex( - r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';", - webpage, 'video id', default=video_id) - auth_required = self._search_regex( - r'TTV\.TVE\.authRequired\s*=\s*(true|false);', - webpage, 'auth required', default='false') == 'true' - data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id - return self._extract_cvp_info( - data_src, path, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big', - 'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do', - }, - }, { + path = 'series/clip' + display_id = clip_slug + + data = self._download_json( + 'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id), + display_id) + video_data = data['episode'] if video_id else data['info'] + media_id = video_data['mediaId'] + title = video_data['title'].strip() + + info = self._extract_ngtv_info( + media_id, {}, { 'url': url, 'site_name': 'truTV', - 'auth_required': auth_required, + 'auth_required': video_data.get('isAuthRequired'), }) + + thumbnails = [] + for image in video_data.get('images', []): + image_url = image.get('srcUrl') + if not image_url: + continue + thumbnails.append({ + 'url': image_url, + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + }) + + info.update({ + 'id': media_id, + 'display_id': display_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnails': thumbnails, + 'timestamp': parse_iso8601(video_data.get('publicationDate')), + 'series': video_data.get('showTitle'), + 'season_number': int_or_none(video_data.get('seasonNum')), + 'episode_number': int_or_none(video_data.get('episodeNum')), + }) + return info From f06a1cabe8b3831b5f2ff3bc27f5e7336c597e92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 7 Feb 2019 23:57:58 +0700 Subject: [PATCH 0358/1201] [spankbang] Extend _VALID_URL --- youtube_dl/extractor/spankbang.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 67500b69c..e48cfff71 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -12,7 +12,7 @@ from ..utils import ( class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -41,13 +41,22 @@ class SpankBangIE(InfoExtractor): # 4k 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k', 'only_matching': True, + }, { + 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/', + 'only_matching': True, + }, { + 'url': 'https://m.spankbang.com/3vvn/play', + 'only_matching': True, + }, { + 'url': 'https://spankbang.com/2y3td/embed/', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, headers={ - 'Cookie': 'country=US' - }) + webpage = self._download_webpage( + url.replace('/%s/embed' % video_id, '/%s/video' % video_id), + video_id, headers={'Cookie': 'country=US'}) if re.search(r'<[^>]+\bid=["\']video_removed', webpage): raise ExtractorError( From 49bd993fd9adbcf6b5c11a7ec11c2b4a552e49c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 00:09:50 +0700 Subject: [PATCH 0359/1201] [spankbang:playlist] Add extractor (closes #19145) --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/spankbang.py | 33 +++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 693c16e49..d7685cd87 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1058,7 +1058,10 @@ from .southpark import ( SouthParkEsIE, SouthParkNlIE ) -from .spankbang import SpankBangIE +from .spankbang import ( + SpankBangIE, + SpankBangPlaylistIE, +) from .spankwire import SpankwireIE from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index e48cfff71..fbe6ef31a 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + orderedSet, parse_duration, parse_resolution, str_to_int, @@ -12,7 +13,7 @@ from ..utils import ( class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)' + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -103,3 +104,33 @@ class SpankBangIE(InfoExtractor): 'formats': formats, 'age_limit': age_limit, } + + +class SpankBangPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+' + _TEST = { + 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties', + 'info_dict': { + 'id': 'ug0k', + 'title': 'Big Ass Titties', + }, + 'playlist_mincount': 50, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage( + url, playlist_id, headers={'Cookie': 'country=US; mobile=on'}) + + entries = [self.url_result( + 'https://spankbang.com/%s/video' % video_id, + ie=SpankBangIE.ie_key(), video_id=video_id) + for video_id in orderedSet(re.findall( + r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))] + + title = self._html_search_regex( + r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title', + fatal=False) + + return self.playlist_result(entries, playlist_id, title) From 22f5f5c6fcd2d7f0c9f1ff3019fe0b957b771f44 Mon Sep 17 00:00:00 2001 From: Ales Jirasek <schunkac@gmail.com> Date: Wed, 10 Oct 2018 23:47:21 +0200 Subject: [PATCH 0360/1201] [malltv] Add extractor (closes #18058) --- test/test_InfoExtractor.py | 2 ++ youtube_dl/extractor/common.py | 2 +- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/malltv.py | 58 ++++++++++++++++++++++++++++++ youtube_dl/utils.py | 2 +- 5 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 youtube_dl/extractor/malltv.py diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 75fa0bbb7..f0aa8466b 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -61,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase): <meta content='Foo' property=og:foobar> <meta name="og:test1" content='foo > < bar'/> <meta name="og:test2" content="foo >//< bar"/> + <meta property=og-test3 content='Ill-formatted opengraph'/> ''' self.assertEqual(ie._og_search_title(html), 'Foo') self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') @@ -69,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._og_search_property('foobar', html), 'Foo') self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') + self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph') self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c4ea2882f..c3b0586a0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1058,7 +1058,7 @@ class InfoExtractor(object): @staticmethod def _og_regexes(prop): content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))' - property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)' + property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)' % {'prop': re.escape(prop)}) template = r'<meta[^>]+?%s[^>]+?%s' return [ diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d7685cd87..f212b5116 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -619,6 +619,7 @@ from .mailru import ( MailRuMusicSearchIE, ) from .makertv import MakerTVIE +from .malltv import MallTVIE from .mangomolo import ( MangomoloVideoIE, MangomoloLiveIE, diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py new file mode 100644 index 000000000..7e0876ecc --- /dev/null +++ b/youtube_dl/extractor/malltv.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..utils import parse_duration, merge_dicts + + +class MallTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:.+/)?(?P<id>.+)(?:\?.*$|$)' + _TESTS = [ + { + 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'md5': '9ced0de056534410837077e23bfba796', + 'info_dict': { + 'id': 't0zzt0', + 'ext': 'mp4', + 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', + 'description': 'Pokud někdo hospodaří s penězmi daňových poplatníků, pak logicky chceme vědět, jak s nimi nakládá. Objem dotací pro neziskovky roste, ale opravdu jsou tyto organizace „pijavice", jak o nich hovoří And', + 'upload_date': '20181007', + 'timestamp': 1538870400 + } + }, + { + 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'md5': '9ced0de056534410837077e23bfba796', + 'only_matching': 1, + 'info_dict': { + 'id': 't0zzt0', + 'ext': 'mp4', + 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', + 'description': 'Pokud někdo hospodaří s penězmi daňových poplatníků, pak logicky chceme vědět, jak s nimi nakládá. Objem dotací pro neziskovky roste, ale opravdu jsou tyto organizace „pijavice", jak o nich hovoří And', + 'upload_date': '20181007', + 'timestamp': 1538870400 + } + } + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + src_id_regex = r'(?P<src><source src=([\"\'])?.+?/(?P<id>\w{6,}?)/index)(?P<after>\1?[^>]*?>)' + video_id = self._search_regex(src_id_regex, webpage, 'ID', + group='id') + info = self._search_json_ld(webpage, video_id, default={}) + html = re.sub(src_id_regex, r'\g<src>.m3u8\g<after>', webpage) + media = self._parse_html5_media_entries(url, html, video_id) + thumbnail = info.get('thumbnail', self._og_search_thumbnail(webpage)) + duration = parse_duration(info.get('duration')) + result = { + 'id': video_id, + 'title': info.get('title', self._og_search_title(webpage)), + 'description': self._og_search_description(webpage) + } + result.update({'thumbnail': thumbnail}) + result.update({'duration': duration}) + + return merge_dicts(media[0], info, result) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d0cb65814..f5a0bb4b0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -184,7 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([ ]) PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" -JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>' +JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>' def preferredencoding(): From 4de3cb883c61eeec56d8d271375a0624d481ad37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 00:43:10 +0700 Subject: [PATCH 0361/1201] [malltv] Fix issues and simplify (closes #17856) --- youtube_dl/extractor/malltv.py | 85 ++++++++++++++++------------------ 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py index 7e0876ecc..e13c2e11a 100644 --- a/youtube_dl/extractor/malltv.py +++ b/youtube_dl/extractor/malltv.py @@ -2,57 +2,52 @@ from __future__ import unicode_literals import re + from .common import InfoExtractor -from ..utils import parse_duration, merge_dicts +from ..utils import merge_dicts class MallTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:.+/)?(?P<id>.+)(?:\?.*$|$)' - _TESTS = [ - { - 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', - 'md5': '9ced0de056534410837077e23bfba796', - 'info_dict': { - 'id': 't0zzt0', - 'ext': 'mp4', - 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', - 'description': 'Pokud někdo hospodaří s penězmi daňových poplatníků, pak logicky chceme vědět, jak s nimi nakládá. Objem dotací pro neziskovky roste, ale opravdu jsou tyto organizace „pijavice", jak o nich hovoří And', - 'upload_date': '20181007', - 'timestamp': 1538870400 - } - }, - { - 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', - 'md5': '9ced0de056534410837077e23bfba796', - 'only_matching': 1, - 'info_dict': { - 'id': 't0zzt0', - 'ext': 'mp4', - 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', - 'description': 'Pokud někdo hospodaří s penězmi daňových poplatníků, pak logicky chceme vědět, jak s nimi nakládá. Objem dotací pro neziskovky roste, ale opravdu jsou tyto organizace „pijavice", jak o nich hovoří And', - 'upload_date': '20181007', - 'timestamp': 1538870400 - } + _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'md5': '1c4a37f080e1f3023103a7b43458e518', + 'info_dict': { + 'id': 't0zzt0', + 'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'ext': 'mp4', + 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', + 'description': 'md5:25fc0ec42a72ba602b602c683fa29deb', + 'duration': 216, + 'timestamp': 1538870400, + 'upload_date': '20181007', + 'view_count': int, } - ] + }, { + 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - src_id_regex = r'(?P<src><source src=([\"\'])?.+?/(?P<id>\w{6,}?)/index)(?P<after>\1?[^>]*?>)' - video_id = self._search_regex(src_id_regex, webpage, 'ID', - group='id') - info = self._search_json_ld(webpage, video_id, default={}) - html = re.sub(src_id_regex, r'\g<src>.m3u8\g<after>', webpage) - media = self._parse_html5_media_entries(url, html, video_id) - thumbnail = info.get('thumbnail', self._og_search_thumbnail(webpage)) - duration = parse_duration(info.get('duration')) - result = { - 'id': video_id, - 'title': info.get('title', self._og_search_title(webpage)), - 'description': self._og_search_description(webpage) - } - result.update({'thumbnail': thumbnail}) - result.update({'duration': duration}) - return merge_dicts(media[0], info, result) + webpage = self._download_webpage( + url, display_id, headers=self.geo_verification_headers()) + + SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b' + video_id = self._search_regex( + SOURCE_RE, webpage, 'video id', group='id') + + media = self._parse_html5_media_entries( + url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id, + m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0] + + info = self._search_json_ld(webpage, video_id, default={}) + + return merge_dicts(media, info, { + 'id': video_id, + 'display_id': display_id, + 'title': self._og_search_title(webpage, default=None) or display_id, + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + }) From 1211bb6dace4773f67bbf46b8944317679573a1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 01:08:48 +0700 Subject: [PATCH 0362/1201] [YoutubeDL] Improve _make_archive_id (closes #19149) --- youtube_dl/YoutubeDL.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c168415ce..bc9fc270c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -82,6 +82,7 @@ from .utils import ( sanitize_url, sanitized_Request, std_headers, + str_or_none, subtitles_filename, UnavailableVideoError, url_basename, @@ -2067,9 +2068,12 @@ class YoutubeDL(object): # and backwards compatibility with prior versions extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist if extractor is None: + url = str_or_none(info_dict.get('url')) + if not url: + return # Try to find matching extractor for the URL and take its ie_key for ie in self._ies: - if ie.suitable(info_dict['url']): + if ie.suitable(url): extractor = ie.ie_key() break else: From f1f5b47255a44e791a84b769c523127e0f047578 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 01:10:12 +0700 Subject: [PATCH 0363/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4872cd9fc..b0fbde43d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +version <unreleased> + +Core +* [utils] Improve JSON-LD regular expression (#18058) +* [YoutubeDL] Fallback to ie_key of matching extractor while making + download archive id when no explicit ie_key is provided (#19022) + +Extractors ++ [malltv] Add support for mall.tv (#18058, #17856) ++ [spankbang:playlist] Add support for playlists (#19145) +* [spankbang] Extend URL regular expression +* [trutv] Fix extraction (#17336) +* [toutv] Fix authentication (#16398, #18700) +* [pornhub] Fix tags and categories extraction (#13720, #19135) +* [pornhd] Fix formats extraction ++ [pornhd] Extract like count (#19123, #19125) +* [radiocanada] Switch to the new media requests (#19115) ++ [teachable] Add support for courses.workitdaily.com (#18871) +- [vporn] Remove extractor (#16276) ++ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086) ++ [drtuber] Extract duration (#19078) +* [soundcloud] Fix paged playlists extraction, add support for albums and update client id +* [soundcloud] Update client id +* [drtv] Improve preference (#19079) ++ [openload] Add support for openload.pw and oload.pw (#18930) ++ [openload] Add support for oload.info (#19073) +* [crackle] Authorize media detail request (#16931) + + version 2019.01.30.1 Core From 04eacf54530e6a17129d3f1b90f759f9935f2b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Feb 2019 01:12:51 +0700 Subject: [PATCH 0364/1201] release 2019.02.08 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 423a08e4d..7128d998f 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.08** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.30.1 +[debug] youtube-dl version 2019.02.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index b0fbde43d..398528f76 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.02.08 Core * [utils] Improve JSON-LD regular expression (#18058) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2918520c3..32fe6b647 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -476,6 +476,7 @@ - **mailru:music**: Музыка@Mail.Ru - **mailru:music:search**: Музыка@Mail.Ru - **MakerTV** + - **MallTV** - **mangomolo:live** - **mangomolo:video** - **ManyVids** @@ -827,6 +828,7 @@ - **southpark.nl** - **southparkstudios.dk** - **SpankBang** + - **SpankBangPlaylist** - **Spankwire** - **Spiegel** - **Spiegel:Article**: Articles on spiegel.de @@ -1057,7 +1059,6 @@ - **Voot** - **VoxMedia** - **VoxMediaVolume** - - **Vporn** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be diff --git a/youtube_dl/version.py b/youtube_dl/version.py index be3bbdd73..4dc5a611e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.30.1' +__version__ = '2019.02.08' From 91effe22a091035bc5abace2fcf562a0db89090f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 8 Feb 2019 07:21:31 +0100 Subject: [PATCH 0365/1201] [linkedin:learning] extract chapter_number and chapter_id(closes #19162) --- youtube_dl/extractor/linkedin.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py index 259fc4c5e..5a86b0064 100644 --- a/youtube_dl/extractor/linkedin.py +++ b/youtube_dl/extractor/linkedin.py @@ -34,12 +34,15 @@ class LinkedInLearningBaseIE(InfoExtractor): 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, }, query=query)['elements'][0] - def _get_video_id(self, urn, course_slug, video_slug): + def _get_urn_id(self, video_data): + urn = video_data.get('urn') if urn: mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn) if mobj: return mobj.group(1) - return '%s/%s' % (course_slug, video_slug) + + def _get_video_id(self, video_data, course_slug, video_slug): + return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) def _real_initialize(self): email, password = self._get_login_info() @@ -123,7 +126,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr')) return { - 'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug), + 'id': self._get_video_id(video_data, course_slug, video_slug), 'title': title, 'formats': formats, 'thumbnail': video_data.get('defaultThumbnail'), @@ -154,18 +157,21 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE): course_data = self._call_api(course_slug, 'chapters,description,title') entries = [] - for chapter in course_data.get('chapters', []): + for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1): chapter_title = chapter.get('title') + chapter_id = self._get_urn_id(chapter) for video in chapter.get('videos', []): video_slug = video.get('slug') if not video_slug: continue entries.append({ '_type': 'url_transparent', - 'id': self._get_video_id(video.get('urn'), course_slug, video_slug), + 'id': self._get_video_id(video, course_slug, video_slug), 'title': video.get('title'), 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), 'chapter': chapter_title, + 'chapter_number': chapter_number, + 'chapter_id': chapter_id, 'ie_key': LinkedInLearningIE.ie_key(), }) From e9dee7f1b26e1c23b011a5ad5433d4debc6b48ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Feb 2019 23:49:37 +0700 Subject: [PATCH 0366/1201] [trunews] Add extractor (closes #19153) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/trunews.py | 75 ++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 youtube_dl/extractor/trunews.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f212b5116..3e1b63b4b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1171,6 +1171,7 @@ from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE +from .trunews import TruNewsIE from .trutv import TruTVIE from .tube8 import Tube8IE from .tubitv import TubiTvIE diff --git a/youtube_dl/extractor/trunews.py b/youtube_dl/extractor/trunews.py new file mode 100644 index 000000000..b0c7caabf --- /dev/null +++ b/youtube_dl/extractor/trunews.py @@ -0,0 +1,75 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + dict_get, + float_or_none, + int_or_none, + unified_timestamp, + update_url_query, + url_or_none, +) + + +class TruNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', + 'md5': 'a19c024c3906ff954fac9b96ce66bb08', + 'info_dict': { + 'id': '5c5a21e65d3c196e1c0020cc', + 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech', + 'ext': 'mp4', + 'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?", + 'description': 'md5:c583b72147cc92cf21f56a31aff7a670', + 'duration': 3685, + 'timestamp': 1549411440, + 'upload_date': '20190206', + }, + 'add_ie': ['Zype'], + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + video = self._download_json( + 'https://api.zype.com/videos', display_id, query={ + 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H', + 'per_page': 1, + 'active': 'true', + 'friendly_title': display_id, + })['response'][0] + + zype_id = video['_id'] + + thumbnails = [] + thumbnails_list = video.get('thumbnails') + if isinstance(thumbnails_list, list): + for thumbnail in thumbnails_list: + if not isinstance(thumbnail, dict): + continue + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + '_type': 'url_transparent', + 'url': update_url_query( + 'https://player.zype.com/embed/%s.js' % zype_id, + {'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}), + 'ie_key': 'Zype', + 'id': zype_id, + 'display_id': display_id, + 'title': video.get('title'), + 'description': dict_get(video, ('description', 'ott_description', 'short_description')), + 'duration': int_or_none(video.get('duration')), + 'timestamp': unified_timestamp(video.get('published_at')), + 'average_rating': float_or_none(video.get('rating')), + 'view_count': int_or_none(video.get('request_count')), + 'thumbnails': thumbnails, + } From f516f44094d2244a805d8d0ac4d809fc6cd16782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Feb 2019 23:44:08 +0700 Subject: [PATCH 0367/1201] [soundcloud] Extract more metadata --- youtube_dl/extractor/soundcloud.py | 65 +++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5536e7851..15da3496e 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -16,7 +16,8 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, - unified_strdate, + try_get, + unified_timestamp, update_url_query, url_or_none, ) @@ -51,12 +52,17 @@ class SoundcloudIE(InfoExtractor): 'info_dict': { 'id': '62986583', 'ext': 'mp3', - 'upload_date': '20121011', + 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'uploader': 'E.T. ExTerrestrial Music', - 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', + 'timestamp': 1349920598, + 'upload_date': '20121011', 'duration': 143, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, } }, # not streamable song @@ -68,9 +74,14 @@ class SoundcloudIE(InfoExtractor): 'title': 'Goldrushed', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', + 'timestamp': 1337635207, 'upload_date': '20120521', - 'duration': 227, + 'duration': 30, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, 'params': { # rtmp @@ -85,11 +96,16 @@ class SoundcloudIE(InfoExtractor): 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'uploader': 'jaimeMF', 'description': 'test chars: \"\'/\\ä↭', + 'uploader': 'jaimeMF', + 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # private link (alt format) @@ -100,11 +116,16 @@ class SoundcloudIE(InfoExtractor): 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'uploader': 'jaimeMF', 'description': 'test chars: \"\'/\\ä↭', + 'uploader': 'jaimeMF', + 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # downloadable song @@ -117,9 +138,14 @@ class SoundcloudIE(InfoExtractor): 'title': 'Bus Brakes', 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 'uploader': 'oddsamples', + 'timestamp': 1389232924, 'upload_date': '20140109', 'duration': 17, 'license': 'cc-by-sa', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # private link, downloadable format @@ -132,9 +158,14 @@ class SoundcloudIE(InfoExtractor): 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'uploader': 'Ori Uplift Music', + 'timestamp': 1504206263, 'upload_date': '20170831', 'duration': 7449, 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, }, # no album art, use avatar pic for thumbnail @@ -147,10 +178,15 @@ class SoundcloudIE(InfoExtractor): 'title': 'Sideways (Prod. Mad Real)', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'uploader': 'garyvee', + 'timestamp': 1488152409, 'upload_date': '20170226', 'duration': 207, 'thumbnail': r're:https?://.*\.jpg', 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, }, 'params': { 'skip_download': True, @@ -176,22 +212,33 @@ class SoundcloudIE(InfoExtractor): def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): track_id = compat_str(info['id']) + title = info['title'] name = full_title or track_id if quiet: self.report_extraction(name) thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url') if isinstance(thumbnail, compat_str): thumbnail = thumbnail.replace('-large', '-t500x500') + username = try_get(info, lambda x: x['user']['username'], compat_str) + + def extract_count(key): + return int_or_none(info.get('%s_count' % key)) + result = { 'id': track_id, - 'uploader': info.get('user', {}).get('username'), - 'upload_date': unified_strdate(info.get('created_at')), - 'title': info['title'], + 'uploader': username, + 'timestamp': unified_timestamp(info.get('created_at')), + 'title': title, 'description': info.get('description'), 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), 'license': info.get('license'), + 'view_count': extract_count('playback'), + 'like_count': extract_count('favoritings'), + 'comment_count': extract_count('comment'), + 'repost_count': extract_count('reposts'), + 'genre': info.get('genre'), } formats = [] query = {'client_id': self._CLIENT_ID} From 4c0e0dc9dc13d53a334f75f7d7b9073f79a2dfc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Feb 2019 00:49:51 +0700 Subject: [PATCH 0368/1201] [rutube:embed] Fix extraction and add support private videos (closes #19163) --- youtube_dl/extractor/rutube.py | 115 +++++++++++++++++++++------------ 1 file changed, 72 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 10ac8ed1f..8f54d5675 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -21,7 +21,17 @@ from ..utils import ( class RutubeBaseIE(InfoExtractor): - def _extract_video(self, video, video_id=None, require_title=True): + def _download_api_info(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/video/%s/' % video_id, + video_id, 'Downloading video JSON', + 'Unable to download video JSON', query=query) + + @staticmethod + def _extract_info(video, video_id=None, require_title=True): title = video['title'] if require_title else video.get('title') age_limit = video.get('is_adult') @@ -32,7 +42,7 @@ class RutubeBaseIE(InfoExtractor): category = try_get(video, lambda x: x['category']['name']) return { - 'id': video.get('id') or video_id, + 'id': video.get('id') or video_id if video_id else video['id'], 'title': title, 'description': video.get('description'), 'thumbnail': video.get('thumbnail_url'), @@ -47,6 +57,42 @@ class RutubeBaseIE(InfoExtractor): 'is_live': bool_or_none(video.get('is_livestream')), } + def _download_and_extract_info(self, video_id, query=None): + return self._extract_info( + self._download_api_info(video_id, query=query), video_id) + + def _download_api_options(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/play/options/%s/' % video_id, + video_id, 'Downloading options JSON', + 'Unable to download options JSON', + headers=self.geo_verification_headers(), query=query) + + def _extract_formats(self, options, video_id): + formats = [] + for format_id, format_url in options['video_balancer'].items(): + ext = determine_ext(format_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + return formats + + def _download_and_extract_formats(self, video_id, query=None): + return self._extract_formats( + self._download_api_options(video_id, query=query), video_id) + class RutubeIE(RutubeBaseIE): IE_NAME = 'rutube' @@ -55,13 +101,13 @@ class RutubeIE(RutubeBaseIE): _TESTS = [{ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', - 'md5': '79938ade01294ef7e27574890d0d3769', + 'md5': '1d24f180fac7a02f3900712e5a5764d6', 'info_dict': { 'id': '3eac3b4561676c17df9132a9a1e62e3e', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Раненный кенгуру забежал в аптеку', 'description': 'http://www.ntdtv.ru ', - 'duration': 80, + 'duration': 81, 'uploader': 'NTDRussian', 'uploader_id': '29790', 'timestamp': 1381943602, @@ -94,39 +140,12 @@ class RutubeIE(RutubeBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - - video = self._download_json( - 'http://rutube.ru/api/video/%s/?format=json' % video_id, - video_id, 'Downloading video JSON') - - info = self._extract_video(video, video_id) - - options = self._download_json( - 'http://rutube.ru/api/play/options/%s/?format=json' % video_id, - video_id, 'Downloading options JSON', - headers=self.geo_verification_headers()) - - formats = [] - for format_id, format_url in options['video_balancer'].items(): - ext = determine_ext(format_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - format_url, video_id, f4m_id=format_id, fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - }) - self._sort_formats(formats) - - info['formats'] = formats + info = self._download_and_extract_info(video_id) + info['formats'] = self._download_and_extract_formats(video_id) return info -class RutubeEmbedIE(InfoExtractor): +class RutubeEmbedIE(RutubeBaseIE): IE_NAME = 'rutube:embed' IE_DESC = 'Rutube embedded videos' _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' @@ -135,7 +154,7 @@ class RutubeEmbedIE(InfoExtractor): 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'info_dict': { 'id': 'a10e53b86e8f349080f718582ce4c661', - 'ext': 'flv', + 'ext': 'mp4', 'timestamp': 1387830582, 'upload_date': '20131223', 'uploader_id': '297833', @@ -149,16 +168,26 @@ class RutubeEmbedIE(InfoExtractor): }, { 'url': 'http://rutube.ru/play/embed/8083783', 'only_matching': True, + }, { + # private video + 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ', + 'only_matching': True, }] def _real_extract(self, url): embed_id = self._match_id(url) - webpage = self._download_webpage(url, embed_id) - - canonical_url = self._html_search_regex( - r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, - 'Canonical URL') - return self.url_result(canonical_url, RutubeIE.ie_key()) + # Query may contain private videos token and should be passed to API + # requests (see #19163) + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + options = self._download_api_options(embed_id, query) + video_id = options['effective_video'] + formats = self._extract_formats(options, video_id) + info = self._download_and_extract_info(video_id, query) + info.update({ + 'extractor_key': 'Rutube', + 'formats': formats, + }) + return info class RutubePlaylistBaseIE(RutubeBaseIE): @@ -181,7 +210,7 @@ class RutubePlaylistBaseIE(RutubeBaseIE): video_url = url_or_none(result.get('video_url')) if not video_url: continue - entry = self._extract_video(result, require_title=False) + entry = self._extract_info(result, require_title=False) entry.update({ '_type': 'url', 'url': video_url, From d777f3e81c5f9f6c7be39586703f1f076a2025a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Feb 2019 04:39:23 +0700 Subject: [PATCH 0369/1201] [tvplayhome] Fix episode metadata extraction (closes #19190) --- youtube_dl/extractor/tvplay.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 8f1ff3b76..7c07b26bc 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -537,8 +537,9 @@ class TVPlayHomeIE(InfoExtractor): r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number', default=None)) episode = self._search_regex( - r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode', - default=None, group='value') + (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, + 'episode', default=None, group='value') episode_number = int_or_none(self._search_regex( r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number', default=None)) From 7d8b89163c43dfec27dab5250183e52fce838389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Feb 2019 04:41:28 +0700 Subject: [PATCH 0370/1201] [tvplayhome] Fix video id extraction (closes #19190) --- youtube_dl/extractor/tvplay.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 7c07b26bc..d82d48f94 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -493,10 +493,9 @@ class TVPlayHomeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_id = self._search_regex( - r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id', - default=None) + r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id') - if video_id: + if len(video_id) < 8: return self.url_result( 'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id) From 985637cbbfc1a79091eb2f5ca4afec84f6616c75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 12 Feb 2019 00:13:50 +0700 Subject: [PATCH 0371/1201] [twitch] Add new source format detection approach (closes #19193) --- youtube_dl/extractor/twitch.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 401615683..8c87f6dd3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -136,7 +136,12 @@ class TwitchBaseIE(InfoExtractor): source = next(f for f in formats if f['format_id'] == 'Source') source['preference'] = 10 except StopIteration: - pass # No Source stream present + for f in formats: + if '/chunked/' in f['url']: + f.update({ + 'source_preference': 10, + 'format_note': 'Source', + }) self._sort_formats(formats) From 6f5c1807f43b9dfd17fdc6932ae7ecb6c77fb1a0 Mon Sep 17 00:00:00 2001 From: bitraid <bitraid@protonmail.ch> Date: Tue, 12 Feb 2019 19:02:29 +0200 Subject: [PATCH 0372/1201] [imgur] Use video id as title fallback (closes #18590) --- youtube_dl/extractor/imgur.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 0eb54db3f..a5ba03efa 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -27,6 +27,10 @@ class ImgurIE(InfoExtractor): }, { 'url': 'https://i.imgur.com/crGpqCV.mp4', 'only_matching': True, + }, { + # no title + 'url': 'https://i.imgur.com/jxBXAMC.gifv', + 'only_matching': True, }] def _real_extract(self, url): @@ -87,7 +91,7 @@ class ImgurIE(InfoExtractor): return { 'id': video_id, 'formats': formats, - 'title': self._og_search_title(webpage), + 'title': self._og_search_title(webpage, default=video_id), } From 7bee705d8f110f09d8e72b1c863ff197ccc1d4f1 Mon Sep 17 00:00:00 2001 From: yonaikerlol <lawlietrs7@gmail.com> Date: Thu, 14 Feb 2019 11:28:16 -0400 Subject: [PATCH 0373/1201] [openload] Add support for oload.live --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index a2ae25272..c1dcbb7eb 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? (?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live) ) )/ (?:f|embed)/ @@ -346,6 +346,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.pw/f/WyKgK8s94N0', 'only_matching': True, + }, { + 'url': 'https://oload.live/f/-Z58UZ-GR4M', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 794c1b6e02591b04da931fa59745bc47bfae7492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 14 Feb 2019 23:40:46 +0700 Subject: [PATCH 0374/1201] [vshare] Pass Referer to download request (closes #19205, closes #19221) --- youtube_dl/extractor/vshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index e4ec77889..c631ac1fa 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -48,7 +48,7 @@ class VShareIE(InfoExtractor): webpage = self._download_webpage( 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, - video_id) + video_id, headers={'Referer': url}) title = self._html_search_regex( r'<title>([^<]+)', webpage, 'title') From 2b2da3ba10cc325d00b665aae87f0fa8508bccdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 Feb 2019 23:56:29 +0700 Subject: [PATCH 0375/1201] [rai] Relax _VALID_URL (closes #19232) --- youtube_dl/extractor/rai.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 548a6553b..149153b8f 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -288,7 +288,7 @@ class RaiPlayPlaylistIE(InfoExtractor): class RaiIE(RaiBaseIE): - _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE + _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE _TESTS = [{ # var uniquename = "ContentItem-..." # data-id="ContentItem-..." @@ -375,6 +375,9 @@ class RaiIE(RaiBaseIE): # Direct MMS URL 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', 'only_matching': True, + }, { + 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html', + 'only_matching': True, }] def _extract_from_content_id(self, content_id, url): From ba2e3730d125eab952eded3bb7749d479a2262d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Feb 2019 22:45:53 +0700 Subject: [PATCH 0376/1201] [noovo] Fix extraction (closes #19230) --- youtube_dl/extractor/noovo.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/noovo.py b/youtube_dl/extractor/noovo.py index 974de3c3e..b40770d07 100644 --- a/youtube_dl/extractor/noovo.py +++ b/youtube_dl/extractor/noovo.py @@ -57,7 +57,8 @@ class NoovoIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - bc_url = BrightcoveNewIE._extract_url(self, webpage) + brightcove_id = self._search_regex( + r'data-video-id=["\'](\d+)', webpage, 'brightcove id') data = self._parse_json( self._search_regex( @@ -89,7 +90,10 @@ class NoovoIE(InfoExtractor): return { '_type': 'url_transparent', 'ie_key': BrightcoveNewIE.ie_key(), - 'url': smuggle_url(bc_url, {'geo_countries': ['CA']}), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['CA']}), + 'id': brightcove_id, 'title': title, 'description': description, 'series': series, From ae65c93a26f2b3cf806477a3ee891aa461b5c6b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 00:58:13 +0700 Subject: [PATCH 0377/1201] [udemy] Update User-Agent and detect captcha (closes #14713, closes #15839, closes #18126) --- youtube_dl/extractor/udemy.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 105826e9b..89a7f6ade 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -123,10 +123,22 @@ class UdemyIE(InfoExtractor): def _download_webpage_handle(self, *args, **kwargs): headers = kwargs.get('headers', {}).copy() - headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' kwargs['headers'] = headers - return super(UdemyIE, self)._download_webpage_handle( + ret = super(UdemyIE, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) + if not ret: + return ret + webpage, _ = ret + if any(p in webpage for p in ( + '>Please verify you are a human', + 'Access to this page has been denied because we believe you are using automation tools to browse the website', + '"_pxCaptcha"')): + raise ExtractorError( + 'Udemy asks you to solve a CAPTCHA. Login with browser, ' + 'solve CAPTCHA, then export cookies and pass cookie file to ' + 'youtube-dl with --cookies.', expected=True) + return ret def _download_json(self, url_or_request, *args, **kwargs): headers = { From d7d513891b7e63337218c5cb0bf743c8f7044381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 01:05:01 +0700 Subject: [PATCH 0378/1201] [udemy] Extend _VALID_URLs (closes #14330, closes #15883) --- youtube_dl/extractor/udemy.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 89a7f6ade..ae8de9897 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -29,7 +29,7 @@ class UdemyIE(InfoExtractor): IE_NAME = 'udemy' _VALID_URL = r'''(?x) https?:// - www\.udemy\.com/ + (?:[^/]+\.)?udemy\.com/ (?: [^#]+\#/lecture/| lecture/view/?\?lectureId=| @@ -64,6 +64,9 @@ class UdemyIE(InfoExtractor): # only outputs rendition 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0', 'only_matching': True, + }, { + 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757', + 'only_matching': True, }] def _extract_course_info(self, webpage, video_id): @@ -415,8 +418,14 @@ class UdemyIE(InfoExtractor): class UdemyCourseIE(UdemyIE): IE_NAME = 'udemy:course' - _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P[^/?#&]+)' - _TESTS = [] + _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.udemy.com/java-tutorial/', + 'only_matching': True, + }, { + 'url': 'https://wipro.udemy.com/java-tutorial/', + 'only_matching': True, + }] @classmethod def suitable(cls, url): From c9a0ea6e51eff28b9a383a47215870fd5875fc3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 05:00:16 +0700 Subject: [PATCH 0379/1201] [bilibili] Update keys (closes #19233) --- youtube_dl/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 4d6b051fe..3746671d3 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -93,8 +93,8 @@ class BiliBiliIE(InfoExtractor): }] }] - _APP_KEY = '84956560bc028eb7' - _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' + _APP_KEY = 'iVGUTjsxvpLeuDCf' + _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt' def _report_error(self, result): if 'message' in result: From 659e93fcf5c0480ac461cda412335cecf6a5595f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 07:12:10 +0700 Subject: [PATCH 0380/1201] [linuxacademy] Add extractor (closes #12207) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/linuxacademy.py | 174 +++++++++++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 youtube_dl/extractor/linuxacademy.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3e1b63b4b..c70452dcd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -593,6 +593,7 @@ from .linkedin import ( LinkedInLearningIE, LinkedInLearningCourseIE, ) +from .linuxacademy import LinuxAcademyIE from .litv import LiTVIE from .liveleak import ( LiveLeakIE, diff --git a/youtube_dl/extractor/linuxacademy.py b/youtube_dl/extractor/linuxacademy.py new file mode 100644 index 000000000..a78c6556e --- /dev/null +++ b/youtube_dl/extractor/linuxacademy.py @@ -0,0 +1,174 @@ +from __future__ import unicode_literals + +import json +import random +import re + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_HTTPError, + compat_str, +) +from ..utils import ( + ExtractorError, + orderedSet, + unescapeHTML, + urlencode_postdata, + urljoin, +) + + +class LinuxAcademyIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?linuxacademy\.com/cp/ + (?: + courses/lesson/course/(?P\d+)/lesson/(?P\d+)| + modules/view/id/(?P\d+) + ) + ''' + _TESTS = [{ + 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154', + 'info_dict': { + 'id': '1498-2', + 'ext': 'mp4', + 'title': "Introduction to the Practitioner's Brief", + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Requires Linux Academy account credentials', + }, { + 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2', + 'only_matching': True, + }, { + 'url': 'https://linuxacademy.com/cp/modules/view/id/154', + 'info_dict': { + 'id': '154', + 'title': 'AWS Certified Cloud Practitioner', + 'description': 'md5:039db7e60e4aac9cf43630e0a75fa834', + }, + 'playlist_count': 41, + 'skip': 'Requires Linux Academy account credentials', + }] + + _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize' + _ORIGIN_URL = 'https://linuxacademy.com' + _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx' + _NETRC_MACHINE = 'linuxacademy' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + def random_string(): + return ''.join([ + random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~') + for _ in range(32)]) + + webpage, urlh = self._download_webpage_handle( + self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ + 'client_id': self._CLIENT_ID, + 'response_type': 'token id_token', + 'redirect_uri': self._ORIGIN_URL, + 'scope': 'openid email user_impersonation profile', + 'audience': self._ORIGIN_URL, + 'state': random_string(), + 'nonce': random_string(), + }) + + login_data = self._parse_json( + self._search_regex( + r'atob\(\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'login info', group='value'), None, + transform_source=lambda x: compat_b64decode(x).decode('utf-8') + )['extraParams'] + + login_data.update({ + 'client_id': self._CLIENT_ID, + 'redirect_uri': self._ORIGIN_URL, + 'tenant': 'lacausers', + 'connection': 'Username-Password-Authentication', + 'username': username, + 'password': password, + 'sso': 'true', + }) + + login_state_url = compat_str(urlh.geturl()) + + try: + login_page = self._download_webpage( + 'https://login.linuxacademy.com/usernamepassword/login', None, + 'Downloading login page', data=json.dumps(login_data).encode(), + headers={ + 'Content-Type': 'application/json', + 'Origin': 'https://login.linuxacademy.com', + 'Referer': login_state_url, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read(), None) + message = error.get('description') or error['code'] + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, message), expected=True) + raise + + callback_page, urlh = self._download_webpage_handle( + 'https://login.linuxacademy.com/login/callback', None, + 'Downloading callback page', + data=urlencode_postdata(self._hidden_inputs(login_page)), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Origin': 'https://login.linuxacademy.com', + 'Referer': login_state_url, + }) + + access_token = self._search_regex( + r'access_token=([^=&]+)', compat_str(urlh.geturl()), + 'access token') + + self._download_webpage( + 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s' + % access_token, None, 'Downloading token validation page') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id') + item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id) + + webpage = self._download_webpage(url, item_id) + + # course path + if course_id: + entries = [ + self.url_result( + urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key()) + for lesson_url in orderedSet(re.findall( + r']+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)', + webpage))] + title = unescapeHTML(self._html_search_regex( + (r'class=["\']course-title["\'][^>]*>(?P[^<]+)', + r'var\s+title\s*=\s*(["\'])(?P(?:(?!\1).)+)\1'), + webpage, 'title', default=None, group='value')) + description = unescapeHTML(self._html_search_regex( + r'var\s+description\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'description', default=None, group='value')) + return self.playlist_result(entries, course_id, title, description) + + # single video path + info = self._extract_jwplayer_data( + webpage, item_id, require_title=False, m3u8_id='hls',) + title = self._search_regex( + (r'>Lecture\s*:\s*(?P[^<]+)', + r'lessonName\s*=\s*(["\'])(?P(?:(?!\1).)+)\1'), webpage, + 'title', group='value') + info.update({ + 'id': item_id, + 'title': title, + }) + return info From 3c9647372e78134777d201e157a5ef42345c9da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 13:38:21 +0700 Subject: [PATCH 0381/1201] [tvp] Fix description extraction, make thumbnail optional and fix tests --- youtube_dl/extractor/tvp.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 3954f0b93..f9bf600b0 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -19,12 +19,12 @@ class TVPIE(InfoExtractor): _TESTS = [{ 'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536', - 'md5': '8aa518c15e5cc32dfe8db400dc921fbb', + 'md5': 'a21eb0aa862f25414430f15fdfb9e76c', 'info_dict': { 'id': '194536', 'ext': 'mp4', - 'title': 'Czas honoru, I seria – odc. 13', - 'description': 'md5:381afa5bca72655fe94b05cfe82bf53d', + 'title': 'Czas honoru, odc. 13 – Władek', + 'description': 'md5:437f48b93558370b031740546b696e24', }, }, { 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', @@ -45,6 +45,7 @@ class TVPIE(InfoExtractor): 'title': 'Wiadomości, 28.09.2017, 19:30', 'description': 'Wydanie główne codziennego serwisu informacyjnego.' }, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', 'only_matching': True, @@ -75,8 +76,10 @@ class TVPIE(InfoExtractor): return { '_type': 'url_transparent', 'url': 'tvp:' + video_id, - 'description': self._og_search_description(webpage, default=None), - 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'description', webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'ie_key': 'TVPEmbed', } @@ -87,6 +90,14 @@ class TVPEmbedIE(InfoExtractor): _VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P\d+)' _TESTS = [{ + 'url': 'tvp:194536', + 'md5': 'a21eb0aa862f25414430f15fdfb9e76c', + 'info_dict': { + 'id': '194536', + 'ext': 'mp4', + 'title': 'Czas honoru, odc. 13 – Władek', + }, + }, { 'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', 'md5': '8c9cd59d16edabf39331f93bf8a766c7', 'info_dict': { From 34568dc2967d227630ed9d7150deaa62a689b937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 13:39:00 +0700 Subject: [PATCH 0382/1201] [tvp] Detect unavailable videos --- youtube_dl/extractor/tvp.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index f9bf600b0..536b580fc 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -98,6 +98,7 @@ class TVPEmbedIE(InfoExtractor): 'title': 'Czas honoru, odc. 13 – Władek', }, }, { + # not available 'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', 'md5': '8c9cd59d16edabf39331f93bf8a766c7', 'info_dict': { @@ -105,6 +106,7 @@ class TVPEmbedIE(InfoExtractor): 'ext': 'mp4', 'title': 'Panorama, 07.12.2015, 15:40', }, + 'skip': 'Transmisja została zakończona lub materiał niedostępny', }, { 'url': 'tvp:22670268', 'only_matching': True, @@ -116,10 +118,13 @@ class TVPEmbedIE(InfoExtractor): webpage = self._download_webpage( 'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id) - error_massage = get_element_by_attribute('class', 'msg error', webpage) - if error_massage: + error = self._html_search_regex( + r'(?s)]+\bclass=["\']notAvailable__text["\'][^>]*>(.+?)

', + webpage, 'error', default=None) or clean_html( + get_element_by_attribute('class', 'msg error', webpage)) + if error: raise ExtractorError('%s said: %s' % ( - self.IE_NAME, clean_html(error_massage)), expected=True) + self.IE_NAME, clean_html(error)), expected=True) title = self._search_regex( r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P.+?)\1', From d93083789bf9c318b18d52ac132e9495345b9ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 17 Feb 2019 14:09:30 +0700 Subject: [PATCH 0383/1201] [tvp:series] Fix extraction --- youtube_dl/extractor/tvp.py | 67 ++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 536b580fc..05669a366 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -1,14 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor from ..utils import ( - determine_ext, clean_html, - get_element_by_attribute, + determine_ext, ExtractorError, + get_element_by_attribute, + orderedSet, ) @@ -198,46 +200,35 @@ class TVPEmbedIE(InfoExtractor): class TVPSeriesIE(InfoExtractor): IE_NAME = 'tvp:series' - _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$' + _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)/video' _TESTS = [{ - 'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem', + 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video', 'info_dict': { - 'title': 'Ogniem i mieczem', - 'id': '4278026', + 'id': '38678312', }, - 'playlist_count': 4, - }, { - 'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat', - 'info_dict': { - 'title': 'Boso przez świat', - 'id': '9329207', - }, - 'playlist_count': 86, + 'playlist_count': 115, }] + def _entries(self, url, display_id): + for page_num in itertools.count(1): + page = self._download_webpage( + url, display_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + + video_ids = orderedSet(re.findall( + r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % display_id, + page)) + + if not video_ids: + break + + for video_id in video_ids: + yield self.url_result( + 'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(), + video_id=video_id) + def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id, tries=5) - - title = self._html_search_regex( - r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series') - playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id') - playlist = self._download_webpage( - 'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend' - 'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5, - note='Downloading playlist') - - videos_paths = re.findall( - '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist) - entries = [ - self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key()) - for v_path in videos_paths] - - return { - '_type': 'playlist', - 'id': playlist_id, - 'display_id': display_id, - 'title': title, - 'entries': entries, - } + mobj = re.match(self._VALID_URL, url) + display_id, playlist_id = mobj.group('display_id', 'id') + return self.playlist_result(self._entries(url, display_id), playlist_id) From 388cfbd3d8915ebb99714ac8e7ce4151edf96d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 17 Feb 2019 14:27:00 +0700 Subject: [PATCH 0384/1201] [tvp:website] Improve support --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/tvp.py | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c70452dcd..923dfe7f4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1218,7 +1218,7 @@ from .tvnow import ( from .tvp import ( TVPEmbedIE, TVPIE, - TVPSeriesIE, + TVPWebsiteIE, ) from .tvplay import ( TVPlayIE, diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 05669a366..accff75b5 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -198,19 +198,36 @@ class TVPEmbedIE(InfoExtractor): } -class TVPSeriesIE(InfoExtractor): +class TVPWebsiteIE(InfoExtractor): IE_NAME = 'tvp:series' - _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)/video' + _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)' _TESTS = [{ + # series 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video', 'info_dict': { 'id': '38678312', }, 'playlist_count': 115, + }, { + # film + 'url': 'https://vod.tvp.pl/website/gloria,35139666', + 'info_dict': { + 'id': '36637049', + 'ext': 'mp4', + 'title': 'Gloria, Gloria', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['TVPEmbed'], + }, { + 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312', + 'only_matching': True, }] - def _entries(self, url, display_id): + def _entries(self, display_id, playlist_id): + url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id) for page_num in itertools.count(1): page = self._download_webpage( url, display_id, 'Downloading page %d' % page_num, @@ -231,4 +248,5 @@ class TVPSeriesIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id, playlist_id = mobj.group('display_id', 'id') - return self.playlist_result(self._entries(url, display_id), playlist_id) + return self.playlist_result( + self._entries(display_id, playlist_id), playlist_id) From c76fc5b22a70f9ac24fe7e34c37aa8ef82e85c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 18 Feb 2019 02:10:06 +0700 Subject: [PATCH 0385/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 398528f76..adbdf166d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +version <unreleased> + +Extractors +* [tvp:website] Fix and improve extraction ++ [tvp] Detect unavailable videos +* [tvp] Fix description extraction and make thumbnail optional ++ [linuxacademy] Add support for linuxacademy.com (#12207) +* [bilibili] Update keys (#19233) +* [udemy] Extend URL regular expressions (#14330, #15883) +* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126) +* [noovo] Fix extraction (#19230) +* [rai] Relax URL regular expression (#19232) ++ [vshare] Pass Referer to download request (#19205, #19221) ++ [openload] Add support for oload.live (#19222) +* [imgur] Use video id as title fallback (#18590) ++ [twitch] Add new source format detection approach (#19193) +* [tvplayhome] Fix video id extraction (#19190) +* [tvplayhome] Fix episode metadata extraction (#19190) +* [rutube:embed] Fix extraction (#19163) ++ [rutube:embed] Add support private videos (#19163) ++ [soundcloud] Extract more metadata ++ [trunews] Add support for trunews.com (#19153) ++ [linkedin:learning] Extract chapter_number and chapter_id (#19162) + + version 2019.02.08 Core From 77a842c8926625fe791ed36613f183bb195394cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 18 Feb 2019 02:11:11 +0700 Subject: [PATCH 0386/1201] release 2019.02.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7128d998f..ff626883d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.02.08 +[debug] youtube-dl version 2019.02.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index adbdf166d..f9dd7928f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.02.18 Extractors * [tvp:website] Fix and improve extraction diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 32fe6b647..d8a8d7ede 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -458,6 +458,7 @@ - **LineTV** - **linkedin:learning** - **linkedin:learning:course** + - **LinuxAcademy** - **LiTV** - **LiveLeak** - **LiveLeakEmbed** @@ -915,6 +916,7 @@ - **ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** + - **TruNews** - **TruTV** - **Tube8** - **TubiTv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4dc5a611e..ea1d5a4a5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.02.08' +__version__ = '2019.02.18' From caf48f557a8f4f904c88346bcfc462069b8745bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 21 Feb 2019 05:59:07 +0700 Subject: [PATCH 0387/1201] [metacafe] Fix family filter bypass (closes #19287) --- youtube_dl/extractor/metacafe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 28f59f63c..9e92416d1 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -1,12 +1,13 @@ from __future__ import unicode_literals +import json import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_urllib_parse, compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, @@ -144,7 +145,7 @@ class MetacafeIE(InfoExtractor): headers = { # Disable family filter - 'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False}) + 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False})) } # AnyClip videos require the flashversion cookie so that we get the link From 37b239b3b66ea9e2a71bae41e9da6dba8ee5554c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Feb 2019 00:43:29 +0700 Subject: [PATCH 0388/1201] [downloader/external] Fix infinite retries for curl (closes #19303) --- youtube_dl/downloader/external.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 958d00aac..0b88bfd94 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -121,7 +121,11 @@ class CurlFD(ExternalFD): cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--verbose', 'verbose') cmd += self._option('--limit-rate', 'ratelimit') - cmd += self._option('--retry', 'retries') + retry = self._option('--retry', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '2147483647' + cmd += retry cmd += self._option('--max-filesize', 'max_filesize') cmd += self._option('--interface', 'source_address') cmd += self._option('--proxy', 'proxy') From 8c80603f1adea843d96c0598b902106c7a3efb7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Feb 2019 00:58:56 +0700 Subject: [PATCH 0389/1201] [downloader/external] Add support for rate limit and retries for wget --- youtube_dl/downloader/external.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 0b88bfd94..22e6093b3 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -164,6 +164,12 @@ class WgetFD(ExternalFD): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--limit-rate', 'ratelimit') + retry = self._option('--tries', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '0' + cmd += retry cmd += self._option('--bind-address', 'source_address') cmd += self._option('--proxy', 'proxy') cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') From f0228f56fb2441510aa966ba9298e388b209cde1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 Feb 2019 21:01:25 +0700 Subject: [PATCH 0390/1201] [bbccouk] Make subtitles non fatal (#19651) --- youtube_dl/extractor/bbc.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index eac9a5a46..13340ec64 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1,8 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re import itertools +import re +import xml from .common import InfoExtractor from ..utils import ( @@ -17,6 +18,7 @@ from ..utils import ( parse_iso8601, try_get, unescapeHTML, + url_or_none, urlencode_postdata, urljoin, ) @@ -310,7 +312,13 @@ class BBCCoUkIE(InfoExtractor): def _get_subtitles(self, media, programme_id): subtitles = {} for connection in self._extract_connections(media): - captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') + cc_url = url_or_none(connection.get('href')) + if not cc_url: + continue + captions = self._download_xml( + cc_url, programme_id, 'Downloading captions', fatal=False) + if not isinstance(captions, xml.etree.ElementTree.Element): + continue lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') subtitles[lang] = [ { From 55b8588f0e4dd9597b6da5c46d05b9dd1e9f5960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 Feb 2019 23:19:15 +0700 Subject: [PATCH 0391/1201] [servus] Fix extraction (closes #19297) --- youtube_dl/extractor/servus.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py index 264e1dd8b..e579d42cf 100644 --- a/youtube_dl/extractor/servus.py +++ b/youtube_dl/extractor/servus.py @@ -1,31 +1,44 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class ServusIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)' + _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)' _TESTS = [{ 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/', - 'md5': '046dee641cda1c4cabe13baef3be2c1c', + 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4', 'info_dict': { 'id': 'AA-1T6VBU5PW1W12', 'ext': 'mp4', - 'title': 'Die Grünen aus Volkssicht', - 'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Die Grünen aus Sicht des Volkes', + 'description': 'md5:1247204d85783afe3682644398ff2ec4', + 'thumbnail': r're:^https?://.*\.jpg', } }, { 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/', 'only_matching': True, + }, { + 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/', + 'only_matching': True, + }, { + 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + video_id = self._match_id(url).upper() webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) + title = self._search_regex( + (r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'), + webpage, 'title', default=None, + group='title') or self._og_search_title(webpage) + title = re.sub(r'\s*-\s*Servus TV\s*$', '', title) description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) From db1c3a9d3f202cc6f3fd83a2a918869e7c0d147f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 27 Feb 2019 03:41:15 +0700 Subject: [PATCH 0392/1201] [periscope] Extract width and height (closes #20015) --- youtube_dl/extractor/periscope.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 8afe541ec..b337a56c0 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + int_or_none, parse_iso8601, unescapeHTML, ) @@ -75,6 +76,14 @@ class PeriscopeIE(PeriscopeBaseIE): 'url': broadcast[image], } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + width = int_or_none(broadcast.get('width')) + height = int_or_none(broadcast.get('height')) + + def add_width_and_height(f): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + video_urls = set() formats = [] for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): @@ -83,16 +92,21 @@ class PeriscopeIE(PeriscopeBaseIE): continue video_urls.add(video_url) if format_id != 'rtmp': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( video_url, token, 'mp4', entry_protocol='m3u8_native' if state in ('ended', 'timed_out') else 'm3u8', - m3u8_id=format_id, fatal=False)) + m3u8_id=format_id, fatal=False) + if len(m3u8_formats) == 1: + add_width_and_height(m3u8_formats[0]) + formats.extend(m3u8_formats) continue - formats.append({ + rtmp_format = { 'url': video_url, 'ext': 'flv' if format_id == 'rtmp' else 'mp4', - }) + } + add_width_and_height(rtmp_format) + formats.append(rtmp_format) self._sort_formats(formats) return { From 9d9a8676dc02101069cf5fa9862500d39352538c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Feb 2019 23:26:52 +0700 Subject: [PATCH 0393/1201] [francetv:site] Extend video id regex (closes #20029, closes #20071) --- youtube_dl/extractor/francetv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 2ffe83a78..3c4ef08a8 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): catalogue = None video_id = self._search_regex( - r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1', + r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', default=None, group='id') if not video_id: From ff60ec8f029d12c119855ec82d7ce9ecda388651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Mar 2019 00:47:18 +0700 Subject: [PATCH 0394/1201] [npo] Fix extraction (#20084) --- youtube_dl/extractor/npo.py | 120 +++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 5a427c396..857845d35 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -12,11 +12,16 @@ from ..utils import ( ExtractorError, fix_xml_ampersands, int_or_none, + merge_dicts, orderedSet, parse_duration, qualities, + str_or_none, strip_jsonp, unified_strdate, + unified_timestamp, + url_or_none, + urlencode_postdata, ) @@ -176,9 +181,118 @@ class NPOIE(NPOBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - return self._get_info(video_id) + try: + return self._get_info(url, video_id) + except ExtractorError: + return self._get_old_info(video_id) - def _get_info(self, video_id): + def _get_info(self, url, video_id): + token = self._download_json( + 'https://www.npostart.nl/api/token', video_id, + 'Downloading token', headers={ + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest', + })['token'] + + player = self._download_json( + 'https://www.npostart.nl/player/%s' % video_id, video_id, + 'Downloading player JSON', data=urlencode_postdata({ + 'autoplay': 0, + 'share': 1, + 'pageUrl': url, + 'hasAdConsent': 0, + '_token': token, + })) + + player_token = player['token'] + + format_urls = set() + formats = [] + for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'): + streams = self._download_json( + 'https://start-player.npo.nl/video/%s/streams' % video_id, + video_id, 'Downloading %s profile JSON' % profile, fatal=False, + query={ + 'profile': profile, + 'quality': 'npo', + 'tokenId': player_token, + 'streamType': 'broadcast', + }) + if not streams: + continue + stream = streams.get('stream') + if not isinstance(stream, dict): + continue + stream_url = url_or_none(stream.get('src')) + if not stream_url or stream_url in format_urls: + continue + format_urls.add(stream_url) + if stream.get('protection') is not None: + continue + stream_type = stream.get('type') + stream_ext = determine_ext(stream_url) + if stream_type == 'application/dash+xml' or stream_ext == 'mpd': + formats.extend(self._extract_mpd_formats( + stream_url, video_id, mpd_id='dash', fatal=False)) + elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + elif '.ism/Manifest' in stream_url: + formats.extend(self._extract_ism_formats( + stream_url, video_id, ism_id='mss', fatal=False)) + else: + formats.append({ + 'url': stream_url, + }) + + self._sort_formats(formats) + + info = { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } + + embed_url = url_or_none(player.get('embedUrl')) + if embed_url: + webpage = self._download_webpage( + embed_url, video_id, 'Downloading embed page', fatal=False) + if webpage: + video = self._parse_json( + self._search_regex( + r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video', + default='{}'), video_id) + if video: + title = video.get('episodeTitle') + subtitles = {} + subtitles_list = video.get('subtitles') + if isinstance(subtitles_list, list): + for cc in subtitles_list: + cc_url = url_or_none(cc.get('src')) + if not cc_url: + continue + lang = str_or_none(cc.get('language')) or 'nl' + subtitles.setdefault(lang, []).append({ + 'url': cc_url, + }) + return merge_dicts({ + 'title': title, + 'description': video.get('description'), + 'thumbnail': url_or_none( + video.get('still_image_url') or video.get('orig_image_url')), + 'duration': int_or_none(video.get('duration')), + 'timestamp': unified_timestamp(video.get('broadcastDate')), + 'creator': video.get('channel'), + 'series': video.get('title'), + 'episode': title, + 'episode_number': int_or_none(video.get('episodeNumber')), + 'subtitles': subtitles, + }, info) + + return info + + def _get_old_info(self, video_id): metadata = self._download_json( 'http://e.omroep.nl/metadata/%s' % video_id, video_id, @@ -280,7 +394,7 @@ class NPOIE(NPOBaseIE): # JSON else: video_url = stream_info.get('url') - if not video_url or video_url in urls: + if not video_url or 'vodnotavailable.' in video_url or video_url in urls: continue urls.add(video_url) if determine_ext(video_url) == 'm3u8': From 333f617b1207cb53efaa5e2f7af174cfa87deee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Mar 2019 01:02:36 +0700 Subject: [PATCH 0395/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index f9dd7928f..f717f99a8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version <unreleased> + +Core ++ [downloader/external] Add support for rate limit and retries for wget +* [downloader/external] Fix infinite retries for curl (#19303) + +Extractors +* [npo] Fix extraction (#20084) +* [francetv:site] Extend video id regex (#20029, #20071) ++ [periscope] Extract width and height (#20015) +* [servus] Fix extraction (#19297) +* [bbccouk] Make subtitles non fatal (#19651) +* [metacafe] Fix family filter bypass (#19287) + + version 2019.02.18 Extractors From 04c33bdfb3cd73e71bf0788f02998cab30cf1da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Mar 2019 01:03:51 +0700 Subject: [PATCH 0396/1201] release 2019.03.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ff626883d..71a500f04 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.02.18 +[debug] youtube-dl version 2019.03.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index f717f99a8..018a30641 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.03.01 Core + [downloader/external] Add support for rate limit and retries for wget diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ea1d5a4a5..42ba37f15 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.02.18' +__version__ = '2019.03.01' From 06242d44fe261999e2424d9ecb00f20ff30ccb9b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 1 Mar 2019 08:14:34 +0100 Subject: [PATCH 0397/1201] [vimeo] add support for Vimeo Pro portfolio protected videos(closes #20070) --- youtube_dl/extractor/vimeo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 6215b3258..6f32ea6f1 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -502,7 +502,11 @@ class VimeoIE(VimeoBaseInfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') orig_url = url - if mobj.group('pro') or mobj.group('player'): + if mobj.group('pro'): + # some videos require portfolio_id to be present in player url + # https://github.com/rg3/youtube-dl/issues/20070 + url = self._extract_url(url, self._download_webpage(url, video_id)) + elif mobj.group('player'): url = 'https://player.vimeo.com/video/' + video_id elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id From c5b02efe20cff1612104fd731c7f02cbbce4f5f3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 1 Mar 2019 15:08:11 +0100 Subject: [PATCH 0398/1201] [sixplay] handle videos with empty assets(closes #20016) --- youtube_dl/extractor/sixplay.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index 0c4f865ef..35bc9fa50 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -61,7 +61,8 @@ class SixPlayIE(InfoExtractor): quality_key = qualities(['lq', 'sd', 'hq', 'hd']) formats = [] subtitles = {} - for asset in clip_data['assets']: + assets = clip_data.get('assets') or [] + for asset in assets: asset_url = asset.get('full_physical_path') protocol = asset.get('protocol') if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls: From 398e1e21d6cbf6eb1e8e7e84de4fad30b7d59613 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 1 Mar 2019 15:34:05 +0100 Subject: [PATCH 0399/1201] [espn] extend _VALID_URL regex(closes #20013) --- youtube_dl/extractor/espn.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 127c69b2e..8cc9bd165 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -29,7 +29,8 @@ class ESPNIE(OnceIE): (?: .*?\?.*?\bid=| /_/id/ - ) + )| + [^/]+/video/ ) )| (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ @@ -94,6 +95,9 @@ class ESPNIE(OnceIE): }, { 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', 'only_matching': True, + }, { + 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings', + 'only_matching': True, }] def _real_extract(self, url): From dca0e0040ae97b2fc0cd54d5e819a5a278937350 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 2 Mar 2019 08:01:42 +0100 Subject: [PATCH 0400/1201] Revert "use older login method(closes #11572)" This reverts commit cc6a960e134614f8af2a42dcd8bf146d63638a3c. --- youtube_dl/extractor/crunchyroll.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 5e2cbe41d..ce2e2d3ba 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -56,17 +56,6 @@ class CrunchyrollBaseIE(InfoExtractor): if username is None: return - self._download_webpage( - 'https://www.crunchyroll.com/?a=formhandler', - None, 'Logging in', 'Wrong login info', - data=urlencode_postdata({ - 'formname': 'RpcApiUser_Login', - 'next_url': 'https://www.crunchyroll.com/acct/membership', - 'name': username, - 'password': password, - })) - - ''' login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -110,7 +99,6 @@ class CrunchyrollBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') - ''' def _real_initialize(self): self._login() From a8f83f0c56e81b871a46c18fa9ebc6643370fa48 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 2 Mar 2019 08:25:47 +0100 Subject: [PATCH 0401/1201] [crunchyroll] fix is_logged check --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index ce2e2d3ba..fd1e7afad 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -60,7 +60,7 @@ class CrunchyrollBaseIE(InfoExtractor): self._LOGIN_URL, None, 'Downloading login page') def is_logged(webpage): - return '<title>Redirecting' in webpage + return 'href="/logout"' in webpage # Already logged in if is_logged(login_page): From 7465e0aee2301c3e86fe38d6e0ef5ad01c16ec79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Mar 2019 06:25:45 +0700 Subject: [PATCH 0402/1201] [spankbang] Fix extraction (closes #20023) --- youtube_dl/extractor/spankbang.py | 45 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index fbe6ef31a..f11d728ca 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -9,6 +9,8 @@ from ..utils import ( parse_duration, parse_resolution, str_to_int, + url_or_none, + urlencode_postdata, ) @@ -64,16 +66,49 @@ class SpankBangIE(InfoExtractor): 'Video %s is not available' % video_id, expected=True) formats = [] - for mobj in re.finditer( - r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', - webpage): - format_id, format_url = mobj.group('id', 'url') + + def extract_format(format_id, format_url): + f_url = url_or_none(format_url) + if not f_url: + return f = parse_resolution(format_id) f.update({ - 'url': format_url, + 'url': f_url, 'format_id': format_id, }) formats.append(f) + + STREAM_URL_PREFIX = 'stream_url_' + + for mobj in re.finditer( + r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2' + % STREAM_URL_PREFIX, webpage): + extract_format(mobj.group('id', 'url')) + + if not formats: + stream_key = self._search_regex( + r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + webpage, 'stream key', group='value') + + sb_csrf_session = self._get_cookies( + 'https://spankbang.com')['sb_csrf_session'].value + + stream = self._download_json( + 'https://spankbang.com/api/videos/stream', video_id, + 'Downloading stream JSON', data=urlencode_postdata({ + 'id': stream_key, + 'data': 0, + 'sb_csrf_session': sb_csrf_session, + }), headers={ + 'Referer': url, + 'X-CSRFToken': sb_csrf_session, + }) + + for format_id, format_url in stream.items(): + if format_id.startswith(STREAM_URL_PREFIX): + extract_format( + format_id[len(STREAM_URL_PREFIX):], format_url) + self._sort_formats(formats) title = self._html_search_regex( From 7aeb788e564d397face83b580362189753edd9dd Mon Sep 17 00:00:00 2001 From: cclauss <cclauss@me.com> Date: Sun, 3 Mar 2019 02:16:48 +0100 Subject: [PATCH 0403/1201] [travis] Remove sudo: false Travis now recommends removing `sudo: false` from configuration: https://blog.travis-ci.com/2018-11-19-required-linux-infrastructure-migration. --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 79287ccf6..82e81d078 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ python: - "3.6" - "pypy" - "pypy3" -sudo: false env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download From 8ae113ca9df0abd790e3391cd529bac42fce304f Mon Sep 17 00:00:00 2001 From: dimqua <dimqua@users.noreply.github.com> Date: Sun, 3 Mar 2019 04:19:36 +0300 Subject: [PATCH 0404/1201] [youtube] Add more invidious instances See [Invidious-Instances](https://github.com/omarroth/invidious/wiki/Invidious-Instances) for the reference. --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c8bf98b58..457e2acea 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -352,6 +352,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| (?:www\.)?invidio\.us/| + (?:www\.)?invidious\.snopyta\.org/| + (?:www\.)?invidious\.kabi\.tk/| + (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: From 0a5baf9c210df9f492ae48dd8fdae90561c971bd Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 3 Mar 2019 06:18:15 +0100 Subject: [PATCH 0405/1201] [libsyn] improve extraction(closes #20229) --- youtube_dl/extractor/libsyn.py | 64 +++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py index f7311f483..2cf444258 100644 --- a/youtube_dl/extractor/libsyn.py +++ b/youtube_dl/extractor/libsyn.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import json import re from .common import InfoExtractor from ..utils import ( + clean_html, + get_element_by_class, parse_duration, + strip_or_none, unified_strdate, ) @@ -21,7 +23,9 @@ class LibsynIE(InfoExtractor): 'id': '6385796', 'ext': 'mp3', 'title': "Champion Minded - Developing a Growth Mindset", - 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', + # description fetched using another request: + # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796 + # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', 'upload_date': '20180320', 'thumbnail': 're:^https?://.*', }, @@ -38,22 +42,36 @@ class LibsynIE(InfoExtractor): }] def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('id') - url = m.group('mainurl') + url, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, video_id) - podcast_title = self._search_regex( - r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None) - if podcast_title: - podcast_title = podcast_title.strip() - episode_title = self._search_regex( - r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title') - if episode_title: - episode_title = episode_title.strip() + data = self._parse_json(self._search_regex( + r'var\s+playlistItem\s*=\s*({.+?});', + webpage, 'JSON data block'), video_id) + + episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage) + if not episode_title: + self._search_regex( + [r'data-title="([^"]+)"', r'<title>(.+?)'], + webpage, 'episode title') + episode_title = episode_title.strip() + + podcast_title = strip_or_none(clean_html(self._search_regex( + r'

([^<]+)

', webpage, 'podcast title', + default=None) or get_element_by_class('podcast-title', webpage))) title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title + formats = [] + for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')): + f_url = data.get(k) + if not f_url: + continue + formats.append({ + 'url': f_url, + 'format_id': format_id, + }) + description = self._html_search_regex( r'(.+?)

', webpage, 'description', default=None) @@ -61,27 +79,15 @@ class LibsynIE(InfoExtractor): # Strip non-breaking and normal spaces description = description.replace('\u00A0', ' ').strip() release_date = unified_strdate(self._search_regex( - r'
Released: ([^<]+)<', webpage, 'release date', fatal=False)) - - data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block') - data = json.loads(data_json) - - formats = [{ - 'url': data['media_url'], - 'format_id': 'main', - }, { - 'url': data['media_url_libsyn'], - 'format_id': 'libsyn', - }] - thumbnail = data.get('thumbnail_url') - duration = parse_duration(data.get('duration')) + r'
Released: ([^<]+)<', + webpage, 'release date', default=None) or data.get('release_date')) return { 'id': video_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnail': data.get('thumbnail_url'), 'upload_date': release_date, - 'duration': duration, + 'duration': parse_duration(data.get('duration')), 'formats': formats, } From e7e62441cdde6dca6211c073be73677f195a0dff Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 3 Mar 2019 13:23:59 +0100 Subject: [PATCH 0406/1201] [utils] strip #HttpOnly_ prefix from cookies files (#20219) --- test/test_YoutubeDLCookieJar.py | 10 ++++++++++ test/testdata/cookies/httponly_cookies.txt | 6 ++++++ youtube_dl/utils.py | 18 +++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 test/testdata/cookies/httponly_cookies.txt diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index 6a8243590..f959798de 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -29,6 +29,16 @@ class TestYoutubeDLCookieJar(unittest.TestCase): tf.close() os.remove(tf.name) + def test_strip_httponly_prefix(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + + def assert_cookie_has_value(key): + self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') + + assert_cookie_has_value('HTTPONLY_COOKIE') + assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/cookies/httponly_cookies.txt b/test/testdata/cookies/httponly_cookies.txt new file mode 100644 index 000000000..c46541d6b --- /dev/null +++ b/test/testdata/cookies/httponly_cookies.txt @@ -0,0 +1,6 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE +www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5a0bb4b0..a71eda85d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1141,6 +1141,8 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): + _HTTPONLY_PREFIX = '#HttpOnly_' + def save(self, filename=None, ignore_discard=False, ignore_expires=False): # Store session cookies with `expires` set to 0 instead of an empty # string @@ -1150,7 +1152,21 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires) def load(self, filename=None, ignore_discard=False, ignore_expires=False): - compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires) + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + + cf = io.StringIO() + with open(filename) as f: + for line in f: + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + cf.write(compat_str(line)) + cf.seek(0) + self._really_load(cf, filename, ignore_discard, ignore_expires) # Session cookies are denoted by either `expires` field set to # an empty string or 0. MozillaCookieJar only recognizes the former # (see [1]). So we need force the latter to be recognized as session From 39c780fdec2c62135f37e3565efedf7dcad605ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 00:37:39 +0700 Subject: [PATCH 0407/1201] [extractor/common] Return MPD manifest as format's url meta field (#20242) For symmetry with other segmented media --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c3b0586a0..1fa8048b8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2120,7 +2120,7 @@ class InfoExtractor(object): bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, - 'url': base_url, + 'url': mpd_url, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), From c790e93ab5db5f318fb094b8a45f9160cdf4bd9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 00:39:15 +0700 Subject: [PATCH 0408/1201] [extractor/common] Clarify url and manifest_url meta fields --- youtube_dl/extractor/common.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1fa8048b8..641e50f3c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -102,10 +102,20 @@ class InfoExtractor(object): from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file + * url The mandatory URL representing the media: + for plain file media - HTTP URL of this file, + for RTMP - RTMP URL, + for HLS - URL of the M3U8 media playlist, + for HDS - URL of the F4M manifest, + for DASH - URL of the MPD manifest, + for MSS - URL of the ISM manifest. * manifest_url The URL of the manifest file in case of - fragmented media (DASH, hls, hds) + fragmented media: + for HLS - URL of the M3U8 master playlist, + for HDS - URL of the F4M manifest, + for DASH - URL of the MPD manifest, + for MSS - URL of the ISM manifest. * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). From 5dcd630dca9b75ec2ca920ae7799252e0e0bb599 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Mar 2019 22:26:32 +0100 Subject: [PATCH 0409/1201] [paramountnetwork] fix mgid extraction(closes #20241) --- youtube_dl/extractor/spike.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 6090e0066..21b93a5b3 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -46,8 +46,12 @@ class ParamountNetworkIE(MTVServicesInfoExtractor): _GEO_COUNTRIES = ['US'] def _extract_mgid(self, webpage): - cs = self._parse_json(self._search_regex( + root_data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+})', - webpage, 'data'), None)['children'] - c = next(c for c in cs if c.get('type') == 'VideoPlayer') + webpage, 'data'), None) + + def find_sub_data(data, data_type): + return next(c for c in data['children'] if c.get('type') == data_type) + + c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer') return c['props']['media']['video']['config']['uri'] From d9eb580a796ef6c9a248fdd8896ccf85349c35eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 23:45:40 +0700 Subject: [PATCH 0410/1201] [extractor/common] Do not fail on invalid data while parsing F4M manifest in non fatal mode --- youtube_dl/extractor/common.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 641e50f3c..55ce1a888 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -13,6 +13,7 @@ import socket import sys import time import math +import xml from ..compat import ( compat_cookiejar, @@ -1464,6 +1465,9 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): + if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: + return [] + # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: From c17eb5b4b06cfa2c8bffb378b0a5c84d4c5a6834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 23:54:25 +0700 Subject: [PATCH 0411/1201] [rai] Improve extraction (closes #20253) --- youtube_dl/extractor/rai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 149153b8f..207a6c247 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -74,11 +74,11 @@ class RaiBaseIE(InfoExtractor): if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'): continue - if ext == 'm3u8': + if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon': formats.extend(self._extract_m3u8_formats( media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif ext == 'f4m': + elif ext == 'f4m' or platform == 'flash': manifest_url = update_url_query( media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'), {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) From bb6f112d9d57d7c6260de132cad604c1c05bc5a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 23:57:39 +0700 Subject: [PATCH 0412/1201] [npo] Improve ISM extraction --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 857845d35..ad62f8ec6 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -238,7 +238,7 @@ class NPOIE(NPOBaseIE): formats.extend(self._extract_m3u8_formats( stream_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - elif '.ism/Manifest' in stream_url: + elif re.search(r'\.isml?/Manifest', stream_url): formats.extend(self._extract_ism_formats( stream_url, video_id, ism_id='mss', fatal=False)) else: From e5ada4f3ad771d4cf3f533efb2597a3f1618ce75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 00:33:08 +0700 Subject: [PATCH 0413/1201] [extractor/common] Fallback url to base URL for DASH formats --- youtube_dl/extractor/common.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 55ce1a888..a17f7cbc4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,7 +108,10 @@ class InfoExtractor(object): for RTMP - RTMP URL, for HLS - URL of the M3U8 media playlist, for HDS - URL of the F4M manifest, - for DASH - URL of the MPD manifest, + for DASH - URL of the MPD manifest or + base URL representing the media + if MPD manifest is parsed from + a string, for MSS - URL of the ISM manifest. * manifest_url The URL of the manifest file in case of @@ -2134,7 +2137,8 @@ class InfoExtractor(object): bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, - 'url': mpd_url, + # NB: mpd_url may be empty when MPD manifest is parsed from a string + 'url': mpd_url or base_url, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), From 399f76870d7dc72631e7da1f54a46ed8a039c838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 01:18:52 +0700 Subject: [PATCH 0414/1201] [compat] Introduce compat_etree_Element --- test/test_compat.py | 7 +++++++ youtube_dl/compat.py | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/test/test_compat.py b/test/test_compat.py index 51fe6aa0b..4822260ac 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.compat import ( compat_getenv, compat_setenv, + compat_etree_Element, compat_etree_fromstring, compat_expanduser, compat_shlex_split, @@ -90,6 +91,12 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文']) + def test_compat_etree_Element(self): + try: + compat_etree_Element.text + except AttributeError: + self.fail('compat_etree_Element is not a type') + def test_compat_etree_fromstring(self): xml = ''' diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 7b770340f..b2fe62f12 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2508,6 +2508,15 @@ class _TreeBuilder(etree.TreeBuilder): pass +try: + # xml.etree.ElementTree.Element is a method in Python <=2.6 and + # the following will crash with: + # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types + isinstance(None, xml.etree.ElementTree.Element) + from xml.etree.ElementTree import Element as compat_etree_Element +except TypeError: # Python <=2.6 + from xml.etree.ElementTree import _ElementInterface as compat_etree_Element + if sys.version_info[0] >= 3: def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) @@ -2969,6 +2978,7 @@ __all__ = [ 'compat_cookiejar', 'compat_cookies', 'compat_ctypes_WINFUNCTYPE', + 'compat_etree_Element', 'compat_etree_fromstring', 'compat_etree_register_namespace', 'compat_expanduser', From ee0ba927aac067dec533a618540e43ed3deebaba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 01:21:57 +0700 Subject: [PATCH 0415/1201] Use compat_etree_Element --- youtube_dl/extractor/bbc.py | 4 ++-- youtube_dl/extractor/common.py | 8 ++++---- youtube_dl/extractor/crunchyroll.py | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 13340ec64..d479d2577 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import itertools import re -import xml from .common import InfoExtractor from ..utils import ( @@ -23,6 +22,7 @@ from ..utils import ( urljoin, ) from ..compat import ( + compat_etree_Element, compat_HTTPError, compat_urlparse, ) @@ -317,7 +317,7 @@ class BBCCoUkIE(InfoExtractor): continue captions = self._download_xml( cc_url, programme_id, 'Downloading captions', fatal=False) - if not isinstance(captions, xml.etree.ElementTree.Element): + if not isinstance(captions, compat_etree_Element): continue lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') subtitles[lang] = [ diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a17f7cbc4..4839edbf7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -13,11 +13,11 @@ import socket import sys import time import math -import xml from ..compat import ( compat_cookiejar, compat_cookies, + compat_etree_Element, compat_etree_fromstring, compat_getpass, compat_integer_types, @@ -802,7 +802,7 @@ class InfoExtractor(object): fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). + Return a tuple (xml as an compat_etree_Element, URL handle). See _download_webpage docstring for arguments specification. """ @@ -823,7 +823,7 @@ class InfoExtractor(object): transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return the xml as an xml.etree.ElementTree.Element. + Return the xml as an compat_etree_Element. See _download_webpage docstring for arguments specification. """ @@ -1468,7 +1468,7 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): - if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: + if not isinstance(manifest, compat_etree_Element) and not fatal: return [] # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index fd1e7afad..5948154f8 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import re import json -import xml.etree.ElementTree as etree import zlib from hashlib import sha1 @@ -12,6 +11,7 @@ from .common import InfoExtractor from .vrv import VRVIE from ..compat import ( compat_b64decode, + compat_etree_Element, compat_etree_fromstring, compat_urllib_parse_urlencode, compat_urllib_request, @@ -390,7 +390,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if not isinstance(sub_doc, etree.Element): + if not isinstance(sub_doc, compat_etree_Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -507,7 +507,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if isinstance(streamdata, etree.Element): + if isinstance(streamdata, compat_etree_Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -518,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if isinstance(stream_info, etree.Element): + if isinstance(stream_info, compat_etree_Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -593,7 +593,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text season = episode = episode_number = duration = thumbnail = None - if isinstance(metadata, etree.Element): + if isinstance(metadata, compat_etree_Element): season = xpath_text(metadata, 'series_title') episode = xpath_text(metadata, 'episode_title') episode_number = int_or_none(xpath_text(metadata, 'episode_number')) From a551768acfd177e425f518c43a2992a50a2ff69f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 01:27:22 +0700 Subject: [PATCH 0416/1201] [facebook] Improve uploader extraction (closes #20250) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 74954049d..789dd79d5 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -424,7 +424,7 @@ class FacebookIE(InfoExtractor): uploader = clean_html(get_element_by_id( 'fbPhotoPageAuthorName', webpage)) or self._search_regex( r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', - fatal=False) or self._og_search_title(webpage, fatal=False) + default=None) or self._og_search_title(webpage, fatal=False) timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) From 97157c692c94e3853a6ad1b8a220f064815b6957 Mon Sep 17 00:00:00 2001 From: yonaikerlol Date: Tue, 5 Mar 2019 14:34:34 -0400 Subject: [PATCH 0417/1201] [openload] Add support for oload.space --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c1dcbb7eb..bae7c7ee7 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? (?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space) ) )/ (?:f|embed)/ @@ -349,6 +349,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.live/f/-Z58UZ-GR4M', 'only_matching': True, + }, { + 'url': 'https://oload.space/f/IY4eZSst3u8/', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From d347b52b63282b3276815fd03fc63a1bc8b82cf5 Mon Sep 17 00:00:00 2001 From: 0x9fff00 <0x9fff00+git@protonmail.ch> Date: Tue, 5 Mar 2019 20:11:32 +0100 Subject: [PATCH 0418/1201] [urplay] Extract timestamp (#20235) --- youtube_dl/extractor/urplay.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/urplay.py b/youtube_dl/extractor/urplay.py index 8e6fd4731..6030b7cb5 100644 --- a/youtube_dl/extractor/urplay.py +++ b/youtube_dl/extractor/urplay.py @@ -2,18 +2,31 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import unified_timestamp class URPlayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P[0-9]+)' _TESTS = [{ - 'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde', - 'md5': 'ad5f0de86f16ca4c8062cd103959a9eb', + 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand', + 'md5': 'ff5b0c89928f8083c74bbd5099c9292d', + 'info_dict': { + 'id': '203704', + 'ext': 'mp4', + 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', + 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', + 'timestamp': 1513512768, + 'upload_date': '20171217', + }, + }, { + 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', 'info_dict': { 'id': '190031', 'ext': 'mp4', 'title': 'Tripp, Trapp, Träd : Sovkudde', 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', + 'timestamp': 1440093600, + 'upload_date': '20150820', }, }, { 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden', @@ -51,6 +64,7 @@ class URPlayIE(InfoExtractor): 'title': urplayer_data['title'], 'description': self._og_search_description(webpage), 'thumbnail': urplayer_data.get('image'), + 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')), 'series': urplayer_data.get('series_title'), 'subtitles': subtitles, 'formats': formats, From fca9baf0da9720bac25d160924204395930191fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 02:45:33 +0700 Subject: [PATCH 0419/1201] [test] Fix test_compat_etree_Element --- test/test_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_compat.py b/test/test_compat.py index 4822260ac..86ff389fd 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -93,7 +93,7 @@ class TestCompat(unittest.TestCase): def test_compat_etree_Element(self): try: - compat_etree_Element.text + compat_etree_Element.items except AttributeError: self.fail('compat_etree_Element is not a type') From 829685b88a0c7610a874b980bc25b308c4f34590 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 6 Mar 2019 09:20:27 +0100 Subject: [PATCH 0420/1201] [toutv] fix authentication(closes #20261) --- youtube_dl/extractor/toutv.py | 53 +++++++++-------------------------- 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index f1ab91cf2..124ca064c 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -1,14 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import json from .radiocanada import RadioCanadaIE from ..utils import ( - extract_attributes, int_or_none, merge_dicts, - urlencode_postdata, ) @@ -38,47 +36,24 @@ class TouTvIE(RadioCanadaIE): 'url': 'https://ici.tou.tv/l-age-adulte/S01C501', 'only_matching': True, }] + _CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36' def _real_initialize(self): email, password = self._get_login_info() if email is None: return - login_webpage = self._download_webpage( - 'https://services.radio-canada.ca/auth/oauth/v2/authorize', - None, 'Downloading login page', query={ - 'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36', - 'redirect_uri': 'https://ici.tou.tv/logincallback', - 'response_type': 'token', - 'scope': 'id.write media-validation.read', - 'state': '/', - }) - - def extract_form_url_and_data(wp, default_form_url, form_spec_re=''): - form, form_elem = re.search( - r'(?s)((]+?%s[^>]*?>).+?)' % form_spec_re, wp).groups() - form_data = self._hidden_inputs(form) - form_url = extract_attributes(form_elem).get('action') or default_form_url - return form_url, form_data - - post_url, form_data = extract_form_url_and_data( - login_webpage, - 'https://services.radio-canada.ca/auth/oauth/v2/authorize/login', - r'(?:id|name)="Form-login"') - form_data.update({ - 'login-email': email, - 'login-password': password, - }) - consent_webpage = self._download_webpage( - post_url, None, 'Logging in', data=urlencode_postdata(form_data)) - post_url, form_data = extract_form_url_and_data( - consent_webpage, - 'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent') - _, urlh = self._download_webpage_handle( - post_url, None, 'Following Redirection', - data=urlencode_postdata(form_data)) - self._access_token = self._search_regex( - r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', - urlh.geturl(), 'access token') + self._access_token = self._download_json( + 'https://services.radio-canada.ca/toutv/profiling/accounts/login', + None, 'Logging in', data=json.dumps({ + 'ClientId': self._CLIENT_KEY, + 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20', + 'Email': email, + 'Password': password, + 'Scope': 'id.write media-validation.read', + }).encode(), headers={ + 'Authorization': 'client-key ' + self._CLIENT_KEY, + 'Content-Type': 'application/json;charset=utf-8', + })['access_token'] self._claims = self._call_api('validation/v2/getClaims')['claims'] def _real_extract(self, url): From 7b6e76087080eac54e14cdead4e3bc0225c654b5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 6 Mar 2019 09:28:14 +0100 Subject: [PATCH 0421/1201] [toutv] detect invalid login error --- youtube_dl/extractor/toutv.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 124ca064c..25e1fd46d 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -4,7 +4,9 @@ from __future__ import unicode_literals import json from .radiocanada import RadioCanadaIE +from ..compat import compat_HTTPError from ..utils import ( + ExtractorError, int_or_none, merge_dicts, ) @@ -42,18 +44,24 @@ class TouTvIE(RadioCanadaIE): email, password = self._get_login_info() if email is None: return - self._access_token = self._download_json( - 'https://services.radio-canada.ca/toutv/profiling/accounts/login', - None, 'Logging in', data=json.dumps({ - 'ClientId': self._CLIENT_KEY, - 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20', - 'Email': email, - 'Password': password, - 'Scope': 'id.write media-validation.read', - }).encode(), headers={ - 'Authorization': 'client-key ' + self._CLIENT_KEY, - 'Content-Type': 'application/json;charset=utf-8', - })['access_token'] + try: + self._access_token = self._download_json( + 'https://services.radio-canada.ca/toutv/profiling/accounts/login', + None, 'Logging in', data=json.dumps({ + 'ClientId': self._CLIENT_KEY, + 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20', + 'Email': email, + 'Password': password, + 'Scope': 'id.write media-validation.read', + }).encode(), headers={ + 'Authorization': 'client-key ' + self._CLIENT_KEY, + 'Content-Type': 'application/json;charset=utf-8', + })['access_token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read().decode(), None)['Message'] + raise ExtractorError(error, expected=True) + raise self._claims = self._call_api('validation/v2/getClaims')['claims'] def _real_extract(self, url): From 9d74ea6d36696396392974e94a40dce1e5a881a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Mar 2019 23:26:59 +0700 Subject: [PATCH 0422/1201] [francetv:site] Relax video id regex and update test (closes #20268) --- youtube_dl/extractor/francetv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 3c4ef08a8..6101fb6bd 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -215,7 +215,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': '162311093', + 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'ext': 'mp4', 'title': '13h15, le dimanche... - Les mystères de Jésus', 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', @@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): catalogue = None video_id = self._search_regex( - r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P(?:(?!\1).)+)\1', + r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', default=None, group='id') if not video_id: From bba35695eb4ab9cc70624583375ba3d15b4e6cc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Mar 2019 02:52:08 +0700 Subject: [PATCH 0423/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/ChangeLog b/ChangeLog index 018a30641..272191a01 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,34 @@ +version + +Core +* [extractor/common] Use compat_etree_Element ++ [compat] Introduce compat_etree_Element +* [extractor/common] Fallback url to base URL for DASH formats +* [extractor/common] Do not fail on invalid data while parsing F4M manifest + in non fatal mode +* [extractor/common] Return MPD manifest as format's url meta field (#20242) +* [utils] Strip #HttpOnly_ prefix from cookies files (#20219) + +Extractors +* [francetv:site] Relax video id regular expression (#20268) +* [toutv] Detect invalid login error +* [toutv] Fix authentication (#20261) ++ [urplay] Extract timestamp (#20235) ++ [openload] Add support for oload.space (#20246) +* [facebook] Improve uploader extraction (#20250) +* [bbc] Use compat_etree_Element +* [crunchyroll] Use compat_etree_Element +* [npo] Improve ISM extraction +* [rai] Improve extraction (#20253) +* [paramountnetwork] Fix mgid extraction (#20241) +* [libsyn] Improve extraction (#20229) ++ [youtube] Add more invidious instances to URL regular expression (#20228) +* [spankbang] Fix extraction (#20023) +* [espn] Extend URL regular expression (#20013) +* [sixplay] Handle videos with empty assets (#20016) ++ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070) + + version 2019.03.01 Core From 10734553feef497e2810a23bbe62a0b3d630e78d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Mar 2019 02:53:18 +0700 Subject: [PATCH 0424/1201] release 2019.03.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 71a500f04..5f97e2cbe 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.03.01 +[debug] youtube-dl version 2019.03.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 272191a01..eda94ad33 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.03.09 Core * [extractor/common] Use compat_etree_Element diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 42ba37f15..f72fee57f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.03.01' +__version__ = '2019.03.09' From 0d08bcdb70008f0d500afbd19059b3c0971a4776 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 10 Mar 2019 09:37:28 +0100 Subject: [PATCH 0425/1201] [fox] detect geo restriction and authentication errors(#20208) --- youtube_dl/extractor/fox.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 0ffceeb7c..f30d3cba8 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -6,10 +6,12 @@ import uuid from .adobepass import AdobePassIE from ..compat import ( + compat_HTTPError, compat_str, compat_urllib_parse_unquote, ) from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_duration, @@ -48,6 +50,7 @@ class FOXIE(AdobePassIE): 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', 'only_matching': True, }] + _GEO_BYPASS = False _HOME_PAGE_URL = 'https://www.fox.com/' _API_KEY = 'abdcbed02c124d393b39e818a4312055' _access_token = None @@ -58,9 +61,22 @@ class FOXIE(AdobePassIE): } if self._access_token: headers['Authorization'] = 'Bearer ' + self._access_token - return self._download_json( - 'https://api2.fox.com/v2.0/' + path, - video_id, data=data, headers=headers) + try: + return self._download_json( + 'https://api2.fox.com/v2.0/' + path, + video_id, data=data, headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.status == 403: + entitlement_issues = self._parse_json( + e.cause.read().decode(), video_id)['entitlementIssues'] + for e in entitlement_issues: + if e.get('errorCode') == 1005: + raise ExtractorError( + 'This video is only available via cable service provider ' + 'subscription. You may want to use --cookies.', expected=True) + messages = ', '.join([e['message'] for e in entitlement_issues]) + raise ExtractorError(messages, expected=True) + raise def _real_initialize(self): if not self._access_token: @@ -81,7 +97,15 @@ class FOXIE(AdobePassIE): title = video['name'] release_url = video['url'] - m3u8_url = self._download_json(release_url, video_id)['playURL'] + try: + m3u8_url = self._download_json(release_url, video_id)['playURL'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.status == 403: + error = self._parse_json(e.cause.read().decode(), video_id) + if error.get('exception') == 'GeoLocationBlocked': + self.raise_geo_restricted(countries=['US']) + raise ExtractorError(error['description'], expected=True) + raise formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') From 276550371313dbfe7d94ceb294bd1284c1e7c404 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 10 Mar 2019 15:03:32 +0100 Subject: [PATCH 0426/1201] [vimeo:review] improve config url extraction and extract original format(closes #20305) --- youtube_dl/extractor/vimeo.py | 64 +++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 6f32ea6f1..e3ec550f0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -195,6 +195,32 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'subtitles': subtitles, } + def _extract_original_format(self, url, video_id): + download_data = self._download_json( + url, video_id, fatal=False, + query={'action': 'load_download_config'}, + headers={'X-Requested-With': 'XMLHttpRequest'}) + if download_data: + source_file = download_data.get('source_file') + if isinstance(source_file, dict): + download_url = source_file.get('download_url') + if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): + source_name = source_file.get('public_name', 'Original') + if self._is_valid_url(download_url, video_id, '%s video' % source_name): + ext = (try_get( + source_file, lambda x: x['extension'], + compat_str) or determine_ext( + download_url, None) or 'mp4').lower() + return { + 'url': download_url, + 'ext': ext, + 'width': int_or_none(source_file.get('width')), + 'height': int_or_none(source_file.get('height')), + 'filesize': parse_filesize(source_file.get('size')), + 'format_id': source_name, + 'preference': 1, + } + class VimeoIE(VimeoBaseInfoExtractor): """Information extractor for vimeo.com.""" @@ -659,29 +685,11 @@ class VimeoIE(VimeoBaseInfoExtractor): comment_count = None formats = [] - download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={ - 'X-Requested-With': 'XMLHttpRequest'}) - download_data = self._download_json(download_request, video_id, fatal=False) - if download_data: - source_file = download_data.get('source_file') - if isinstance(source_file, dict): - download_url = source_file.get('download_url') - if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): - source_name = source_file.get('public_name', 'Original') - if self._is_valid_url(download_url, video_id, '%s video' % source_name): - ext = (try_get( - source_file, lambda x: x['extension'], - compat_str) or determine_ext( - download_url, None) or 'mp4').lower() - formats.append({ - 'url': download_url, - 'ext': ext, - 'width': int_or_none(source_file.get('width')), - 'height': int_or_none(source_file.get('height')), - 'filesize': parse_filesize(source_file.get('size')), - 'format_id': source_name, - 'preference': 1, - }) + + source_format = self._extract_original_format( + 'https://vimeo.com/' + video_id, video_id) + if source_format: + formats.append(source_format) info_dict_config = self._parse_config(config, video_id) formats.extend(info_dict_config['formats']) @@ -940,7 +948,7 @@ class VimeoGroupsIE(VimeoAlbumIE): class VimeoReviewIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' - _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P[^/]+)' + _VALID_URL = r'(?Phttps://vimeo\.com/[^/]+/review/(?P[^/]+)/[0-9a-f]{10})' _TESTS = [{ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'md5': 'c507a72f780cacc12b2248bb4006d253', @@ -992,7 +1000,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): data = self._parse_json(self._search_regex( r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', default=NO_DEFAULT if video_password_verified else '{}'), video_id) - config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') + config = data.get('vimeo_esi', {}).get('config', {}) + config_url = config.get('configUrl') or try_get(config, lambda x: x['clipData']['configUrl']) if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( @@ -1000,10 +1009,13 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): return config_url def _real_extract(self, url): - video_id = self._match_id(url) + page_url, video_id = re.match(self._VALID_URL, url).groups() config_url = self._get_config_url(url, video_id) config = self._download_json(config_url, video_id) info_dict = self._parse_config(config, video_id) + source_format = self._extract_original_format(page_url, video_id) + if source_format: + info_dict['formats'].append(source_format) self._vimeo_sort_formats(info_dict['formats']) info_dict['id'] = video_id return info_dict From 067aa17edf5a46a8cbc4d6b90864eddf051fa2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Mar 2019 19:14:41 +0700 Subject: [PATCH 0427/1201] Start moving to ytdl-org --- .github/ISSUE_TEMPLATE.md | 10 +++--- .github/ISSUE_TEMPLATE_tmpl.md | 10 +++--- .github/PULL_REQUEST_TEMPLATE.md | 4 +-- CONTRIBUTING.md | 20 +++++------ README.md | 42 +++++++++++------------ devscripts/buildserver.py | 2 +- devscripts/create-github-release.py | 4 +-- devscripts/gh-pages/update-feed.py | 4 +-- devscripts/release.sh | 2 +- devscripts/show-downloads-statistics.py | 2 +- setup.py | 2 +- test/test_InfoExtractor.py | 16 ++++----- test/test_YoutubeDL.py | 6 ++-- test/test_all_urls.py | 6 ++-- youtube-dl.plugin.zsh | 2 +- youtube_dl/YoutubeDL.py | 16 ++++----- youtube_dl/__init__.py | 2 +- youtube_dl/compat.py | 6 ++-- youtube_dl/downloader/external.py | 4 +-- youtube_dl/downloader/f4m.py | 6 ++-- youtube_dl/downloader/hls.py | 4 +-- youtube_dl/downloader/http.py | 2 +- youtube_dl/extractor/arkena.py | 2 +- youtube_dl/extractor/bambuser.py | 4 +-- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/brightcove.py | 6 ++-- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/common.py | 20 +++++------ youtube_dl/extractor/commonmistakes.py | 2 +- youtube_dl/extractor/crunchyroll.py | 4 +-- youtube_dl/extractor/dailymotion.py | 4 +-- youtube_dl/extractor/dreisat.py | 2 +- youtube_dl/extractor/francetv.py | 2 +- youtube_dl/extractor/generic.py | 10 +++--- youtube_dl/extractor/googledrive.py | 2 +- youtube_dl/extractor/kuwo.py | 2 +- youtube_dl/extractor/liveleak.py | 6 ++-- youtube_dl/extractor/msn.py | 2 +- youtube_dl/extractor/nhl.py | 2 +- youtube_dl/extractor/noco.py | 2 +- youtube_dl/extractor/once.py | 2 +- youtube_dl/extractor/pbs.py | 6 ++-- youtube_dl/extractor/pluralsight.py | 6 ++-- youtube_dl/extractor/pornhub.py | 2 +- youtube_dl/extractor/prosiebensat1.py | 4 +-- youtube_dl/extractor/radiocanada.py | 2 +- youtube_dl/extractor/rtlnl.py | 2 +- youtube_dl/extractor/theplatform.py | 4 +-- youtube_dl/extractor/toutv.py | 2 +- youtube_dl/extractor/udemy.py | 2 +- youtube_dl/extractor/ustream.py | 2 +- youtube_dl/extractor/veehd.py | 2 +- youtube_dl/extractor/vevo.py | 2 +- youtube_dl/extractor/vimeo.py | 4 +-- youtube_dl/extractor/vk.py | 2 +- youtube_dl/extractor/vlive.py | 2 +- youtube_dl/extractor/yandexmusic.py | 4 +-- youtube_dl/extractor/youtube.py | 44 ++++++++++++------------- youtube_dl/options.py | 2 +- youtube_dl/update.py | 2 +- youtube_dl/utils.py | 18 +++++----- 61 files changed, 182 insertions(+), 182 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 5f97e2cbe..911e912a4 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,12 +6,12 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.09** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections -- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones +- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones - [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser ### What is the purpose of your *issue*? @@ -51,11 +51,11 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc -Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. +Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. --- ### Description of your *issue*, suggested solution and other information -Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. If work on your *issue* requires account credentials please provide them or explain how one can obtain them. diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index 8edbd5a0f..8b7e73417 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -6,12 +6,12 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. - [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections -- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones +- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones - [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser ### What is the purpose of your *issue*? @@ -51,11 +51,11 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc -Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. +Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. --- ### Description of your *issue*, suggested solution and other information -Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. If work on your *issue* requires account credentials please provide them or explain how one can obtain them. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ba4ca7553..e69b907d8 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,8 +7,8 @@ --- ### Before submitting a *pull request* make sure you have: -- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections -- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6c1739860..cd9ccbe96 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,11 +42,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're ### Is the issue already documented? -Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. ### Why are existing options not enough? -Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. ### Is there enough context in your bug report? @@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t # DEVELOPER INSTRUCTIONS -Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution. +Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution. To run youtube-dl as a developer, you don't need to build anything either. Simply execute @@ -98,7 +98,7 @@ If you want to add support for a new site, first of all **make sure** this site After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): -1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) +1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork) 2. Check out the source code with: git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git @@ -150,9 +150,9 @@ After you have ensured this site is distributing its content legally, you can fo # TODO more properties (see youtube_dl/extractor/common.py) } ``` -5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. +7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): $ flake8 youtube_dl/extractor/yourextractor.py @@ -177,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou ### Mandatory and optional metafields -For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: +For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: - `id` (media identifier) - `title` (media title) @@ -185,7 +185,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. -[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. +[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. #### Example @@ -341,7 +341,7 @@ Incorrect: ### Use convenience conversion and parsing functions -Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Use `url_or_none` for safe URL processing. @@ -349,7 +349,7 @@ Use `try_get` for safe metadata extraction from parsed JSON. Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. -Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. +Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples diff --git a/README.md b/README.md index c1572f771..e476045b2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl) +[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl) youtube-dl - download videos from youtube.com or other video platforms @@ -43,7 +43,7 @@ Or with [MacPorts](https://www.macports.org/): sudo port install youtube-dl -Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). +Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html). # DESCRIPTION **youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. @@ -685,7 +685,7 @@ You can merge the video and audio of two formats into a single file using `-f . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). +Bugs and suggestions should be reported at: . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` @@ -1342,11 +1342,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're ### Is the issue already documented? -Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. ### Why are existing options not enough? -Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. ### Is there enough context in your bug report? diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 1344b4d87..4a4295ba9 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -322,7 +322,7 @@ class GITBuilder(GITInfoBuilder): class YoutubeDLBuilder(object): - authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile'] + authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org'] def __init__(self, **kwargs): if self.repoName != 'youtube-dl': diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 30716ad8e..428111b3f 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -27,8 +27,8 @@ from youtube_dl.utils import ( class GitHubReleaser(object): - _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases' - _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s' + _API_URL = 'https://api.github.com/repos/ytdl-org/youtube-dl/releases' + _UPLOADS_URL = 'https://uploads.github.com/repos/ytdl-org/youtube-dl/releases/%s/assets?name=%s' _NETRC_MACHINE = 'github.com' def __init__(self, debuglevel=0): diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py index e93eb60fb..506a62377 100755 --- a/devscripts/gh-pages/update-feed.py +++ b/devscripts/gh-pages/update-feed.py @@ -10,7 +10,7 @@ import textwrap atom_template = textwrap.dedent("""\ - + youtube-dl releases https://yt-dl.org/feed/youtube-dl-updates-feed @TIMESTAMP@ @@ -21,7 +21,7 @@ entry_template = textwrap.dedent(""" https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@ New version @VERSION@ - +
Downloads available at https://yt-dl.org/downloads/@VERSION@/ diff --git a/devscripts/release.sh b/devscripts/release.sh index 4db5def5d..4c413bf6d 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -96,7 +96,7 @@ git push origin "$version" REV=$(git rev-parse HEAD) make youtube-dl youtube-dl.tar.gz read -p "VM running? (y/n) " -n 1 -wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +wget "http://$buildserver/build/ytdl-org/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe mkdir -p "build/$version" mv youtube-dl youtube-dl.exe "build/$version" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" diff --git a/devscripts/show-downloads-statistics.py b/devscripts/show-downloads-statistics.py index e25d28411..6c8d1cc2d 100644 --- a/devscripts/show-downloads-statistics.py +++ b/devscripts/show-downloads-statistics.py @@ -24,7 +24,7 @@ total_bytes = 0 for page in itertools.count(1): releases = json.loads(compat_urllib_request.urlopen( - 'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page + 'https://api.github.com/repos/ytdl-org/youtube-dl/releases?page=%s' % page ).read().decode('utf-8')) if not releases: diff --git a/setup.py b/setup.py index dfb669ad2..af68b485e 100644 --- a/setup.py +++ b/setup.py @@ -104,7 +104,7 @@ setup( version=__version__, description=DESCRIPTION, long_description=LONG_DESCRIPTION, - url='https://github.com/rg3/youtube-dl', + url='https://github.com/ytdl-org/youtube-dl', author='Ricardo Garcia', author_email='ytdl@yt-dl.org', maintainer='Sergey M.', diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index f0aa8466b..da6cd39b6 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -201,7 +201,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_m3u8_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/11507 + # https://github.com/ytdl-org/youtube-dl/issues/11507 # http://pluzz.francetv.fr/videos/le_ministere.html 'pluzz_francetv_11507', 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', @@ -263,7 +263,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/11995 + # https://github.com/ytdl-org/youtube-dl/issues/11995 # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor 'teamcoco_11995', 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', @@ -337,7 +337,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/12211 + # https://github.com/ytdl-org/youtube-dl/issues/12211 # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601 'toggle_mobile_12211', 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', @@ -501,7 +501,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/18923 + # https://github.com/ytdl-org/youtube-dl/issues/18923 # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa 'ted_18923', 'http://hls.ted.com/talks/31241.m3u8', @@ -570,9 +570,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_mpd_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/13919 + # https://github.com/ytdl-org/youtube-dl/issues/13919 # Also tests duplicate representation ids, see - # https://github.com/rg3/youtube-dl/issues/15111 + # https://github.com/ytdl-org/youtube-dl/issues/15111 'float_duration', 'http://unknown/manifest.mpd', [{ @@ -652,7 +652,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'height': 1080, }] ), ( - # https://github.com/rg3/youtube-dl/pull/14844 + # https://github.com/ytdl-org/youtube-dl/pull/14844 'urls_only', 'http://unknown/manifest.mpd', [{ @@ -748,7 +748,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_f4m_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/14660 + # https://github.com/ytdl-org/youtube-dl/issues/14660 'custom_base_url', 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', [{ diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 1d7452744..ce9666171 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -411,7 +411,7 @@ class TestFormatSelection(unittest.TestCase): # For extractors with incomplete formats (all formats are audio-only or # video-only) best and worst should fallback to corresponding best/worst # video-only or audio-only formats (as per - # https://github.com/rg3/youtube-dl/pull/5556) + # https://github.com/ytdl-org/youtube-dl/pull/5556) formats = [ {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, @@ -442,7 +442,7 @@ class TestFormatSelection(unittest.TestCase): self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) def test_format_selection_issue_10083(self): - # See https://github.com/rg3/youtube-dl/issues/10083 + # See https://github.com/ytdl-org/youtube-dl/issues/10083 formats = [ {'format_id': 'regular', 'height': 360, 'url': TEST_URL}, {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, @@ -853,7 +853,7 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(result, [2, 3, 4]) def test_urlopen_no_file_protocol(self): - # see https://github.com/rg3/youtube-dl/issues/8227 + # see https://github.com/ytdl-org/youtube-dl/issues/8227 ydl = YDL() self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd') diff --git a/test/test_all_urls.py b/test/test_all_urls.py index cd1cd4b24..465ce0050 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -110,7 +110,7 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user']) self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) - # https://github.com/rg3/youtube-dl/issues/1930 + # https://github.com/ytdl-org/youtube-dl/issues/1930 def test_soundcloud_not_matching_sets(self): self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) @@ -119,12 +119,12 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) def test_pbs(self): - # https://github.com/rg3/youtube-dl/issues/2350 + # https://github.com/ytdl-org/youtube-dl/issues/2350 self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs']) self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs']) def test_yahoo_https(self): - # https://github.com/rg3/youtube-dl/issues/2701 + # https://github.com/ytdl-org/youtube-dl/issues/2701 self.assertMatch( 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', ['Yahoo']) diff --git a/youtube-dl.plugin.zsh b/youtube-dl.plugin.zsh index 4edab5214..17ab1341a 100644 --- a/youtube-dl.plugin.zsh +++ b/youtube-dl.plugin.zsh @@ -7,7 +7,7 @@ # https://github.com/zsh-users/antigen # Install youtube-dl: -# antigen bundle rg3/youtube-dl +# antigen bundle ytdl-org/youtube-dl # Bundles installed by antigen are available for use immediately. # Update youtube-dl (and all other antigen bundles): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bc9fc270c..3b92acd97 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -889,7 +889,7 @@ class YoutubeDL(object): # url_transparent. In such cases outer metadata (from ie_result) # should be propagated to inner one (info). For this to happen # _type of info should be overridden with url_transparent. This - # fixes issue from https://github.com/rg3/youtube-dl/pull/11163. + # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163. if new_result.get('_type') == 'url': new_result['_type'] = 'url_transparent' @@ -1606,7 +1606,7 @@ class YoutubeDL(object): # by extractor are incomplete or not (i.e. whether extractor provides only # video-only or audio-only formats) for proper formats selection for # extractors with such incomplete formats (see - # https://github.com/rg3/youtube-dl/pull/5556). + # https://github.com/ytdl-org/youtube-dl/pull/5556). # Since formats may be filtered during format selection and may not match # the original formats the results may be incorrect. Thus original formats # or pre-calculated metrics should be passed to format selection routines @@ -1614,7 +1614,7 @@ class YoutubeDL(object): # We will pass a context object containing all necessary additional data # instead of just formats. # This fixes incorrect format selection issue (see - # https://github.com/rg3/youtube-dl/issues/10083). + # https://github.com/ytdl-org/youtube-dl/issues/10083). incomplete_formats = ( # All formats are video-only or all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or @@ -1810,7 +1810,7 @@ class YoutubeDL(object): if sub_info.get('data') is not None: try: # Use newline='' to prevent conversion of newline characters - # See https://github.com/rg3/youtube-dl/issues/10268 + # See https://github.com/ytdl-org/youtube-dl/issues/10268 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_info['data']) except (OSError, IOError): @@ -2229,7 +2229,7 @@ class YoutubeDL(object): return if type('') is not compat_str: - # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326) + # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326) self.report_warning( 'Your Python is broken! Update to a newer and supported version') @@ -2323,7 +2323,7 @@ class YoutubeDL(object): proxies = {'http': opts_proxy, 'https': opts_proxy} else: proxies = compat_urllib_request.getproxies() - # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) + # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805) if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] proxy_handler = PerRequestProxyHandler(proxies) @@ -2336,7 +2336,7 @@ class YoutubeDL(object): # When passing our own FileHandler instance, build_opener won't add the # default FileHandler and allows us to disable the file protocol, which # can be used for malicious purposes (see - # https://github.com/rg3/youtube-dl/issues/8227) + # https://github.com/ytdl-org/youtube-dl/issues/8227) file_handler = compat_urllib_request.FileHandler() def file_open(*args, **kwargs): @@ -2348,7 +2348,7 @@ class YoutubeDL(object): # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play - # (See https://github.com/rg3/youtube-dl/issues/1309 for details) + # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details) opener.addheaders = [] self._opener = opener diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ba435ea42..94788d936 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -48,7 +48,7 @@ from .YoutubeDL import YoutubeDL def _real_main(argv=None): # Compatibility fixes for Windows if sys.platform == 'win32': - # https://github.com/rg3/youtube-dl/issues/820 + # https://github.com/ytdl-org/youtube-dl/issues/820 codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) workaround_optparse_bug9161() diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b2fe62f12..7992a23ca 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2364,7 +2364,7 @@ except ImportError: # Python 2 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus # implementations from cpython 3.4.3's stdlib. Python 2's version - # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244) + # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244) def compat_urllib_parse_unquote_to_bytes(string): """unquote_to_bytes('abc%20def') -> b'abc def'.""" @@ -2828,7 +2828,7 @@ else: compat_socket_create_connection = socket.create_connection -# Fix https://github.com/rg3/youtube-dl/issues/4223 +# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 # See http://bugs.python.org/issue9161 for what is broken def workaround_optparse_bug9161(): op = optparse.OptionParser() @@ -2953,7 +2953,7 @@ if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, # PyPy2 prior to version 5.4.0 expects byte strings as Windows function # names, see the original PyPy issue [1] and the youtube-dl one [2]. # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name - # 2. https://github.com/rg3/youtube-dl/pull/4392 + # 2. https://github.com/ytdl-org/youtube-dl/pull/4392 def compat_ctypes_WINFUNCTYPE(*args, **kwargs): real = ctypes.WINFUNCTYPE(*args, **kwargs) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 22e6093b3..5f73f7f0f 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -239,7 +239,7 @@ class FFmpegFD(ExternalFD): # setting -seekable prevents ffmpeg from guessing if the server # supports seeking(by adding the header `Range: bytes=0-`), which # can cause problems in some cases - # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127 + # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127 # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] @@ -334,7 +334,7 @@ class FFmpegFD(ExternalFD): # mp4 file couldn't be played, but if we ask ffmpeg to quit it # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable - # files (see https://github.com/rg3/youtube-dl/issues/8300). + # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). if sys.platform != 'win32': proc.communicate(b'q') raise diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 15e71be9a..9b15a0e15 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -324,8 +324,8 @@ class F4mFD(FragmentFD): urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.geturl() # Some manifests may be malformed, e.g. prosiebensat1 generated manifests - # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 - # and https://github.com/rg3/youtube-dl/issues/7823) + # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 + # and https://github.com/ytdl-org/youtube-dl/issues/7823) manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() doc = compat_etree_fromstring(manifest) @@ -409,7 +409,7 @@ class F4mFD(FragmentFD): # In tests, segments may be truncated, and thus # FlvReader may not be able to parse the whole # chunk. If so, write the segment as is - # See https://github.com/rg3/youtube-dl/issues/9214 + # See https://github.com/ytdl-org/youtube-dl/issues/9214 dest_stream.write(down_data) break raise diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 4def8e2d5..419e73576 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -152,8 +152,8 @@ class HlsFD(FragmentFD): except compat_urllib_error.HTTPError as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. - # See https://github.com/rg3/youtube-dl/issues/10165, - # https://github.com/rg3/youtube-dl/issues/10448). + # See https://github.com/ytdl-org/youtube-dl/issues/10165, + # https://github.com/ytdl-org/youtube-dl/issues/10448). count += 1 if count <= fragment_retries: self.report_retry_fragment(err, frag_index, count, fragment_retries) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 5b1e96013..08670ee3c 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -111,7 +111,7 @@ class HttpFD(FileDownloader): # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range # set in response despite of requested Range (see - # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) + # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) if has_range: content_range = ctx.data.headers.get('Content-Range') if content_range: diff --git a/youtube_dl/extractor/arkena.py b/youtube_dl/extractor/arkena.py index 4495ddbb0..854f58767 100644 --- a/youtube_dl/extractor/arkena.py +++ b/youtube_dl/extractor/arkena.py @@ -103,7 +103,7 @@ class ArkenaIE(InfoExtractor): f_url, video_id, mpd_id=kind, fatal=False)) elif kind == 'silverlight': # TODO: process when ism is supported (see - # https://github.com/rg3/youtube-dl/issues/8118) + # https://github.com/ytdl-org/youtube-dl/issues/8118) continue else: tbr = float_or_none(f.get('Bitrate'), 1000) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 34f1b3d83..4400ff9c1 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -23,7 +23,7 @@ class BambuserIE(InfoExtractor): _TEST = { 'url': 'http://bambuser.com/v/4050584', - # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 + # MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388 # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641', 'info_dict': { 'id': '4050584', @@ -38,7 +38,7 @@ class BambuserIE(InfoExtractor): }, 'params': { # It doesn't respect the 'Range' header, it would download the whole video - # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59 + # caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59 'skip_download': True, }, } diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index d479d2577..e76507951 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -208,7 +208,7 @@ class BBCCoUkIE(InfoExtractor): }, 'skip': 'Now it\'s really geo-restricted', }, { - # compact player (https://github.com/rg3/youtube-dl/issues/8147) + # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147) 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', 'info_dict': { 'id': 'p028bfkj', diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 465ae396e..c0345e2c3 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -126,7 +126,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'playlist_mincount': 7, }, { - # playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965) + # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965) 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg', 'info_dict': { 'id': '1522758701001', @@ -155,10 +155,10 @@ class BrightcoveLegacyIE(InfoExtractor): {params} """ - # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 + # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553 object_str = re.sub(r'(', lambda m: m.group(1) + '/>', object_str) - # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 + # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608 object_str = object_str.replace('<--', ' %s' % ( - srt_subtitles_timecode(start), - srt_subtitles_timecode(end)), - text, - os.linesep, - )) + alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) + ssa += os.linesep + 'Dialogue:Marked=0,%s,%s,Default,%s%s' % ( + self._ass_subtitles_timecode(start), + self._ass_subtitles_timecode(end), + '{\\a%d}' % alignment if alignment != 2 else '', + text.replace('\n', '\\N').replace('', '{\\i1}').replace('', '{\\i0}')) if sub_lang == 'vostf': sub_lang = 'fr' @@ -91,8 +106,8 @@ class ADNIE(InfoExtractor): 'ext': 'json', 'data': json.dumps(sub), }, { - 'ext': 'srt', - 'data': srt, + 'ext': 'ssa', + 'data': ssa, }]) return subtitles @@ -100,7 +115,15 @@ class ADNIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_config = self._parse_json(self._search_regex( - r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id) + r'playerConfig\s*=\s*({.+});', webpage, + 'player config', default='{}'), video_id, fatal=False) + if not player_config: + config_url = urljoin(self._BASE_URL, self._search_regex( + r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"', + webpage, 'config url')) + player_config = self._download_json( + config_url, video_id, + 'Downloading player config JSON metadata')['player'] video_info = {} video_info_str = self._search_regex( @@ -129,12 +152,15 @@ class ADNIE(InfoExtractor): encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) authorization = base64.b64encode(encrypted_message).decode() links_data = self._download_json( - urljoin(self._BASE_URL, links_url), video_id, headers={ + urljoin(self._BASE_URL, links_url), video_id, + 'Downloading links JSON metadata', headers={ 'Authorization': 'Bearer ' + authorization, }) links = links_data.get('links') or {} metas = metas or links_data.get('meta') or {} - sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token + sub_path = sub_path or links_data.get('subtitles') or \ + 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id + sub_path += '&token=' + token error = links_data.get('error') title = metas.get('title') or video_info['title'] @@ -142,9 +168,11 @@ class ADNIE(InfoExtractor): for format_id, qualities in links.items(): if not isinstance(qualities, dict): continue - for load_balancer_url in qualities.values(): + for quality, load_balancer_url in qualities.items(): load_balancer_data = self._download_json( - load_balancer_url, video_id, fatal=False) or {} + load_balancer_url, video_id, + 'Downloading %s %s JSON metadata' % (format_id, quality), + fatal=False) or {} m3u8_url = load_balancer_data.get('location') if not m3u8_url: continue From 2bbde1d09afb2225fb7bd245bcf77a0715a58f29 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Apr 2019 17:59:20 +0100 Subject: [PATCH 0483/1201] [adn] fix subtitle compatibility with ffmpeg --- youtube_dl/extractor/adn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 2eb4d1dc7..1e04a55a6 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -81,10 +81,10 @@ class ADNIE(InfoExtractor): ssa = '''[Script Info] ScriptType:V4.00 [V4 Styles] -Format:Name,Fontname,Fontsize,PrimaryColour,Bold,BorderStyle,Outline,Alignment,MarginL,MarginR,MarginV -Style:Default,Arial,18,16777215,-1,1,1,2,20,20,20 +Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding +Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0 [Events] -Format:Marked,Start,End,Style,Text''' +Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' for current in sub: start, end, text, line_align, position_align = ( float_or_none(current.get('startTime')), @@ -94,7 +94,7 @@ Format:Marked,Start,End,Style,Text''' if start is None or end is None or text is None: continue alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) - ssa += os.linesep + 'Dialogue:Marked=0,%s,%s,Default,%s%s' % ( + ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( self._ass_subtitles_timecode(start), self._ass_subtitles_timecode(end), '{\\a%d}' % alignment if alignment != 2 else '', From 69e6efac1669da68c0746419657160311cde2671 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 08:26:04 +0100 Subject: [PATCH 0484/1201] [teamcoco] fix extraction and add suport for subdomains(closes #17099)(closes #20339) --- youtube_dl/extractor/teamcoco.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 73469cc5d..7640cf00a 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -16,7 +16,7 @@ from ..utils import ( class TeamcocoIE(TurnerBaseIE): - _VALID_URL = r'https?://teamcoco\.com/(?P([^/]+/)*[^/?#]+)' + _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P([^/]+/)*[^/?#]+)' _TESTS = [ { 'url': 'http://teamcoco.com/video/mary-kay-remote', @@ -79,15 +79,20 @@ class TeamcocoIE(TurnerBaseIE): }, { 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv', 'only_matching': True, + }, { + 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft', + 'only_matching': True, } ] def _graphql_call(self, query_template, object_type, object_id): find_object = 'find' + object_type return self._download_json( - 'http://teamcoco.com/graphql/', object_id, data=json.dumps({ + 'https://teamcoco.com/graphql', object_id, data=json.dumps({ 'query': query_template % (find_object, object_id) - }))['data'][find_object] + }).encode(), headers={ + 'Content-Type': 'application/json', + })['data'][find_object] def _real_extract(self, url): display_id = self._match_id(url) @@ -145,7 +150,12 @@ class TeamcocoIE(TurnerBaseIE): 'accessTokenType': 'jws', })) else: - video_sources = self._graphql_call('''{ + d = self._download_json( + 'https://teamcoco.com/_truman/d/' + video_id, + video_id, fatal=False) or {} + video_sources = d.get('meta') or {} + if not video_sources: + video_sources = self._graphql_call('''{ %s(id: "%s") { src } From afb74964162eaee64c5c9b72990837daae945fec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 11:45:49 +0100 Subject: [PATCH 0485/1201] [adultswim] fix extraction(closes #18025) --- youtube_dl/extractor/adultswim.py | 194 ++++++++++++++++++------------ 1 file changed, 118 insertions(+), 76 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 88c96a950..8d1d9ac7d 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -1,13 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .turner import TurnerBaseIE from ..utils import ( + determine_ext, + float_or_none, int_or_none, + mimetype2ext, + parse_age_limit, + parse_iso8601, strip_or_none, - url_or_none, + try_get, ) @@ -21,8 +27,8 @@ class AdultSwimIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'Rick and Morty - Pilot', 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.', - 'timestamp': 1493267400, - 'upload_date': '20170427', + 'timestamp': 1543294800, + 'upload_date': '20181127', }, 'params': { # m3u8 download @@ -43,6 +49,7 @@ class AdultSwimIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', }, { 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', 'info_dict': { @@ -61,9 +68,9 @@ class AdultSwimIE(TurnerBaseIE): }, { 'url': 'http://www.adultswim.com/videos/attack-on-titan', 'info_dict': { - 'id': 'b7A69dzfRzuaXIECdxW8XQ', + 'id': 'attack-on-titan', 'title': 'Attack on Titan', - 'description': 'md5:6c8e003ea0777b47013e894767f5e114', + 'description': 'md5:41caa9416906d90711e31dc00cb7db7e', }, 'playlist_mincount': 12, }, { @@ -78,83 +85,118 @@ class AdultSwimIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', }] def _real_extract(self, url): show_path, episode_path = re.match(self._VALID_URL, url).groups() display_id = episode_path or show_path - webpage = self._download_webpage(url, display_id) - initial_data = self._parse_json(self._search_regex( - r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});', - webpage, 'initial data'), display_id) - - is_stream = show_path == 'streams' - if is_stream: - if not episode_path: - episode_path = 'live-stream' - - video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path) - video_id = video_data.get('stream') - - if not video_id: - entries = [] - for episode in video_data.get('archiveEpisodes', []): - episode_url = url_or_none(episode.get('url')) - if not episode_url: - continue - entries.append(self.url_result( - episode_url, 'AdultSwim', episode.get('id'))) - return self.playlist_result( - entries, video_data.get('id'), video_data.get('title'), - strip_or_none(video_data.get('description'))) + query = '''query { + getShowBySlug(slug:"%s") { + %%s + } +}''' % show_path + if episode_path: + query = query % '''title + getVideoBySlug(slug:"%s") { + _id + auth + description + duration + episodeNumber + launchDate + mediaID + seasonNumber + poster + title + tvRating + }''' % episode_path + ['getVideoBySlug'] else: - show_data = initial_data['show'] + query = query % '''metaDescription + title + videos(first:1000,sort:["episode_number"]) { + edges { + node { + _id + slug + } + } + }''' + show_data = self._download_json( + 'https://www.adultswim.com/api/search', display_id, + data=json.dumps({'query': query}).encode(), + headers={'Content-Type': 'application/json'})['data']['getShowBySlug'] + if episode_path: + video_data = show_data['getVideoBySlug'] + video_id = video_data['_id'] + episode_title = title = video_data['title'] + series = show_data.get('title') + if series: + title = '%s - %s' % (series, title) + info = { + 'id': video_id, + 'title': title, + 'description': strip_or_none(video_data.get('description')), + 'duration': float_or_none(video_data.get('duration')), + 'formats': [], + 'subtitles': {}, + 'age_limit': parse_age_limit(video_data.get('tvRating')), + 'thumbnail': video_data.get('poster'), + 'timestamp': parse_iso8601(video_data.get('launchDate')), + 'series': series, + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode': episode_title, + 'episode_number': int_or_none(video_data.get('episodeNumber')), + } - if not episode_path: - entries = [] - for video in show_data.get('videos', []): - slug = video.get('slug') - if not slug: + auth = video_data.get('auth') + media_id = video_data.get('mediaID') + if media_id: + info.update(self._extract_ngtv_info(media_id, { + # CDN_TOKEN_APP_ID from: + # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js + 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE', + }, { + 'url': url, + 'site_name': 'AdultSwim', + 'auth_required': auth, + })) + + if not auth: + extract_data = self._download_json( + 'https://www.adultswim.com/api/shows/v1/videos/' + video_id, + video_id, query={'fields': 'stream'}, fatal=False) or {} + assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or [] + for asset in assets: + asset_url = asset.get('url') + if not asset_url: continue - entries.append(self.url_result( - 'http://adultswim.com/videos/%s/%s' % (show_path, slug), - 'AdultSwim', video.get('id'))) - return self.playlist_result( - entries, show_data.get('id'), show_data.get('title'), - strip_or_none(show_data.get('metadata', {}).get('description'))) + ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type'))) + if ext == 'm3u8': + info['formats'].extend(self._extract_m3u8_formats( + asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + continue + # info['formats'].extend(self._extract_f4m_formats( + # asset_url, video_id, f4m_id='hds', fatal=False)) + elif ext in ('scc', 'ttml', 'vtt'): + info['subtitles'].setdefault('en', []).append({ + 'url': asset_url, + }) + self._sort_formats(info['formats']) - video_data = show_data['sluggedVideo'] - video_id = video_data['id'] - - info = self._extract_cvp_info( - 'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id, - video_id, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', - 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', - }, - }, { - 'url': url, - 'site_name': 'AdultSwim', - 'auth_required': video_data.get('auth'), - }) - - info.update({ - 'id': video_id, - 'display_id': display_id, - 'description': info.get('description') or strip_or_none(video_data.get('description')), - }) - if not is_stream: - info.update({ - 'duration': info.get('duration') or int_or_none(video_data.get('duration')), - 'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')), - 'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')), - 'episode': info['title'], - 'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')), - }) - - info['series'] = video_data.get('collection_title') or info.get('series') - if info['series'] and info['series'] != info['title']: - info['title'] = '%s - %s' % (info['series'], info['title']) - - return info + return info + else: + entries = [] + for edge in show_data.get('videos', {}).get('edges', []): + video = edge.get('node') or {} + slug = video.get('slug') + if not slug: + continue + entries.append(self.url_result( + 'http://adultswim.com/videos/%s/%s' % (show_path, slug), + 'AdultSwim', video.get('_id'))) + return self.playlist_result( + entries, show_path, show_data.get('title'), + strip_or_none(show_data.get('metaDescription'))) From 19041a38773b1022480d50587da7db4a0e6fa869 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 16:18:57 +0100 Subject: [PATCH 0486/1201] [youtube] extract srv[1-3] subtitle formats(#20566) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 886fc1591..132572c88 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -484,7 +484,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, } - _SUBTITLE_FORMATS = ('ttml', 'vtt') + _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') _GEO_BYPASS = False From a7978f8e2acc8c34989b7f289a16180e71902193 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 18:08:43 +0100 Subject: [PATCH 0487/1201] [hbo] fix extraction and extract subtitles(closes #14629)(closes #13709) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/hbo.py | 102 +++++++++++------------------ 2 files changed, 39 insertions(+), 68 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 254dbc946..c1c5d1953 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -441,10 +441,7 @@ from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .hark import HarkIE -from .hbo import ( - HBOIE, - HBOEpisodeIE, -) +from .hbo import HBOIE from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index 859ad5429..44440233d 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -4,16 +4,28 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( xpath_text, xpath_element, int_or_none, parse_duration, + urljoin, ) -class HBOBaseIE(InfoExtractor): +class HBOIE(InfoExtractor): + IE_NAME = 'hbo' + _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer', + 'md5': '8126210656f433c452a21367f9ad85b3', + 'info_dict': { + 'id': '22113301', + 'ext': 'mp4', + 'title': 'Game of Thrones - Trailer', + }, + 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], + } _FORMATS_INFO = { 'pro7': { 'width': 1280, @@ -53,10 +65,17 @@ class HBOBaseIE(InfoExtractor): }, } - def _extract_from_id(self, video_id): - video_data = self._download_xml( - 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id) - title = xpath_text(video_data, 'title', 'title', True) + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + location_path = self._parse_json(self._html_search_regex( + r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl'] + video_data = self._download_xml(urljoin(url, location_path), display_id) + video_id = xpath_text(video_data, 'id', fatal=True) + episode_title = title = xpath_text(video_data, 'title', fatal=True) + series = xpath_text(video_data, 'program') + if series: + title = '%s - %s' % (series, title) formats = [] for source in xpath_element(video_data, 'videos', 'sources', True): @@ -128,68 +147,23 @@ class HBOBaseIE(InfoExtractor): 'width': width, }) + subtitles = None + caption_url = xpath_text(video_data, 'captionUrl') + if caption_url: + subtitles = { + 'en': [{ + 'url': caption_url, + 'ext': 'ttml' + }], + } + return { 'id': video_id, 'title': title, 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')), + 'series': series, + 'episode': episode_title, 'formats': formats, 'thumbnails': thumbnails, + 'subtitles': subtitles, } - - -class HBOIE(HBOBaseIE): - IE_NAME = 'hbo' - _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' - _TEST = { - 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', - 'md5': '2c6a6bc1222c7e91cb3334dad1746e5a', - 'info_dict': { - 'id': '1437839', - 'ext': 'mp4', - 'title': 'Ep. 64 Clip: Encryption', - 'thumbnail': r're:https?://.*\.jpg$', - 'duration': 1072, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._extract_from_id(video_id) - - -class HBOEpisodeIE(HBOBaseIE): - IE_NAME = 'hbo:episode' - _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P[0-9a-z-]+))(?:\.html)?' - - _TESTS = [{ - 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', - 'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb', - 'info_dict': { - 'id': '1439518', - 'display_id': 'ep-52-inside-the-episode', - 'ext': 'mp4', - 'title': 'Ep. 52: Inside the Episode', - 'thumbnail': r're:https?://.*\.jpg$', - 'duration': 240, - }, - }, { - 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', - 'only_matching': True, - }, { - 'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver', - 'only_matching': True, - }] - - def _real_extract(self, url): - path, display_id = re.match(self._VALID_URL, url).groups() - - content = self._download_json( - 'http://www.hbo.com/api/content/' + path, display_id)['content'] - - video_id = compat_str((content.get('parsed', {}).get( - 'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId']) - - info_dict = self._extract_from_id(video_id) - info_dict['display_id'] = display_id - - return info_dict From 4810655cd65f9bdde1ad240adf7334828243fb0a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 19:35:35 +0100 Subject: [PATCH 0488/1201] [bfi:player] Add new extractor(#19235) --- youtube_dl/extractor/bfi.py | 37 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/bfi.py diff --git a/youtube_dl/extractor/bfi.py b/youtube_dl/extractor/bfi.py new file mode 100644 index 000000000..60c8944b5 --- /dev/null +++ b/youtube_dl/extractor/bfi.py @@ -0,0 +1,37 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import extract_attributes + + +class BFIPlayerIE(InfoExtractor): + IE_NAME = 'bfi:player' + _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P[\w-]+)-online' + _TEST = { + 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online', + 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8', + 'info_dict': { + 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63', + 'ext': 'mp4', + 'title': 'Computer Doctor', + 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b', + }, + 'skip': 'BFI Player films cannot be played outside of the UK', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + entries = [] + for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage): + player_attr = extract_attributes(player_el) + ooyala_id = player_attr.get('data-video-id') + if not ooyala_id: + continue + entries.append(self.url_result( + 'ooyala:' + ooyala_id, 'Ooyala', + ooyala_id, player_attr.get('data-label'))) + return self.playlist_result(entries) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c1c5d1953..01119f2bb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -107,6 +107,7 @@ from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE from .beatport import BeatportIE from .bet import BetIE +from .bfi import BFIPlayerIE from .bigflix import BigflixIE from .bild import BildIE from .bilibili import ( From 9f182c23ba74f76ff716940b2a0568f428d6c2b9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Apr 2019 09:22:25 +0100 Subject: [PATCH 0489/1201] [vrv] add basic support for individual movie links(#19229) --- youtube_dl/extractor/vrv.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 6c060ae76..c11da97de 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -150,9 +150,10 @@ class VRVIE(VRVBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - episode_path = self._get_cms_resource( - 'cms:/episodes/' + video_id, video_id) - video_data = self._call_cms(episode_path, video_id, 'video') + object_data = self._call_cms(self._get_cms_resource( + 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0] + resource_path = object_data['__links__']['resource']['href'] + video_data = self._call_cms(resource_path, video_id, 'video') title = video_data['title'] streams_path = video_data['__links__'].get('streams', {}).get('href') From b9aad6c427ea083143d1e53c0f087ad508bd706a Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Sat, 9 Feb 2019 10:15:53 +0100 Subject: [PATCH 0490/1201] [dvtv] Fix extraction (closes #18514) --- youtube_dl/extractor/dvtv.py | 120 +++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index 20996962a..bc68d07d1 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -10,7 +10,9 @@ from ..utils import ( int_or_none, js_to_json, mimetype2ext, + try_get, unescapeHTML, + parse_iso8601, ) @@ -28,6 +30,8 @@ class DVTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně', 'duration': 1484, + 'upload_date': '20141217', + 'timestamp': 1418792400, } }, { 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', @@ -84,6 +88,8 @@ class DVTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta', 'duration': 1103, + 'upload_date': '20170511', + 'timestamp': 1494514200, }, 'params': { 'skip_download': True, @@ -91,43 +97,62 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'only_matching': True, + }, { + # Test live stream video (liveStarter) parsing + 'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/', + 'md5': '2e552e483f2414851ca50467054f9d5d', + 'info_dict': { + 'id': '8d116360288011e98c840cc47ab5f122', + 'ext': 'mp4', + 'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu', + 'upload_date': '20190204', + 'timestamp': 1549289591, + }, + 'params': { + # Video content is no longer available + 'skip_download': True, + }, }] - def _parse_video_metadata(self, js, video_id, live_js=None): + def _parse_video_metadata(self, js, video_id, timestamp): + data = self._parse_json(js, video_id, transform_source=js_to_json) - if live_js: - data.update(self._parse_json( - live_js, video_id, transform_source=js_to_json)) + + live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict) + if live_starter: + data.update(live_starter) title = unescapeHTML(data['title']) formats = [] - for video in data['sources']: - video_url = video.get('file') - if not video_url: - continue - video_type = video.get('type') - ext = determine_ext(video_url, mimetype2ext(video_type)) - if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif video_type == 'application/dash+xml' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - video_url, video_id, mpd_id='dash', fatal=False)) - else: - label = video.get('label') - height = self._search_regex( - r'^(\d+)[pP]', label or '', 'height', default=None) - format_id = ['http'] - for f in (ext, label): - if f: - format_id.append(f) - formats.append({ - 'url': video_url, - 'format_id': '-'.join(format_id), - 'height': int_or_none(height), - }) + + for tracks in data.get('tracks', {}).values(): + for video in tracks: + video_url = video.get('src') + if not video_url: + continue + video_type = video.get('type') + ext = determine_ext(video_url, mimetype2ext(video_type)) + if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif video_type == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + else: + label = video.get('label') + height = self._search_regex( + r'^(\d+)[pP]', label or '', 'height', default=None) + format_id = ['http'] + for f in (ext, label): + if f: + format_id.append(f) + formats.append({ + 'url': video_url, + 'format_id': '-'.join(format_id), + 'height': int_or_none(height), + }) self._sort_formats(formats) return { @@ -136,7 +161,7 @@ class DVTVIE(InfoExtractor): 'description': data.get('description'), 'thumbnail': data.get('image'), 'duration': int_or_none(data.get('duration')), - 'timestamp': int_or_none(data.get('pubtime')), + 'timestamp': int_or_none(timestamp), 'formats': formats } @@ -145,32 +170,33 @@ class DVTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - # live content - live_item = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});', - webpage, 'video', default=None) - - # single video - item = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});', - webpage, 'video', default=None) - - if item: - return self._parse_video_metadata(item, video_id, live_item) + timestamp = parse_iso8601(self._html_search_meta( + 'article:published_time', webpage, 'published time', default=None)) # playlist items = re.findall( - r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);", + r"(?s)playlist\.push\(({.+?})\);", webpage) - if not items: - items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage) if items: return { '_type': 'playlist', 'id': video_id, 'title': self._og_search_title(webpage), - 'entries': [self._parse_video_metadata(i, video_id) for i in items] + 'entries': [self._parse_video_metadata(i, video_id, timestamp) for i in items] } + # single video + item = self._search_regex( + r'(?s)BBXPlayer.setup\((.+?)\);', + webpage, 'video', default=None) + + if item: + # remove function calls (ex. htmldeentitize) + # TODO this should be fixed in a general way in the js_to_json + item = re.sub(r'\w+?\((.+)\)', r'\1', item) + + if item: + return self._parse_video_metadata(item, video_id, timestamp) + raise ExtractorError('Could not find neither video nor playlist') From 19591facea9abe965a734224bea33948bb57b5f4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Apr 2019 16:29:30 +0100 Subject: [PATCH 0491/1201] [dvtv] remove unnecessary comments and spaces --- youtube_dl/extractor/dvtv.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index bc68d07d1..de7f6d670 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -19,9 +19,7 @@ from ..utils import ( class DVTVIE(InfoExtractor): IE_NAME = 'dvtv' IE_DESC = 'http://video.aktualne.cz/' - _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P[0-9a-f]{32})' - _TESTS = [{ 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', 'md5': '67cb83e4a955d36e1b5d31993134a0c2', @@ -36,7 +34,7 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'info_dict': { - 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci', + 'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci', 'id': '973eb3bc854e11e498be002590604f2e', }, 'playlist': [{ @@ -115,17 +113,14 @@ class DVTVIE(InfoExtractor): }] def _parse_video_metadata(self, js, video_id, timestamp): - data = self._parse_json(js, video_id, transform_source=js_to_json) + title = unescapeHTML(data['title']) live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict) if live_starter: data.update(live_starter) - title = unescapeHTML(data['title']) - formats = [] - for tracks in data.get('tracks', {}).values(): for video in tracks: video_url = video.get('src') @@ -167,36 +162,23 @@ class DVTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - timestamp = parse_iso8601(self._html_search_meta( 'article:published_time', webpage, 'published time', default=None)) - # playlist - items = re.findall( - r"(?s)playlist\.push\(({.+?})\);", - webpage) - + items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage) if items: - return { - '_type': 'playlist', - 'id': video_id, - 'title': self._og_search_title(webpage), - 'entries': [self._parse_video_metadata(i, video_id, timestamp) for i in items] - } + return self.playlist_result( + [self._parse_video_metadata(i, video_id, timestamp) for i in items], + video_id, self._html_search_meta('twitter:title', webpage)) - # single video item = self._search_regex( - r'(?s)BBXPlayer.setup\((.+?)\);', + r'(?s)BBXPlayer\.setup\((.+?)\);', webpage, 'video', default=None) - if item: # remove function calls (ex. htmldeentitize) # TODO this should be fixed in a general way in the js_to_json item = re.sub(r'\w+?\((.+)\)', r'\1', item) - - if item: return self._parse_video_metadata(item, video_id, timestamp) raise ExtractorError('Could not find neither video nor playlist') From c701472fc99c90f6ef4fdf0f0169a342f2e9e892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 02:15:25 +0700 Subject: [PATCH 0492/1201] [platzi] Add extractor (closes #20562) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/platzi.py | 217 +++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 youtube_dl/extractor/platzi.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 01119f2bb..41ab36213 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -868,6 +868,10 @@ from .picarto import ( from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE +from .platzi import ( + PlatziIE, + PlatziCourseIE, +) from .playfm import PlayFMIE from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py new file mode 100644 index 000000000..557b2b5ad --- /dev/null +++ b/youtube_dl/extractor/platzi.py @@ -0,0 +1,217 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_str, +) +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + str_or_none, + try_get, + url_or_none, + urlencode_postdata, + urljoin, +) + + +class PlatziIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/[^/]+/(?P\d+)-[^/?\#&]+ + ''' + _LOGIN_URL = 'https://platzi.com/login/' + _NETRC_MACHINE = 'platzi' + + _TESTS = [{ + 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'email': username, + 'password': password, + }) + + urlh = self._request_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Referer': self._LOGIN_URL}) + + # login succeeded + if 'platzi.com/login' not in compat_str(urlh.geturl()): + return + + login_error = self._webpage_read_content( + urlh, self._LOGIN_URL, None, 'Downloading login error page') + + login = self._parse_json( + self._search_regex( + r'login\s*=\s*({.+?})(?:\s*;|\s*[^/?\#&]+) + ''' + _TESTS = [{ + 'url': 'https://platzi.com/clases/next-js/', + 'info_dict': { + 'id': '1311', + 'title': 'Curso de Next.js', + }, + 'playlist_count': 22, + }, { + 'url': 'https://courses.platzi.com/classes/communication-codestream/', + 'info_dict': { + 'id': '1367', + 'title': 'Codestream Course', + }, + 'playlist_count': 14, + }] + + @classmethod + def suitable(cls, url): + return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_name = self._match_id(url) + + webpage = self._download_webpage(url, course_name) + + props = self._parse_json( + self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'), + course_name)['initialProps'] + + entries = [] + for chapter_num, chapter in enumerate(props['concepts'], 1): + if not isinstance(chapter, dict): + continue + materials = chapter.get('materials') + if not materials or not isinstance(materials, list): + continue + chapter_title = chapter.get('title') + chapter_id = str_or_none(chapter.get('id')) + for material in materials: + if not isinstance(material, dict): + continue + if material.get('material_type') != 'video': + continue + video_url = urljoin(url, material.get('url')) + if not video_url: + continue + entries.append({ + '_type': 'url_transparent', + 'url': video_url, + 'title': str_or_none(material.get('name')), + 'id': str_or_none(material.get('id')), + 'ie_key': PlatziIE.ie_key(), + 'chapter': chapter_title, + 'chapter_number': chapter_num, + 'chapter_id': chapter_id, + }) + + course_id = compat_str(try_get(props, lambda x: x['course']['id'])) + course_title = try_get(props, lambda x: x['course']['name'], compat_str) + + return self.playlist_result(entries, course_id, course_title) From 059cd768b97188fbbf3262696e6511f3aa1795e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 02:17:54 +0700 Subject: [PATCH 0493/1201] [vk] Remove unused import --- youtube_dl/extractor/vk.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 39376e39f..1072550f1 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -6,10 +6,7 @@ import re import sys from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) +from ..compat import compat_urlparse from ..utils import ( clean_html, ExtractorError, From f412970164c349bea81bfd9d95e56fec7d16c8a1 Mon Sep 17 00:00:00 2001 From: Martin Michlmayr Date: Sun, 7 Apr 2019 02:28:31 +0700 Subject: [PATCH 0494/1201] [README.md] Fix lists formatting (closes #20558) Lists have to be separated from the previous paragraph by a blank line in certain variants of Markdown, otherwise they are not interpreted as lists. This change ensures that that the youtube-dl.1 man page, which is generated from README.md with the help of pandoc, is formatted correctly. --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index e476045b2..92c3a92a1 100644 --- a/README.md +++ b/README.md @@ -642,6 +642,7 @@ The simplest case is requesting a specific format, for example with `-f 22` you You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. You can also use special names to select particular edge case formats: + - `best`: Select the best quality format represented by a single file with video and audio. - `worst`: Select the worst quality format represented by a single file with video and audio. - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available. @@ -658,6 +659,7 @@ If you want to download several formats of the same video use a comma as a separ You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals): + - `filesize`: The number of bytes, if known in advance - `width`: Width of the video, if known - `height`: Height of the video, if known @@ -668,6 +670,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, ` - `fps`: Frame rate Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields: + - `ext`: File extension - `acodec`: Name of the audio codec in use - `vcodec`: Name of the video codec in use From f4da80803619aea52bb116f6191f78e1bd77d2c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 02:58:40 +0700 Subject: [PATCH 0495/1201] [xvideos] Extract all thumbnails (closes #20432) --- youtube_dl/extractor/xvideos.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index ec2d913fc..166bcf443 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -57,10 +57,17 @@ class XVideosIE(InfoExtractor): webpage, 'title', default=None, group='title') or self._og_search_title(webpage) - thumbnail = self._search_regex( - (r'setThumbUrl\(\s*(["\'])(?P(?:(?!\1).)+)\1', - r'url_bigthumb=(?P.+?)&'), - webpage, 'thumbnail', fatal=False, group='thumbnail') + thumbnails = [] + for preference, thumbnail in enumerate(('', '169')): + thumbnail_url = self._search_regex( + r'setThumbUrl%s\(\s*(["\'])(?P(?:(?!\1).)+)\1' % thumbnail, + webpage, 'thumbnail', default=None, group='thumbnail') + if thumbnail_url: + thumbnails.append({ + 'url': thumbnail_url, + 'preference': preference, + }) + duration = int_or_none(self._og_search_property( 'duration', webpage, default=None)) or parse_duration( self._search_regex( @@ -98,6 +105,6 @@ class XVideosIE(InfoExtractor): 'formats': formats, 'title': title, 'duration': duration, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'age_limit': 18, } From 8410653f24ae73e4bc52c11bc0ec595e42567038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 03:18:10 +0700 Subject: [PATCH 0496/1201] [ruutu] Add support for audio podcasts (closes #20473, closes #20545) --- youtube_dl/extractor/ruutu.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index f530f0083..f05401b36 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -59,6 +59,20 @@ class RuutuIE(InfoExtractor): 'url': 'http://www.ruutu.fi/video/3193728', 'only_matching': True, }, + { + # audio podcast + 'url': 'https://www.supla.fi/supla/3382410', + 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', + 'info_dict': { + 'id': '3382410', + 'ext': 'mp3', + 'title': 'Mikä ihmeen poltergeist?', + 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + }, + 'expected_warnings': ['HTTP Error 502: Bad Gateway'], + } ] def _real_extract(self, url): @@ -94,6 +108,12 @@ class RuutuIE(InfoExtractor): continue formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) + elif ext == 'mp3' or child.tag == 'AudioMediaFile': + formats.append({ + 'format_id': 'audio', + 'url': video_url, + 'vcodec': 'none', + }) else: proto = compat_urllib_parse_urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': From aa5338118e77380c753b397c6f544fcfdef1cb22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 04:16:45 +0700 Subject: [PATCH 0497/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3a0b6634f..41adb4204 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +version + +Core ++ [downloader/external] Pass rtmp_conn to ffmpeg + +Extractors ++ [ruutu] Add support for audio podcasts (#20473, #20545) ++ [xvideos] Extract all thumbnails (#20432) ++ [platzi] Add support for platzi.com (#20562) +* [dvtv] Fix extraction (#18514, #19174) ++ [vrv] Add basic support for individual movie links (#19229) ++ [bfi:player] Add support for player.bfi.org.uk (#19235) +* [hbo] Fix extraction and extract subtitles (#14629, #13709) +* [youtube] Extract srv[1-3] subtitle formats (#20566) +* [adultswim] Fix extraction (#18025) +* [teamcoco] Fix extraction and add suport for subdomains (#17099, #20339) +* [adn] Fix subtitle compatibility with ffmpeg +* [adn] Fix extraction and add support for positioning styles (#20549) +* [vk] Use unique video id (#17848) +* [newstube] Fix extraction +* [rtl2] Actualize extraction ++ [adobeconnect] Add support for adobeconnect.com (#20283) ++ [gaia] Add support for authentication (#14605) ++ [mediasite] Add support for dashed ids and named catalogs (#20531) + + version 2019.04.01 Core From a46d9e5b41bcfdf23038fa9b36aec5d4ba9c6de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 04:19:46 +0700 Subject: [PATCH 0498/1201] release 2019.04.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 6 +++++- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index cc68279d0..5469c73cf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.01*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.07*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.04.01 +[debug] youtube-dl version 2019.04.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 41adb4204..421f247fd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.04.07 Core + [downloader/external] Pass rtmp_conn to ffmpeg diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 12df319eb..df272c479 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **acast:channel** - **AddAnime** - **ADN**: Anime Digital Network + - **AdobeConnect** - **AdobeTV** - **AdobeTVChannel** - **AdobeTVShow** @@ -101,6 +102,7 @@ - **Bellator** - **BellMedia** - **Bet** + - **bfi:player** - **Bigflix** - **Bild**: Bild.de - **BiliBili** @@ -345,7 +347,6 @@ - **Groupon** - **Hark** - **hbo** - - **hbo:episode** - **HearThisAt** - **Heise** - **HellPorno** @@ -489,6 +490,7 @@ - **Mediaset** - **Mediasite** - **MediasiteCatalog** + - **MediasiteNamedCatalog** - **Medici** - **megaphone.fm**: megaphone.fm embedded players - **Meipai**: 美拍 @@ -671,6 +673,8 @@ - **Piksel** - **Pinkbike** - **Pladform** + - **Platzi** + - **PlatziCourse** - **play.fm** - **PlayPlusTV** - **PlaysTV** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f872cd137..5c7d550f5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.04.01' +__version__ = '2019.04.07' From bf6fb8b9dc921a30df24d9789d5bbb0ac5b370b0 Mon Sep 17 00:00:00 2001 From: ealgase Date: Sat, 6 Apr 2019 23:38:40 -0400 Subject: [PATCH 0499/1201] [openload] add tests --- youtube_dl/extractor/openload.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 25b3bfdbd..130165b8c 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -350,6 +350,12 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.space/f/IY4eZSst3u8/', 'only_matching': True, + }, { + 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', + 'only_matching': True, + }, { + 'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 9ed06812ec8da6f1364acd00261935c334994e62 Mon Sep 17 00:00:00 2001 From: ealgase Date: Sat, 6 Apr 2019 23:59:41 -0400 Subject: [PATCH 0500/1201] [streamango] add support for streamcherry.com --- youtube_dl/extractor/streamango.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py index efb259f96..f1e17dd88 100644 --- a/youtube_dl/extractor/streamango.py +++ b/youtube_dl/extractor/streamango.py @@ -14,7 +14,7 @@ from ..utils import ( class StreamangoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', 'md5': 'e992787515a182f55e38fc97588d802a', @@ -41,6 +41,9 @@ class StreamangoIE(InfoExtractor): }, { 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', 'only_matching': True, + }, { + 'url': 'https://streamcherry.com/f/clapasobsptpkdfe/', + 'only_matching': True, }] def _real_extract(self, url): From d562cac9dc67bfa2306c6225c261390162527d9e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 Apr 2019 12:39:48 +0100 Subject: [PATCH 0501/1201] [stv:player] Add new extractor(closes #20586) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/stv.py | 94 ++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 youtube_dl/extractor/stv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 41ab36213..cc19af5c4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1093,6 +1093,7 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE +from .stv import STVPlayerIE from .sunporno import SunPornoIE from .svt import ( SVTIE, diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py new file mode 100644 index 000000000..ccb074cd4 --- /dev/null +++ b/youtube_dl/extractor/stv.py @@ -0,0 +1,94 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse +) +from ..utils import ( + extract_attributes, + float_or_none, + int_or_none, + str_or_none, +) + + +class STVPlayerIE(InfoExtractor): + IE_NAME = 'stv:player' + _VALID_URL = r'https?://player\.stv\.tv/(?Pepisode|video)/(?P[a-z0-9]{4})' + _TEST = { + 'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/', + 'md5': '2ad867d4afd641fa14187596e0fbc91b', + 'info_dict': { + 'id': '6016487034001', + 'ext': 'mp4', + 'upload_date': '20190321', + 'title': 'Interview with the cast ahead of new Victoria', + 'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.', + 'timestamp': 1553179628, + 'uploader_id': '1486976045', + }, + 'skip': 'this resource is unavailable outside of the UK', + } + _PUBLISHER_ID = '1486976045' + _PTYPE_MAP = { + 'episode': 'episodes', + 'video': 'shortform', + } + + def _real_extract(self, url): + ptype, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex( + r'itemprop="embedURL"[^>]+href="([^"]+)', + webpage, 'embed URL', default=None)).query) + publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID + + player_attr = extract_attributes(self._search_regex( + r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {} + + info = {} + duration = ref_id = series = video_id = None + api_ref_id = player_attr.get('data-player-api-refid') + if api_ref_id: + resp = self._download_json( + 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id), + api_ref_id, fatal=False) + if resp: + result = resp.get('results') or {} + video = result.get('video') or {} + video_id = str_or_none(video.get('id')) + ref_id = video.get('guid') + duration = video.get('length') + programme = result.get('programme') or {} + series = programme.get('name') or programme.get('shortName') + subtitles = {} + _subtitles = result.get('_subtitles') or {} + for ext, sub_url in _subtitles.items(): + subtitles.setdefault('en', []).append({ + 'ext': 'vtt' if ext == 'webvtt' else ext, + 'url': sub_url, + }) + info.update({ + 'description': result.get('summary'), + 'subtitles': subtitles, + 'view_count': int_or_none(result.get('views')), + }) + if not video_id: + video_id = qs.get('videoId', [None])[0] or self._search_regex( + r' Date: Sun, 7 Apr 2019 21:05:50 +0700 Subject: [PATCH 0502/1201] [tiktok] Add support for new URL schema (closes #20573) --- youtube_dl/extractor/tiktok.py | 35 +++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py index 083e9f36d..66088b9ab 100644 --- a/youtube_dl/extractor/tiktok.py +++ b/youtube_dl/extractor/tiktok.py @@ -65,8 +65,15 @@ class TikTokBaseIE(InfoExtractor): class TikTokIE(TikTokBaseIE): - _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P\d+)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + (?:m\.)?tiktok\.com/v| + (?:www\.)?tiktok\.com/share/video + ) + /(?P\d+) + ''' + _TESTS = [{ 'url': 'https://m.tiktok.com/v/6606727368545406213.html', 'md5': 'd584b572e92fcd48888051f238022420', 'info_dict': { @@ -81,25 +88,39 @@ class TikTokIE(TikTokBaseIE): 'comment_count': int, 'repost_count': int, } - } + }, { + 'url': 'https://www.tiktok.com/share/video/6606727368545406213', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'https://m.tiktok.com/v/%s.html' % video_id, video_id) data = self._parse_json(self._search_regex( r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id) return self._extract_aweme(data) class TikTokUserIE(TikTokBaseIE): - _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P\d+)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + (?:m\.)?tiktok\.com/h5/share/usr| + (?:www\.)?tiktok\.com/share/user + ) + /(?P\d+) + ''' + _TESTS = [{ 'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html', 'info_dict': { 'id': '188294915489964032', }, 'playlist_mincount': 24, - } + }, { + 'url': 'https://www.tiktok.com/share/user/188294915489964032', + 'only_matching': True, + }] def _real_extract(self, url): user_id = self._match_id(url) From 9045d28b5e3eba03908a82dcb868bd1649650ddb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 Apr 2019 21:31:01 +0100 Subject: [PATCH 0503/1201] [aol] restrict url regex and improve format extraction --- youtube_dl/extractor/aol.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index cb9279193..dffa9733d 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, int_or_none, @@ -12,12 +16,12 @@ from ..utils import ( class AolIE(InfoExtractor): - IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P[^/?#&]+)' + IE_NAME = 'aol.com' + _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.com/video/(?:[^/]+/)*)(?P[0-9a-f]+)' _TESTS = [{ # video with 5min ID - 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', + 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { 'id': '518167793', @@ -34,7 +38,7 @@ class AolIE(InfoExtractor): } }, { # video with vidible ID - 'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', + 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', 'info_dict': { 'id': '5707d6b8e4b090497b04f706', 'ext': 'mp4', @@ -49,17 +53,17 @@ class AolIE(InfoExtractor): 'skip_download': True, } }, { - 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', + 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', 'only_matching': True, }, { - 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', - 'only_matching': True, - }, { - 'url': 'http://on.aol.com/video/519442220', + 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/', 'only_matching': True, }, { 'url': 'aol-video:5707d6b8e4b090497b04f706', 'only_matching': True, + }, { + 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/', + 'only_matching': True, }] def _real_extract(self, url): @@ -73,7 +77,7 @@ class AolIE(InfoExtractor): video_data = response['data'] formats = [] - m3u8_url = video_data.get('videoMasterPlaylist') + m3u8_url = url_or_none(video_data.get('videoMasterPlaylist')) if m3u8_url: formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) @@ -96,6 +100,12 @@ class AolIE(InfoExtractor): 'width': int(mobj.group(1)), 'height': int(mobj.group(2)), }) + else: + qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query) + f.update({ + 'width': int_or_none(qs.get('w', [None])[0]), + 'height': int_or_none(qs.get('h', [None])[0]), + }) formats.append(f) self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) From 9c017253e83ac3dfd363566ccbb9fc63f4e2ac07 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 8 Apr 2019 16:34:03 +0100 Subject: [PATCH 0504/1201] [jwplatfom] do not match manifest URLs(#20596) --- youtube_dl/extractor/jwplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index d19a6a774..647b905f1 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class JWPlatformIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' + _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' _TESTS = [{ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325', From 5ca3459828cc0d752f02dab3e9c02cca85185cbb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:20:26 +0100 Subject: [PATCH 0505/1201] [kaltura] sanitize embed URLs --- youtube_dl/extractor/kaltura.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index fdf7f5bbc..79162f665 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -145,6 +145,8 @@ class KalturaIE(InfoExtractor): ) if mobj: embed_info = mobj.groupdict() + for k, v in embed_info.items(): + embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info escaped_pid = re.escape(embed_info['partner_id']) service_url = re.search( From 4bc12b8f819cd0a393e5800d4dc2ecf24401e199 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:21:46 +0100 Subject: [PATCH 0506/1201] [dispeak] improve mp4 bitrate extraction --- youtube_dl/extractor/dispeak.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py index c05f601e2..c345e0274 100644 --- a/youtube_dl/extractor/dispeak.py +++ b/youtube_dl/extractor/dispeak.py @@ -58,10 +58,17 @@ class DigitallySpeakingIE(InfoExtractor): stream_name = xpath_text(a_format, 'streamName', fatal=True) video_path = re.match(r'mp4\:(?P.*)', stream_name).group('path') url = video_root + video_path - vbr = xpath_text(a_format, 'bitrate') + bitrate = xpath_text(a_format, 'bitrate') + tbr = int_or_none(bitrate) + vbr = int_or_none(self._search_regex( + r'-(\d+)\.mp4', video_path, 'vbr', default=None)) + abr = tbr - vbr if tbr and vbr else None video_formats.append({ + 'format_id': bitrate, 'url': url, - 'vbr': int_or_none(vbr), + 'tbr': tbr, + 'vbr': vbr, + 'abr': abr, }) return video_formats From 118f7add3b9690884edb4dc887995f3815243c78 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:23:47 +0100 Subject: [PATCH 0507/1201] [gdc] add support for kaltura embeds and update tests(closes #20575) --- youtube_dl/extractor/gdcvault.py | 96 ++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 8806dc48a..2f555c1d4 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,22 +3,24 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .kaltura import KalturaIE from ..utils import ( HEADRequest, sanitized_Request, + smuggle_url, urlencode_postdata, ) class GDCVaultIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)/(?P(\w|-)+)?' + _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)(?:/(?P[\w-]+))?' _NETRC_MACHINE = 'gdcvault' _TESTS = [ { 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', 'md5': '7ce8388f544c88b7ac11c7ab1b593704', 'info_dict': { - 'id': '1019721', + 'id': '201311826596_AWNY', 'display_id': 'Doki-Doki-Universe-Sweet-Simple', 'ext': 'mp4', 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' @@ -27,7 +29,7 @@ class GDCVaultIE(InfoExtractor): { 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', 'info_dict': { - 'id': '1015683', + 'id': '201203272_1330951438328RSXR', 'display_id': 'Embracing-the-Dark-Art-of', 'ext': 'flv', 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' @@ -56,7 +58,7 @@ class GDCVaultIE(InfoExtractor): 'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface', 'md5': 'a8efb6c31ed06ca8739294960b2dbabd', 'info_dict': { - 'id': '1023460', + 'id': '840376_BQRC', 'ext': 'mp4', 'display_id': 'Tenacious-Design-and-The-Interface', 'title': 'Tenacious Design and The Interface of \'Destiny\'', @@ -66,26 +68,38 @@ class GDCVaultIE(InfoExtractor): # Multiple audios 'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC', 'info_dict': { - 'id': '1014631', - 'ext': 'flv', + 'id': '12396_1299111843500GMPX', + 'ext': 'mp4', 'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man', }, - 'params': { - 'skip_download': True, # Requires rtmpdump - 'format': 'jp', # The japanese audio - } + # 'params': { + # 'skip_download': True, # Requires rtmpdump + # 'format': 'jp', # The japanese audio + # } }, { # gdc-player.html 'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo', 'info_dict': { - 'id': '1435', + 'id': '9350_1238021887562UHXB', 'display_id': 'An-American-engine-in-Tokyo', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT', }, + }, + { + # Kaltura Embed + 'url': 'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling', + 'info_dict': { + 'id': '0_h1fg8j3p', + 'ext': 'mp4', + 'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)', + 'timestamp': 1554401811, + 'upload_date': '20190404', + 'uploader_id': 'joe@blazestreaming.com', + }, 'params': { - 'skip_download': True, # Requires rtmpdump + 'format': 'mp4-408', }, }, ] @@ -114,10 +128,8 @@ class GDCVaultIE(InfoExtractor): return start_page def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - display_id = mobj.group('name') or video_id + video_id, name = re.match(self._VALID_URL, url).groups() + display_id = name or video_id webpage_url = 'http://www.gdcvault.com/play/' + video_id start_page = self._download_webpage(webpage_url, display_id) @@ -127,12 +139,12 @@ class GDCVaultIE(InfoExtractor): start_page, 'url', default=None) if direct_url: title = self._html_search_regex( - r'Session Name\s*(.*?)', + r'Session Name:?\s*(.*?)', start_page, 'title') video_url = 'http://www.gdcvault.com' + direct_url # resolve the url so that we can detect the correct extension - head = self._request_webpage(HEADRequest(video_url), video_id) - video_url = head.geturl() + video_url = self._request_webpage( + HEADRequest(video_url), video_id).geturl() return { 'id': video_id, @@ -141,34 +153,36 @@ class GDCVaultIE(InfoExtractor): 'title': title, } - PLAYER_REGEX = r'' - xml_root = self._html_search_regex( - PLAYER_REGEX, start_page, 'xml root', default=None) - if xml_root is None: - # Probably need to authenticate - login_res = self._login(webpage_url, display_id) - if login_res is None: - self.report_warning('Could not login.') - else: - start_page = login_res - # Grab the url from the authenticated page - xml_root = self._html_search_regex( - PLAYER_REGEX, start_page, 'xml root') + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root', default=None) + if xml_root is None: + # Probably need to authenticate + login_res = self._login(webpage_url, display_id) + if login_res is None: + self.report_warning('Could not login.') + else: + start_page = login_res + # Grab the url from the authenticated page + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root') - xml_name = self._html_search_regex( - r'', + r'', + r'', webpage, 'embed url')) if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) From 427cc215310804127b55744fcc3664ede38a4a0d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 10 Jun 2019 15:17:26 +0100 Subject: [PATCH 0647/1201] [biqle] remove unnecessary regex group --- youtube_dl/extractor/biqle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py index c5c374845..af21e3ee5 100644 --- a/youtube_dl/extractor/biqle.py +++ b/youtube_dl/extractor/biqle.py @@ -42,7 +42,7 @@ class BIQLEIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) embed_url = self._proto_relative_url(self._search_regex( - r'', + r'', webpage, 'embed url')) if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) From 8361e7f93489f226542517216b2127ff170ca996 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 12 Jun 2019 21:41:46 +0100 Subject: [PATCH 0648/1201] [toutv] update client key(closes #21370) --- youtube_dl/extractor/toutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 00f58a087..44b022fca 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -38,7 +38,7 @@ class TouTvIE(RadioCanadaIE): 'url': 'https://ici.tou.tv/l-age-adulte/S01C501', 'only_matching': True, }] - _CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36' + _CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4' def _real_initialize(self): email, password = self._get_login_info() From 28cc2241e44ff0c0704cfffaca6d47d377041aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jun 2019 01:56:17 +0700 Subject: [PATCH 0649/1201] [utils] Restrict parse_codecs and add theora as known vcodec (#21381) --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 11 +++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 71980b3fc..659c6ece5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -822,6 +822,15 @@ class TestUtil(unittest.TestCase): 'vcodec': 'av01.0.05M.08', 'acodec': 'none', }) + self.assertEqual(parse_codecs('theora, vorbis'), { + 'vcodec': 'theora', + 'acodec': 'vorbis', + }) + self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { + 'vcodec': 'unknownvcodec', + 'acodec': 'unknownacodec', + }) + self.assertEqual(parse_codecs('unknown'), {}) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ead9bd862..a1f586b80 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2529,7 +2529,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in splited_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): @@ -2540,13 +2540,8 @@ def parse_codecs(codecs_str): if not vcodec and not acodec: if len(splited_codecs) == 2: return { - 'vcodec': vcodec, - 'acodec': acodec, - } - elif len(splited_codecs) == 1: - return { - 'vcodec': 'none', - 'acodec': vcodec, + 'vcodec': splited_codecs[0], + 'acodec': splited_codecs[1], } else: return { From b85eae0f057a0afdf1da9d6034c19327c8de33cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jun 2019 01:59:05 +0700 Subject: [PATCH 0650/1201] [youtube] Hardcode codec metadata for av01 video only formats (closes #21381) --- youtube_dl/extractor/youtube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 963c73a2d..7b630b191 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -500,6 +500,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, + + # av01 video only formats sometimes served with "unknown" codecs + '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, } _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') From 7c24a58bdb60af80137beac85c8804c70194a455 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 16 Jun 2019 06:32:17 +0100 Subject: [PATCH 0651/1201] [sixplay] add support for rtlmost.hu(#21405) --- youtube_dl/extractor/sixplay.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index 2a72af11b..7ec66ecf3 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -19,7 +19,7 @@ from ..utils import ( class SixPlayIE(InfoExtractor): IE_NAME = '6play' - _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P[0-9]+)' + _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P[0-9]+)' _TESTS = [{ 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051', 'md5': '31fcd112637baa0c2ab92c4fcd8baf27', @@ -35,6 +35,9 @@ class SixPlayIE(InfoExtractor): }, { 'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989', 'only_matching': True, + }, { + 'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787', + 'only_matching': True, }] def _real_extract(self, url): @@ -43,6 +46,7 @@ class SixPlayIE(InfoExtractor): '6play.fr': ('6play', 'm6web'), 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'), 'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'), + 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'), }.get(domain, ('6play', 'm6web')) data = self._download_json( From c40714cdee0ce3de1a5f6e17a61d3ee4c610ae63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 20 Jun 2019 00:57:58 +0700 Subject: [PATCH 0652/1201] [youtube] Make --write-annotations non fatal (closes #21452) --- youtube_dl/extractor/youtube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7b630b191..1010c8616 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1581,8 +1581,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return video_id def _extract_annotations(self, video_id): - url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id - return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') + return self._download_webpage( + 'https://www.youtube.com/annotations_invideo', video_id, + note='Downloading annotations', + errnote='Unable to download video annotations', fatal=False, + query={ + 'features': 1, + 'legacy': 1, + 'video_id': video_id, + }) @staticmethod def _extract_chapters(description, duration): From abefc03f517e9208b9d0c35e7e683941a40bb152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 22:58:42 +0700 Subject: [PATCH 0653/1201] [youtube] Update signature function patterns (closes #21469, closes #21476) --- youtube_dl/extractor/youtube.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1010c8616..83b6ac134 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1312,11 +1312,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - (r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', + (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + # Obsolete patterns + r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) From bc6438c092be6ca63843a349eee1db2b5d398d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 23:01:09 +0700 Subject: [PATCH 0654/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index c4d485ff1..10394a3b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core +* [utils] Restrict parse_codecs and add theora as known vcodec (#21381) + +Extractors +* [youtube] Update signature function patterns (#21469, #21476) +* [youtube] Make --write-annotations non fatal (#21452) ++ [sixplay] Add support for rtlmost.hu (#21405) +* [youtube] Hardcode codec metadata for av01 video only formats (#21381) +* [toutv] Update client key (#21370) ++ [biqle] Add support for new embed domain +* [cbs] Improve DRM protected videos detection (#21339) + + version 2019.06.08 Core From 9842d29d660b1ffe7873823542085879ba9d86a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 23:04:09 +0700 Subject: [PATCH 0655/1201] release 2019.06.21 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 104ad598c..7a2b16827 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.08 + [debug] youtube-dl version 2019.06.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index f711af040..d6180e672 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index ae865a8b0..7cb981abf 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8246b570e..802fa2313 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.08 + [debug] youtube-dl version 2019.06.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 292c2e697..5153864a1 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 10394a3b6..2d9988da3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.06.21 Core * [utils] Restrict parse_codecs and add theora as known vcodec (#21381) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6aa666bc9..33474a452 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.08' +__version__ = '2019.06.21' From 4681441d2faf54615962029c7240601e339281bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 00:07:26 +0700 Subject: [PATCH 0656/1201] [crunchyroll:playlist] Fix and relax title extraction (closes #21291, closes #21443) --- youtube_dl/extractor/crunchyroll.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 588c3c71b..75b56ee42 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -661,9 +661,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): webpage = self._download_webpage( self._add_skip_wall(url), show_id, headers=self.geo_verification_headers()) - title = self._html_search_regex( - r'(?s)]*>\s*(.*?)', - webpage, 'title') + title = self._html_search_meta('name', webpage, default=None) + episode_paths = re.findall( r'(?s)
  • ]+>.*? Date: Sat, 22 Jun 2019 00:15:52 +0700 Subject: [PATCH 0657/1201] [crunchyroll] Move Accept-Language workaround to video extractor since it causes playlists not to list any videos --- youtube_dl/extractor/crunchyroll.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 75b56ee42..85a9a577f 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -103,19 +103,6 @@ class CrunchyrollBaseIE(InfoExtractor): def _real_initialize(self): self._login() - def _download_webpage(self, url_or_request, *args, **kwargs): - request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) - else sanitized_Request(url_or_request)) - # Accept-Language must be set explicitly to accept any language to avoid issues - # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. - # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction - # should be imposed or not (from what I can see it just takes the first language - # ignoring the priority and requires it to correspond the IP). By the way this causes - # Crunchyroll to not work in georestriction cases in some browsers that don't place - # the locale lang first in header. However allowing any language seems to workaround the issue. - request.add_header('Accept-Language', '*') - return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) - @staticmethod def _add_skip_wall(url): parsed_url = compat_urlparse.urlparse(url) @@ -269,6 +256,19 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): '1080': ('80', '108'), } + def _download_webpage(self, url_or_request, *args, **kwargs): + request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) + else sanitized_Request(url_or_request)) + # Accept-Language must be set explicitly to accept any language to avoid issues + # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. + # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction + # should be imposed or not (from what I can see it just takes the first language + # ignoring the priority and requires it to correspond the IP). By the way this causes + # Crunchyroll to not work in georestriction cases in some browsers that don't place + # the locale lang first in header. However allowing any language seems to workaround the issue. + request.add_header('Accept-Language', '*') + return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) + def _decrypt_subtitles(self, data, iv, id): data = bytes_to_intlist(compat_b64decode(data)) iv = bytes_to_intlist(compat_b64decode(iv)) From 9c2aaac2685b34143ed770d5e0c7f3906ab1107d Mon Sep 17 00:00:00 2001 From: Emmanuel Froissart Date: Wed, 12 Jun 2019 13:55:07 +0200 Subject: [PATCH 0658/1201] [tf1] Fix wat id extraction (closes #21365) --- youtube_dl/extractor/tf1.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 903f47380..091350848 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import js_to_json + class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" @@ -43,12 +45,40 @@ class TF1IE(InfoExtractor): }, { 'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html', 'only_matching': True, + }, { + 'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html', + 'info_dict': { + 'id': '13641379', + 'ext': 'mp4', + 'title': 'md5:f392bc52245dc5ad43771650c96fb620', + 'description': 'md5:44bc54f0a21322f5b91d68e76a544eae', + 'upload_date': '20190611', + }, + 'params': { + # Sometimes wat serves the whole file with the --test option + 'skip_download': True, + }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', - webpage, 'wat id', group='id') + vids_data_string = self._html_search_regex( + r'', + webpage, 'videos data string', group='vids_data_string', default=None) + wat_id = None + if vids_data_string is not None: + vids_data = self._parse_json( + vids_data_string, video_id, + transform_source=js_to_json) + video_data = [v for v in vids_data.values() + if 'slug' in v and v['slug'] == video_id] + if len(video_data) > 0 and 'streamId' in video_data[0]: + wat_id = video_data[0]['streamId'] + if wat_id is None: + wat_id = self._html_search_regex( + [r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', + r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2' + ], + webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From 1c11204056566c2983f0a837897d882581880f41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 00:37:49 +0700 Subject: [PATCH 0659/1201] [tf1] Improve extraction and fix issues (closes #21372) --- youtube_dl/extractor/tf1.py | 42 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 091350848..55e2a0721 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -2,8 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor - -from ..utils import js_to_json +from ..compat import compat_str class TF1IE(InfoExtractor): @@ -62,23 +61,32 @@ class TF1IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - vids_data_string = self._html_search_regex( - r'', - webpage, 'videos data string', group='vids_data_string', default=None) + wat_id = None - if vids_data_string is not None: - vids_data = self._parse_json( - vids_data_string, video_id, - transform_source=js_to_json) - video_data = [v for v in vids_data.values() - if 'slug' in v and v['slug'] == video_id] - if len(video_data) > 0 and 'streamId' in video_data[0]: - wat_id = video_data[0]['streamId'] - if wat_id is None: + + data = self._parse_json( + self._search_regex( + r'__APOLLO_STATE__\s*=\s*({.+?})\s*(?:;|)', webpage, + 'data', default='{}'), video_id, fatal=False) + + if data: + try: + wat_id = next( + video.get('streamId') + for key, video in data.items() + if isinstance(video, dict) + and video.get('slug') == video_id) + if not isinstance(wat_id, compat_str) or not wat_id.isdigit(): + wat_id = None + except StopIteration: + pass + + if not wat_id: wat_id = self._html_search_regex( - [r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', - r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2' - ], + (r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', + r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2'), webpage, 'wat id', group='id') + return self.url_result('wat:%s' % wat_id, 'Wat') From 31ce6e996666e7512990da01ef58785933dcb2be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 02:22:07 +0700 Subject: [PATCH 0660/1201] [youtube] Add another signature function pattern --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 83b6ac134..b570d5bae 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1314,6 +1314,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._search_regex( (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # Obsolete patterns r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', From 21b08463a777a79876721e49d3d07a19bc3fe05e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 05:34:46 +0700 Subject: [PATCH 0661/1201] [pornhub] Rework extractors (closes #11922, closes #16078, closes #17454, closes #17936) --- youtube_dl/extractor/pornhub.py | 157 +++++++++++++++++++++++++++----- 1 file changed, 132 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index cb59d526f..72c351d56 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -387,17 +387,81 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): }] -class PornHubUserVideosIE(PornHubPlaylistBaseIE): +class PornHubUserIE(PornHubPlaylistBaseIE): + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))' + _TESTS = [{ + 'url': 'https://www.pornhub.com/model/zoe_ph', + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious', + 'info_dict': { + 'id': 'liz-vicious', + }, + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/users/russianveet69', + 'playlist_mincount': 0, + }, { + 'url': 'https://www.pornhub.com/channels/povd', + 'playlist_mincount': 0, + }] + + @classmethod + def suitable(cls, url): + return (False + if PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubUserIE, cls).suitable(url)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user_id = mobj.group('id') + return self.url_result( + '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(), + video_id=user_id) + + +class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + user_id = mobj.group('id') + + page_url = self._make_page_url(url) + + entries = [] + for page_num in itertools.count(1): + try: + webpage = self._download_webpage( + page_url, user_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + break + raise + page_entries = self._extract_entries(webpage, host) + if not page_entries: + break + entries.extend(page_entries) + if not self._has_more(webpage): + break + + return self.playlist_result(orderedSet(entries), user_id) + + +class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' _TESTS = [{ - 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', - 'info_dict': { - 'id': 'zoe_ph', - }, - 'playlist_mincount': 171, + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 149, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', @@ -426,26 +490,69 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'only_matching': True, + }, { + # Most Viewed Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', + 'only_matching': True, + }, { + # Top Rated Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', + 'only_matching': True, + }, { + # Longest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', + 'only_matching': True, + }, { + # Newest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', + 'only_matching': True, }] - def _real_extract(self, url): + @classmethod + def suitable(cls, url): + return (False + if PornHubUserVideosUploadIE.suitable(url) + else super(PornHubUserVideosIE, cls).suitable(url)) + + def _make_page_url(self, url): + return url + + @staticmethod + def _has_more(webpage): + return re.search( + r'''(?x) + ]+\bclass=["\']page_next| + ]+\brel=["\']next| + ]+\bid=["\']moreDataBtn + ''', webpage) is not None + + +class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos/upload)' + _TESTS = [{ + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 129, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, + }] + + def _make_page_url(self, url): mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - user_id = mobj.group('id') + return '%s/ajax' % mobj.group('url') - entries = [] - for page_num in itertools.count(1): - try: - webpage = self._download_webpage( - url, user_id, 'Downloading page %d' % page_num, - query={'page': page_num}) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - break - raise - page_entries = self._extract_entries(webpage, host) - if not page_entries: - break - entries.extend(page_entries) - - return self.playlist_result(entries, user_id) + @staticmethod + def _has_more(webpage): + return True From 1f7a563ab0efd0745ea66c354255844a9bd36c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 06:01:43 +0700 Subject: [PATCH 0662/1201] [pornhub] Add support for downloading single pages and search pages (closes #15570) --- youtube_dl/extractor/pornhub.py | 39 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 72c351d56..7de585604 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -409,14 +409,14 @@ class PornHubUserIE(PornHubPlaylistBaseIE): @classmethod def suitable(cls, url): return (False - if PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + if PornHubPagedVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) else super(PornHubUserIE, cls).suitable(url)) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('id') return self.url_result( - '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(), + '%s/videos' % mobj.group('url'), ie=PornHubPagedVideosIE.ie_key(), video_id=user_id) @@ -426,10 +426,13 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): host = mobj.group('host') user_id = mobj.group('id') + page = int_or_none(self._search_regex( + r'\bpage=(\d+)', url, 'page', default=None)) + page_url = self._make_page_url(url) entries = [] - for page_num in itertools.count(1): + for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( page_url, user_id, 'Downloading page %d' % page_num, @@ -448,10 +451,17 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): return self.playlist_result(orderedSet(entries), user_id) -class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' +class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/ + (?: + (?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos| + video/search + ) + ''' _TESTS = [{ - 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', @@ -462,6 +472,12 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): 'id': 'jenny-blighe', }, 'playlist_mincount': 149, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 40, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', @@ -484,12 +500,6 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/model/jayndrea/videos/upload', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', - 'only_matching': True, }, { # Most Viewed Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', @@ -506,9 +516,6 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): # Newest Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload', - 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', 'only_matching': True, @@ -521,7 +528,7 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): def suitable(cls, url): return (False if PornHubUserVideosUploadIE.suitable(url) - else super(PornHubUserVideosIE, cls).suitable(url)) + else super(PornHubPagedVideosIE, cls).suitable(url)) def _make_page_url(self, url): return url From 9634de178d35c5cd767b183c2be82b14bef84209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 08:37:07 +0700 Subject: [PATCH 0663/1201] [pornhub] Add support for more paged video sources --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/pornhub.py | 101 +++++++++++++++++------------ 2 files changed, 62 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1ed8a4b2..9cd7d3ac4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -892,8 +892,9 @@ from .porncom import PornComIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, - PornHubPlaylistIE, - PornHubUserVideosIE, + PornHubUserIE, + PornHubPagedVideoListIE, + PornHubUserVideosUploadIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7de585604..11b8cfcf7 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -372,23 +372,8 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): entries, playlist_id, title, playlist.get('description')) -class PornHubPlaylistIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/playlist/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.pornhub.com/playlist/4667351', - 'info_dict': { - 'id': '4667351', - 'title': 'Nataly Hot', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://de.pornhub.com/playlist/4667351', - 'only_matching': True, - }] - - class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, @@ -400,23 +385,20 @@ class PornHubUserIE(PornHubPlaylistBaseIE): 'playlist_mincount': 118, }, { 'url': 'https://www.pornhub.com/users/russianveet69', - 'playlist_mincount': 0, + 'only_matching': True, }, { 'url': 'https://www.pornhub.com/channels/povd', - 'playlist_mincount': 0, + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', + 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return (False - if PornHubPagedVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) - else super(PornHubUserIE, cls).suitable(url)) - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('id') return self.url_result( - '%s/videos' % mobj.group('url'), ie=PornHubPagedVideosIE.ie_key(), + '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id) @@ -424,7 +406,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') - user_id = mobj.group('id') + item_id = mobj.group('id') page = int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) @@ -435,7 +417,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( - page_url, user_id, 'Downloading page %d' % page_num, + page_url, item_id, 'Downloading page %d' % page_num, query={'page': page_num}) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: @@ -448,18 +430,11 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): if not self._has_more(webpage): break - return self.playlist_result(orderedSet(entries), user_id) + return self.playlist_result(orderedSet(entries), item_id) -class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/ - (?: - (?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos| - video/search - ) - ''' +class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?P(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, @@ -469,20 +444,20 @@ class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', 'info_dict': { - 'id': 'jenny-blighe', + 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 149, }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', 'info_dict': { - 'id': 'jenny-blighe', + 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 40, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', 'info_dict': { - 'id': 'povd', + 'id': 'channels/povd/videos', }, 'playlist_mincount': 293, }, { @@ -522,13 +497,55 @@ class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/search?search=123', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video?page=2', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/playlist/44121572', + 'info_dict': { + 'id': 'playlist/44121572', + }, + 'playlist_mincount': 132, + }, { + 'url': 'https://www.pornhub.com/playlist/4667351', + 'only_matching': True, + }, { + 'url': 'https://de.pornhub.com/playlist/4667351', + 'only_matching': True, }] @classmethod def suitable(cls, url): return (False - if PornHubUserVideosUploadIE.suitable(url) - else super(PornHubPagedVideosIE, cls).suitable(url)) + if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubPagedVideoListIE, cls).suitable(url)) def _make_page_url(self, url): return url From 091c9b43164f6f3b31f5f911c88a4aeaa0358429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jun 2019 02:13:46 +0700 Subject: [PATCH 0664/1201] [vimeo:likes] Implement extrator in terms of channel extractor This allows to obtain videos' ids before extraction (#21493) --- youtube_dl/extractor/vimeo.py | 50 +++++------------------------------ 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a41178bab..aeee7df8f 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,7 +16,6 @@ from ..utils import ( determine_ext, ExtractorError, js_to_json, - InAdvancePagedList, int_or_none, merge_dicts, NO_DEFAULT, @@ -1065,7 +1064,7 @@ class VimeoWatchLaterIE(VimeoChannelIE): return self._extract_videos('watchlater', 'https://vimeo.com/watchlater') -class VimeoLikesIE(InfoExtractor): +class VimeoLikesIE(VimeoChannelIE): _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P[^/]+)/likes/?(?:$|[?#]|sort:)' IE_NAME = 'vimeo:likes' IE_DESC = 'Vimeo user likes' @@ -1073,55 +1072,20 @@ class VimeoLikesIE(InfoExtractor): 'url': 'https://vimeo.com/user755559/likes/', 'playlist_mincount': 293, 'info_dict': { - 'id': 'user755559_likes', - 'description': 'See all the videos urza likes', - 'title': 'Videos urza likes', + 'id': 'user755559', + 'title': 'urza’s Likes', }, }, { 'url': 'https://vimeo.com/stormlapse/likes', 'only_matching': True, }] + def _page_url(self, base_url, pagenum): + return '%s/page:%d/' % (base_url, pagenum) + def _real_extract(self, url): user_id = self._match_id(url) - webpage = self._download_webpage(url, user_id) - page_count = self._int( - self._search_regex( - r'''(?x)
  • - .*?
  • \s* - ''', webpage, 'page count', default=1), - 'page count', fatal=True) - PAGE_SIZE = 12 - title = self._html_search_regex( - r'(?s)

    (.+?)

    ', webpage, 'title', fatal=False) - description = self._html_search_meta('description', webpage) - - def _get_page(idx): - page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % ( - user_id, idx + 1) - webpage = self._download_webpage( - page_url, user_id, - note='Downloading page %d/%d' % (idx + 1, page_count)) - video_list = self._search_regex( - r'(?s)
      ]*>(.*?)
    ', - webpage, 'video content') - paths = re.findall( - r']*>\s* Date: Sun, 23 Jun 2019 02:16:09 +0700 Subject: [PATCH 0665/1201] [vimeo:channel,group] Make title extraction no fatal --- youtube_dl/extractor/vimeo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index aeee7df8f..b5b44a79a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -813,7 +813,8 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return '%s/videos/page:%d/' % (base_url, pagenum) def _extract_list_title(self, webpage): - return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title') + return self._TITLE or self._html_search_regex( + self._TITLE_RE, webpage, 'list title', fatal=False) def _login_list_password(self, page_url, list_id, webpage): login_form = self._search_regex( @@ -954,7 +955,7 @@ class VimeoGroupsIE(VimeoAlbumIE): }] def _extract_list_title(self, webpage): - return self._og_search_title(webpage) + return self._og_search_title(webpage, fatal=False) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 695720ebe81166b3ee249eb3916e3c7819ef57a8 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Sat, 22 Jun 2019 22:31:43 +0100 Subject: [PATCH 0666/1201] [openload] Add support for oload.life (#21495) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 32d546e4e..b2918dc85 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -244,7 +244,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -363,6 +363,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.website/embed/drTBl1aOTvk/', 'only_matching': True, + }, { + 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From 3031b7c4ed3a446dc83123ce34780f4db56ad4ef Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Sun, 23 Jun 2019 19:04:05 +0900 Subject: [PATCH 0667/1201] [brightcove:new] Add support for playlists (#21331) --- youtube_dl/extractor/brightcove.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c0345e2c3..58ec5c979 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -483,7 +483,7 @@ class BrightcoveLegacyIE(InfoExtractor): class BrightcoveNewIE(AdobePassIE): IE_NAME = 'brightcove:new' - _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/(?P[^/]+)_(?P[^/]+)/index\.html\?.*videoId=(?P\d+|ref:[^&]+)' + _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/(?P[^/]+)_(?P[^/]+)/index\.html\?.*(?Pvideo|playlist)Id=(?P\d+|ref:[^&]+)' _TESTS = [{ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', 'md5': 'c8100925723840d4b0d243f7025703be', @@ -516,6 +516,21 @@ class BrightcoveNewIE(AdobePassIE): # m3u8 download 'skip_download': True, } + }, { + # playlist stream + 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', + 'info_dict': { + 'id': '5718313430001', + 'title': 'No Audio Playlist', + }, + 'playlist_count': 7, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', + 'only_matching': True, }, { # ref: prefixed video id 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', @@ -715,7 +730,7 @@ class BrightcoveNewIE(AdobePassIE): 'ip_blocks': smuggled_data.get('geo_ip_blocks'), }) - account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() + account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage( 'http://players.brightcove.net/%s/%s_%s/index.min.js' @@ -736,7 +751,7 @@ class BrightcoveNewIE(AdobePassIE): r'policyKey\s*:\s*(["\'])(?P.+?)\1', webpage, 'policy key', group='pk') - api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) + api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) headers = { 'Accept': 'application/json;pk=%s' % policy_key, } @@ -771,5 +786,12 @@ class BrightcoveNewIE(AdobePassIE): 'tveToken': tve_token, }) + if content_type == 'playlist': + return self.playlist_result( + [self._parse_brightcove_metadata(vid, vid.get('id'), headers) + for vid in json_data.get('videos', []) if vid.get('id')], + json_data.get('id'), json_data.get('name'), + json_data.get('description')) + return self._parse_brightcove_metadata( json_data, video_id, headers=headers) From 27cef8885de4ffaa33f96973df3c50b62504bd49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Jun 2019 23:01:43 +0700 Subject: [PATCH 0668/1201] [beeg] Add support for api/v6 v2 URLs (closes #21511) --- youtube_dl/extractor/beeg.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 192f11ea6..c15a0ac8f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( int_or_none, unified_timestamp, @@ -11,6 +14,7 @@ from ..utils import ( class BeegIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P\d+)' _TESTS = [{ + # api/v6 v1 'url': 'http://beeg.com/5416503', 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820', 'info_dict': { @@ -24,6 +28,10 @@ class BeegIE(InfoExtractor): 'tags': list, 'age_limit': 18, } + }, { + # api/v6 v2 + 'url': 'https://beeg.com/1941093077?t=911-1391', + 'only_matching': True, }, { 'url': 'https://beeg.porn/video/5416503', 'only_matching': True, @@ -41,11 +49,22 @@ class BeegIE(InfoExtractor): r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', default='1546225636701') + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query = { + 'v': 2, + 's': t[0], + 'e': t[1], + } + else: + query = {'v': 1} + for api_path in ('', 'api.'): video = self._download_json( 'https://%sbeeg.com/api/v6/%s/video/%s' % (api_path, beeg_version, video_id), video_id, - fatal=api_path == 'api.') + fatal=api_path == 'api.', query=query) if video: break From 1d83e9bd4b2dbc854f6f8b7f4baa14602a288c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Jun 2019 00:12:31 +0700 Subject: [PATCH 0669/1201] [nfb] Remove extractor (closes #21518) Covered by generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/nfb.py | 112 ----------------------------- 2 files changed, 113 deletions(-) delete mode 100644 youtube_dl/extractor/nfb.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9cd7d3ac4..530474f3f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -745,7 +745,6 @@ from .nexx import ( NexxIE, NexxEmbedIE, ) -from .nfb import NFBIE from .nfl import NFLIE from .nhk import NhkVodIE from .nhl import NHLIE diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py deleted file mode 100644 index adcc636bc..000000000 --- a/youtube_dl/extractor/nfb.py +++ /dev/null @@ -1,112 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - clean_html, - determine_ext, - int_or_none, - qualities, - urlencode_postdata, - xpath_text, -) - - -class NFBIE(InfoExtractor): - IE_NAME = 'nfb' - IE_DESC = 'National Film Board of Canada' - _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P[\da-z_-]+)' - - _TEST = { - 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', - 'info_dict': { - 'id': 'qallunaat_why_white_people_are_funny', - 'ext': 'flv', - 'title': 'Qallunaat! Why White People Are Funny ', - 'description': 'md5:6b8e32dde3abf91e58857b174916620c', - 'duration': 3128, - 'creator': 'Mark Sandiford', - 'uploader': 'Mark Sandiford', - }, - 'params': { - # rtmp download - 'skip_download': True, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - config = self._download_xml( - 'https://www.nfb.ca/film/%s/player_config' % video_id, - video_id, 'Downloading player config XML', - data=urlencode_postdata({'getConfig': 'true'}), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf' - }) - - title, description, thumbnail, duration, uploader, author = [None] * 6 - thumbnails, formats = [[]] * 2 - subtitles = {} - - for media in config.findall('./player/stream/media'): - if media.get('type') == 'posterImage': - quality_key = qualities(('low', 'high')) - thumbnails = [] - for asset in media.findall('assets/asset'): - asset_url = xpath_text(asset, 'default/url', default=None) - if not asset_url: - continue - quality = asset.get('quality') - thumbnails.append({ - 'url': asset_url, - 'id': quality, - 'preference': quality_key(quality), - }) - elif media.get('type') == 'video': - title = xpath_text(media, 'title', fatal=True) - for asset in media.findall('assets/asset'): - quality = asset.get('quality') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]$', quality or '', 'height', default=None)) - for node in asset: - streamer = xpath_text(node, 'streamerURI', default=None) - if not streamer: - continue - play_path = xpath_text(node, 'url', default=None) - if not play_path: - continue - formats.append({ - 'url': streamer, - 'app': streamer.split('/', 3)[3], - 'play_path': play_path, - 'rtmp_live': False, - 'ext': 'flv', - 'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag, - 'height': height, - }) - self._sort_formats(formats) - description = clean_html(xpath_text(media, 'description')) - uploader = xpath_text(media, 'author') - duration = int_or_none(media.get('duration')) - for subtitle in media.findall('./subtitles/subtitle'): - subtitle_url = xpath_text(subtitle, 'url', default=None) - if not subtitle_url: - continue - lang = xpath_text(subtitle, 'lang', default='en') - subtitles.setdefault(lang, []).append({ - 'url': subtitle_url, - 'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'duration': duration, - 'creator': uploader, - 'uploader': uploader, - 'formats': formats, - 'subtitles': subtitles, - } From 509bcec37ba26a8c7bc263cf8067495ec7cf120a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 27 Jun 2019 12:06:09 +0100 Subject: [PATCH 0670/1201] [fusion] fix extraction(closes #17775)(closes #21269) --- youtube_dl/extractor/fusion.py | 69 +++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py index 25e284d46..a3f44b812 100644 --- a/youtube_dl/extractor/fusion.py +++ b/youtube_dl/extractor/fusion.py @@ -1,35 +1,84 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .ooyala import OoyalaIE +from ..utils import ( + determine_ext, + int_or_none, + mimetype2ext, + parse_iso8601, +) class FusionIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P\d+)' _TESTS = [{ 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', 'info_dict': { - 'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', + 'id': '3145868', 'ext': 'mp4', 'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs', 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', 'duration': 140.0, + 'timestamp': 1442589635, + 'uploader': 'UNIVISON', + 'upload_date': '20150918', }, 'params': { 'skip_download': True, }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Anvato'], }, { 'url': 'http://fusion.tv/video/201781', 'only_matching': True, + }, { + 'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video_id = self._match_id(url) + video = self._download_json( + 'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id) - ooyala_code = self._search_regex( - r'data-ooyala-id=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala code', group='code') + info = { + 'id': video_id, + 'title': video['title'], + 'description': video.get('excerpt'), + 'timestamp': parse_iso8601(video.get('published')), + 'series': video.get('show'), + } - return OoyalaIE._build_url_result(ooyala_code) + formats = [] + src = video.get('src') or {} + for f_id, f in src.items(): + for q_id, q in f.items(): + q_url = q.get('url') + if not q_url: + continue + ext = determine_ext(q_url, mimetype2ext(q.get('type'))) + if ext == 'smil': + formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False)) + elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'): + formats.extend(self._extract_m3u8_formats( + q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': '-'.join([f_id, q_id]), + 'url': q_url, + 'width': int_or_none(q.get('width')), + 'height': int_or_none(q.get('height')), + 'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')), + 'ext': 'mp4' if ext == 'm3u8' else ext, + 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', + }) + if formats: + self._sort_formats(formats) + info['formats'] = formats + else: + info.update({ + '_type': 'url', + 'url': 'anvato:uni:' + video['video_ids']['anvato'], + 'ie_key': 'Anvato', + }) + + return info From f5629946608861097b6ce5095efb9a9e8ac7f056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 22:18:10 +0700 Subject: [PATCH 0671/1201] [drtv] Relax _VALID_URL --- youtube_dl/extractor/drtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 0c7e350f0..218f10209 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -24,7 +24,7 @@ from ..utils import ( class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' _GEO_BYPASS = False _GEO_COUNTRIES = ['DK'] IE_NAME = 'drtv' @@ -80,6 +80,9 @@ class DRTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', + 'only_matching': True, }] def _real_extract(self, url): From 6625bf200d08baf64764e99caa48b4fb3a48ff8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 27 Jun 2019 17:24:46 +0200 Subject: [PATCH 0672/1201] [mixer:vod] Relax _VALID_URL (closes #21531) (#21536) --- youtube_dl/extractor/beampro.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index 2eaec1ab4..e264a145f 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -99,8 +99,8 @@ class BeamProLiveIE(BeamProBaseIE): class BeamProVodIE(BeamProBaseIE): IE_NAME = 'Mixer:vod' - _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\d+)' - _TEST = { + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\w+)' + _TESTS = [{ 'url': 'https://mixer.com/willow8714?vod=2259830', 'md5': 'b2431e6e8347dc92ebafb565d368b76b', 'info_dict': { @@ -119,7 +119,10 @@ class BeamProVodIE(BeamProBaseIE): 'params': { 'skip_download': True, }, - } + }, { + 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', + 'only_matching': True, + }] @staticmethod def _extract_format(vod, vod_type): From 4f71473ef186c0797596e96755e86df80f357a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 22:59:30 +0700 Subject: [PATCH 0673/1201] [go] Add support for disneynow.com (closes #21528) --- youtube_dl/extractor/go.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 206d89e82..5916f9a8f 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -36,7 +36,7 @@ class GoIE(AdobePassIE): 'resource_id': 'DisneyXD', } } - _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ + _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|disneynow)\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', @@ -71,6 +71,9 @@ class GoIE(AdobePassIE): # brand 008 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', 'only_matching': True, + }, { + 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', + 'only_matching': True, }] def _extract_videos(self, brand, video_id='-1', show_id='-1'): @@ -89,7 +92,7 @@ class GoIE(AdobePassIE): # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', - default=None) + default=video_id) if not site_info: brand = self._search_regex( (r'data-brand=\s*["\']\s*(\d+)', From 232331c0d2f446af760403ed5a0439cdc3deb112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 23:55:15 +0700 Subject: [PATCH 0674/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2d9988da3..985d14a28 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version + +Extractors ++ [go] Add support for disneynow.com (#21528) +* [mixer:vod] Relax URL regular expression (#21531, #21536) +* [drtv] Relax URL regular expression +* [fusion] Fix extraction (#17775, #21269) +- [nfb] Remove extractor (#21518) ++ [beeg] Add support for api/v6 v2 URLs (#21511) ++ [brightcove:new] Add support for playlists (#21331) ++ [openload] Add support for oload.life (#21495) +* [vimeo:channel,group] Make title extraction non fatal +* [vimeo:likes] Implement extrator in terms of channel extractor (#21493) ++ [pornhub] Add support for more paged video sources ++ [pornhub] Add support for downloading single pages and search pages (#15570) +* [pornhub] Rework extractors (#11922, #16078, #17454, #17936) ++ [youtube] Add another signature function pattern +* [tf1] Fix extraction (#21365, #21372) +* [crunchyroll] Move Accept-Language workaround to video extractor since + it causes playlists not to list any videos +* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443) + + version 2019.06.21 Core From 8c8cae91ece9841567aa48095245f92ae8f4b295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 23:57:33 +0700 Subject: [PATCH 0675/1201] release 2019.06.27 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 6 +++--- youtube_dl/version.py | 2 +- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 7a2b16827..d7c15e85a 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.21 + [debug] youtube-dl version 2019.06.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index d6180e672..741862590 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 7cb981abf..4fb035ea4 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 802fa2313..73ed62012 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.21 + [debug] youtube-dl version 2019.06.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 5153864a1..a9d3653e2 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 985d14a28..4ae3d6c7c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.06.27 Extractors + [go] Add support for disneynow.com (#21528) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bfd15b4dc..55ae43144 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -581,7 +581,6 @@ - **NextTV**: 壹電視 - **Nexx** - **NexxEmbed** - - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** - **nhl.com** @@ -692,8 +691,9 @@ - **PornerBros** - **PornHd** - **PornHub**: PornHub and Thumbzilla - - **PornHubPlaylist** - - **PornHubUserVideos** + - **PornHubPagedVideoList** + - **PornHubUser** + - **PornHubUserVideosUpload** - **Pornotube** - **PornoVoisines** - **PornoXO** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 33474a452..01896873d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.21' +__version__ = '2019.06.27' From f7a147e3b63a3165c425c56ee19e66f86900128c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 29 Jun 2019 00:32:43 +0700 Subject: [PATCH 0676/1201] [utils] Introduce random_user_agent and use as default User-Agent (closes #21546) --- youtube_dl/extractor/openload.py | 1595 +----------------------------- youtube_dl/utils.py | 1586 ++++++++++++++++++++++++++++- 2 files changed, 1590 insertions(+), 1591 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b2918dc85..237b0d8fb 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import json import os -import random import re import subprocess import tempfile @@ -380,1595 +379,15 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '74.0.3729.129', - '76.0.3780.3', - '76.0.3780.2', - '74.0.3729.128', - '76.0.3780.1', - '76.0.3780.0', - '75.0.3770.15', - '74.0.3729.127', - '74.0.3729.126', - '76.0.3779.1', - '76.0.3779.0', - '75.0.3770.14', - '74.0.3729.125', - '76.0.3778.1', - '76.0.3778.0', - '75.0.3770.13', - '74.0.3729.124', - '74.0.3729.123', - '73.0.3683.121', - '76.0.3777.1', - '76.0.3777.0', - '75.0.3770.12', - '74.0.3729.122', - '76.0.3776.4', - '75.0.3770.11', - '74.0.3729.121', - '76.0.3776.3', - '76.0.3776.2', - '73.0.3683.120', - '74.0.3729.120', - '74.0.3729.119', - '74.0.3729.118', - '76.0.3776.1', - '76.0.3776.0', - '76.0.3775.5', - '75.0.3770.10', - '74.0.3729.117', - '76.0.3775.4', - '76.0.3775.3', - '74.0.3729.116', - '75.0.3770.9', - '76.0.3775.2', - '76.0.3775.1', - '76.0.3775.0', - '75.0.3770.8', - '74.0.3729.115', - '74.0.3729.114', - '76.0.3774.1', - '76.0.3774.0', - '75.0.3770.7', - '74.0.3729.113', - '74.0.3729.112', - '74.0.3729.111', - '76.0.3773.1', - '76.0.3773.0', - '75.0.3770.6', - '74.0.3729.110', - '74.0.3729.109', - '76.0.3772.1', - '76.0.3772.0', - '75.0.3770.5', - '74.0.3729.108', - '74.0.3729.107', - '76.0.3771.1', - '76.0.3771.0', - '75.0.3770.4', - '74.0.3729.106', - '74.0.3729.105', - '75.0.3770.3', - '74.0.3729.104', - '74.0.3729.103', - '74.0.3729.102', - '75.0.3770.2', - '74.0.3729.101', - '75.0.3770.1', - '75.0.3770.0', - '74.0.3729.100', - '75.0.3769.5', - '75.0.3769.4', - '74.0.3729.99', - '75.0.3769.3', - '75.0.3769.2', - '75.0.3768.6', - '74.0.3729.98', - '75.0.3769.1', - '75.0.3769.0', - '74.0.3729.97', - '73.0.3683.119', - '73.0.3683.118', - '74.0.3729.96', - '75.0.3768.5', - '75.0.3768.4', - '75.0.3768.3', - '75.0.3768.2', - '74.0.3729.95', - '74.0.3729.94', - '75.0.3768.1', - '75.0.3768.0', - '74.0.3729.93', - '74.0.3729.92', - '73.0.3683.117', - '74.0.3729.91', - '75.0.3766.3', - '74.0.3729.90', - '75.0.3767.2', - '75.0.3767.1', - '75.0.3767.0', - '74.0.3729.89', - '73.0.3683.116', - '75.0.3766.2', - '74.0.3729.88', - '75.0.3766.1', - '75.0.3766.0', - '74.0.3729.87', - '73.0.3683.115', - '74.0.3729.86', - '75.0.3765.1', - '75.0.3765.0', - '74.0.3729.85', - '73.0.3683.114', - '74.0.3729.84', - '75.0.3764.1', - '75.0.3764.0', - '74.0.3729.83', - '73.0.3683.113', - '75.0.3763.2', - '75.0.3761.4', - '74.0.3729.82', - '75.0.3763.1', - '75.0.3763.0', - '74.0.3729.81', - '73.0.3683.112', - '75.0.3762.1', - '75.0.3762.0', - '74.0.3729.80', - '75.0.3761.3', - '74.0.3729.79', - '73.0.3683.111', - '75.0.3761.2', - '74.0.3729.78', - '74.0.3729.77', - '75.0.3761.1', - '75.0.3761.0', - '73.0.3683.110', - '74.0.3729.76', - '74.0.3729.75', - '75.0.3760.0', - '74.0.3729.74', - '75.0.3759.8', - '75.0.3759.7', - '75.0.3759.6', - '74.0.3729.73', - '75.0.3759.5', - '74.0.3729.72', - '73.0.3683.109', - '75.0.3759.4', - '75.0.3759.3', - '74.0.3729.71', - '75.0.3759.2', - '74.0.3729.70', - '73.0.3683.108', - '74.0.3729.69', - '75.0.3759.1', - '75.0.3759.0', - '74.0.3729.68', - '73.0.3683.107', - '74.0.3729.67', - '75.0.3758.1', - '75.0.3758.0', - '74.0.3729.66', - '73.0.3683.106', - '74.0.3729.65', - '75.0.3757.1', - '75.0.3757.0', - '74.0.3729.64', - '73.0.3683.105', - '74.0.3729.63', - '75.0.3756.1', - '75.0.3756.0', - '74.0.3729.62', - '73.0.3683.104', - '75.0.3755.3', - '75.0.3755.2', - '73.0.3683.103', - '75.0.3755.1', - '75.0.3755.0', - '74.0.3729.61', - '73.0.3683.102', - '74.0.3729.60', - '75.0.3754.2', - '74.0.3729.59', - '75.0.3753.4', - '74.0.3729.58', - '75.0.3754.1', - '75.0.3754.0', - '74.0.3729.57', - '73.0.3683.101', - '75.0.3753.3', - '75.0.3752.2', - '75.0.3753.2', - '74.0.3729.56', - '75.0.3753.1', - '75.0.3753.0', - '74.0.3729.55', - '73.0.3683.100', - '74.0.3729.54', - '75.0.3752.1', - '75.0.3752.0', - '74.0.3729.53', - '73.0.3683.99', - '74.0.3729.52', - '75.0.3751.1', - '75.0.3751.0', - '74.0.3729.51', - '73.0.3683.98', - '74.0.3729.50', - '75.0.3750.0', - '74.0.3729.49', - '74.0.3729.48', - '74.0.3729.47', - '75.0.3749.3', - '74.0.3729.46', - '73.0.3683.97', - '75.0.3749.2', - '74.0.3729.45', - '75.0.3749.1', - '75.0.3749.0', - '74.0.3729.44', - '73.0.3683.96', - '74.0.3729.43', - '74.0.3729.42', - '75.0.3748.1', - '75.0.3748.0', - '74.0.3729.41', - '75.0.3747.1', - '73.0.3683.95', - '75.0.3746.4', - '74.0.3729.40', - '74.0.3729.39', - '75.0.3747.0', - '75.0.3746.3', - '75.0.3746.2', - '74.0.3729.38', - '75.0.3746.1', - '75.0.3746.0', - '74.0.3729.37', - '73.0.3683.94', - '75.0.3745.5', - '75.0.3745.4', - '75.0.3745.3', - '75.0.3745.2', - '74.0.3729.36', - '75.0.3745.1', - '75.0.3745.0', - '75.0.3744.2', - '74.0.3729.35', - '73.0.3683.93', - '74.0.3729.34', - '75.0.3744.1', - '75.0.3744.0', - '74.0.3729.33', - '73.0.3683.92', - '74.0.3729.32', - '74.0.3729.31', - '73.0.3683.91', - '75.0.3741.2', - '75.0.3740.5', - '74.0.3729.30', - '75.0.3741.1', - '75.0.3741.0', - '74.0.3729.29', - '75.0.3740.4', - '73.0.3683.90', - '74.0.3729.28', - '75.0.3740.3', - '73.0.3683.89', - '75.0.3740.2', - '74.0.3729.27', - '75.0.3740.1', - '75.0.3740.0', - '74.0.3729.26', - '73.0.3683.88', - '73.0.3683.87', - '74.0.3729.25', - '75.0.3739.1', - '75.0.3739.0', - '73.0.3683.86', - '74.0.3729.24', - '73.0.3683.85', - '75.0.3738.4', - '75.0.3738.3', - '75.0.3738.2', - '75.0.3738.1', - '75.0.3738.0', - '74.0.3729.23', - '73.0.3683.84', - '74.0.3729.22', - '74.0.3729.21', - '75.0.3737.1', - '75.0.3737.0', - '74.0.3729.20', - '73.0.3683.83', - '74.0.3729.19', - '75.0.3736.1', - '75.0.3736.0', - '74.0.3729.18', - '73.0.3683.82', - '74.0.3729.17', - '75.0.3735.1', - '75.0.3735.0', - '74.0.3729.16', - '73.0.3683.81', - '75.0.3734.1', - '75.0.3734.0', - '74.0.3729.15', - '73.0.3683.80', - '74.0.3729.14', - '75.0.3733.1', - '75.0.3733.0', - '75.0.3732.1', - '74.0.3729.13', - '74.0.3729.12', - '73.0.3683.79', - '74.0.3729.11', - '75.0.3732.0', - '74.0.3729.10', - '73.0.3683.78', - '74.0.3729.9', - '74.0.3729.8', - '74.0.3729.7', - '75.0.3731.3', - '75.0.3731.2', - '75.0.3731.0', - '74.0.3729.6', - '73.0.3683.77', - '73.0.3683.76', - '75.0.3730.5', - '75.0.3730.4', - '73.0.3683.75', - '74.0.3729.5', - '73.0.3683.74', - '75.0.3730.3', - '75.0.3730.2', - '74.0.3729.4', - '73.0.3683.73', - '73.0.3683.72', - '75.0.3730.1', - '75.0.3730.0', - '74.0.3729.3', - '73.0.3683.71', - '74.0.3729.2', - '73.0.3683.70', - '74.0.3729.1', - '74.0.3729.0', - '74.0.3726.4', - '73.0.3683.69', - '74.0.3726.3', - '74.0.3728.0', - '74.0.3726.2', - '73.0.3683.68', - '74.0.3726.1', - '74.0.3726.0', - '74.0.3725.4', - '73.0.3683.67', - '73.0.3683.66', - '74.0.3725.3', - '74.0.3725.2', - '74.0.3725.1', - '74.0.3724.8', - '74.0.3725.0', - '73.0.3683.65', - '74.0.3724.7', - '74.0.3724.6', - '74.0.3724.5', - '74.0.3724.4', - '74.0.3724.3', - '74.0.3724.2', - '74.0.3724.1', - '74.0.3724.0', - '73.0.3683.64', - '74.0.3723.1', - '74.0.3723.0', - '73.0.3683.63', - '74.0.3722.1', - '74.0.3722.0', - '73.0.3683.62', - '74.0.3718.9', - '74.0.3702.3', - '74.0.3721.3', - '74.0.3721.2', - '74.0.3721.1', - '74.0.3721.0', - '74.0.3720.6', - '73.0.3683.61', - '72.0.3626.122', - '73.0.3683.60', - '74.0.3720.5', - '72.0.3626.121', - '74.0.3718.8', - '74.0.3720.4', - '74.0.3720.3', - '74.0.3718.7', - '74.0.3720.2', - '74.0.3720.1', - '74.0.3720.0', - '74.0.3718.6', - '74.0.3719.5', - '73.0.3683.59', - '74.0.3718.5', - '74.0.3718.4', - '74.0.3719.4', - '74.0.3719.3', - '74.0.3719.2', - '74.0.3719.1', - '73.0.3683.58', - '74.0.3719.0', - '73.0.3683.57', - '73.0.3683.56', - '74.0.3718.3', - '73.0.3683.55', - '74.0.3718.2', - '74.0.3718.1', - '74.0.3718.0', - '73.0.3683.54', - '74.0.3717.2', - '73.0.3683.53', - '74.0.3717.1', - '74.0.3717.0', - '73.0.3683.52', - '74.0.3716.1', - '74.0.3716.0', - '73.0.3683.51', - '74.0.3715.1', - '74.0.3715.0', - '73.0.3683.50', - '74.0.3711.2', - '74.0.3714.2', - '74.0.3713.3', - '74.0.3714.1', - '74.0.3714.0', - '73.0.3683.49', - '74.0.3713.1', - '74.0.3713.0', - '72.0.3626.120', - '73.0.3683.48', - '74.0.3712.2', - '74.0.3712.1', - '74.0.3712.0', - '73.0.3683.47', - '72.0.3626.119', - '73.0.3683.46', - '74.0.3710.2', - '72.0.3626.118', - '74.0.3711.1', - '74.0.3711.0', - '73.0.3683.45', - '72.0.3626.117', - '74.0.3710.1', - '74.0.3710.0', - '73.0.3683.44', - '72.0.3626.116', - '74.0.3709.1', - '74.0.3709.0', - '74.0.3704.9', - '73.0.3683.43', - '72.0.3626.115', - '74.0.3704.8', - '74.0.3704.7', - '74.0.3708.0', - '74.0.3706.7', - '74.0.3704.6', - '73.0.3683.42', - '72.0.3626.114', - '74.0.3706.6', - '72.0.3626.113', - '74.0.3704.5', - '74.0.3706.5', - '74.0.3706.4', - '74.0.3706.3', - '74.0.3706.2', - '74.0.3706.1', - '74.0.3706.0', - '73.0.3683.41', - '72.0.3626.112', - '74.0.3705.1', - '74.0.3705.0', - '73.0.3683.40', - '72.0.3626.111', - '73.0.3683.39', - '74.0.3704.4', - '73.0.3683.38', - '74.0.3704.3', - '74.0.3704.2', - '74.0.3704.1', - '74.0.3704.0', - '73.0.3683.37', - '72.0.3626.110', - '72.0.3626.109', - '74.0.3703.3', - '74.0.3703.2', - '73.0.3683.36', - '74.0.3703.1', - '74.0.3703.0', - '73.0.3683.35', - '72.0.3626.108', - '74.0.3702.2', - '74.0.3699.3', - '74.0.3702.1', - '74.0.3702.0', - '73.0.3683.34', - '72.0.3626.107', - '73.0.3683.33', - '74.0.3701.1', - '74.0.3701.0', - '73.0.3683.32', - '73.0.3683.31', - '72.0.3626.105', - '74.0.3700.1', - '74.0.3700.0', - '73.0.3683.29', - '72.0.3626.103', - '74.0.3699.2', - '74.0.3699.1', - '74.0.3699.0', - '73.0.3683.28', - '72.0.3626.102', - '73.0.3683.27', - '73.0.3683.26', - '74.0.3698.0', - '74.0.3696.2', - '72.0.3626.101', - '73.0.3683.25', - '74.0.3696.1', - '74.0.3696.0', - '74.0.3694.8', - '72.0.3626.100', - '74.0.3694.7', - '74.0.3694.6', - '74.0.3694.5', - '74.0.3694.4', - '72.0.3626.99', - '72.0.3626.98', - '74.0.3694.3', - '73.0.3683.24', - '72.0.3626.97', - '72.0.3626.96', - '72.0.3626.95', - '73.0.3683.23', - '72.0.3626.94', - '73.0.3683.22', - '73.0.3683.21', - '72.0.3626.93', - '74.0.3694.2', - '72.0.3626.92', - '74.0.3694.1', - '74.0.3694.0', - '74.0.3693.6', - '73.0.3683.20', - '72.0.3626.91', - '74.0.3693.5', - '74.0.3693.4', - '74.0.3693.3', - '74.0.3693.2', - '73.0.3683.19', - '74.0.3693.1', - '74.0.3693.0', - '73.0.3683.18', - '72.0.3626.90', - '74.0.3692.1', - '74.0.3692.0', - '73.0.3683.17', - '72.0.3626.89', - '74.0.3687.3', - '74.0.3691.1', - '74.0.3691.0', - '73.0.3683.16', - '72.0.3626.88', - '72.0.3626.87', - '73.0.3683.15', - '74.0.3690.1', - '74.0.3690.0', - '73.0.3683.14', - '72.0.3626.86', - '73.0.3683.13', - '73.0.3683.12', - '74.0.3689.1', - '74.0.3689.0', - '73.0.3683.11', - '72.0.3626.85', - '73.0.3683.10', - '72.0.3626.84', - '73.0.3683.9', - '74.0.3688.1', - '74.0.3688.0', - '73.0.3683.8', - '72.0.3626.83', - '74.0.3687.2', - '74.0.3687.1', - '74.0.3687.0', - '73.0.3683.7', - '72.0.3626.82', - '74.0.3686.4', - '72.0.3626.81', - '74.0.3686.3', - '74.0.3686.2', - '74.0.3686.1', - '74.0.3686.0', - '73.0.3683.6', - '72.0.3626.80', - '74.0.3685.1', - '74.0.3685.0', - '73.0.3683.5', - '72.0.3626.79', - '74.0.3684.1', - '74.0.3684.0', - '73.0.3683.4', - '72.0.3626.78', - '72.0.3626.77', - '73.0.3683.3', - '73.0.3683.2', - '72.0.3626.76', - '73.0.3683.1', - '73.0.3683.0', - '72.0.3626.75', - '71.0.3578.141', - '73.0.3682.1', - '73.0.3682.0', - '72.0.3626.74', - '71.0.3578.140', - '73.0.3681.4', - '73.0.3681.3', - '73.0.3681.2', - '73.0.3681.1', - '73.0.3681.0', - '72.0.3626.73', - '71.0.3578.139', - '72.0.3626.72', - '72.0.3626.71', - '73.0.3680.1', - '73.0.3680.0', - '72.0.3626.70', - '71.0.3578.138', - '73.0.3678.2', - '73.0.3679.1', - '73.0.3679.0', - '72.0.3626.69', - '71.0.3578.137', - '73.0.3678.1', - '73.0.3678.0', - '71.0.3578.136', - '73.0.3677.1', - '73.0.3677.0', - '72.0.3626.68', - '72.0.3626.67', - '71.0.3578.135', - '73.0.3676.1', - '73.0.3676.0', - '73.0.3674.2', - '72.0.3626.66', - '71.0.3578.134', - '73.0.3674.1', - '73.0.3674.0', - '72.0.3626.65', - '71.0.3578.133', - '73.0.3673.2', - '73.0.3673.1', - '73.0.3673.0', - '72.0.3626.64', - '71.0.3578.132', - '72.0.3626.63', - '72.0.3626.62', - '72.0.3626.61', - '72.0.3626.60', - '73.0.3672.1', - '73.0.3672.0', - '72.0.3626.59', - '71.0.3578.131', - '73.0.3671.3', - '73.0.3671.2', - '73.0.3671.1', - '73.0.3671.0', - '72.0.3626.58', - '71.0.3578.130', - '73.0.3670.1', - '73.0.3670.0', - '72.0.3626.57', - '71.0.3578.129', - '73.0.3669.1', - '73.0.3669.0', - '72.0.3626.56', - '71.0.3578.128', - '73.0.3668.2', - '73.0.3668.1', - '73.0.3668.0', - '72.0.3626.55', - '71.0.3578.127', - '73.0.3667.2', - '73.0.3667.1', - '73.0.3667.0', - '72.0.3626.54', - '71.0.3578.126', - '73.0.3666.1', - '73.0.3666.0', - '72.0.3626.53', - '71.0.3578.125', - '73.0.3665.4', - '73.0.3665.3', - '72.0.3626.52', - '73.0.3665.2', - '73.0.3664.4', - '73.0.3665.1', - '73.0.3665.0', - '72.0.3626.51', - '71.0.3578.124', - '72.0.3626.50', - '73.0.3664.3', - '73.0.3664.2', - '73.0.3664.1', - '73.0.3664.0', - '73.0.3663.2', - '72.0.3626.49', - '71.0.3578.123', - '73.0.3663.1', - '73.0.3663.0', - '72.0.3626.48', - '71.0.3578.122', - '73.0.3662.1', - '73.0.3662.0', - '72.0.3626.47', - '71.0.3578.121', - '73.0.3661.1', - '72.0.3626.46', - '73.0.3661.0', - '72.0.3626.45', - '71.0.3578.120', - '73.0.3660.2', - '73.0.3660.1', - '73.0.3660.0', - '72.0.3626.44', - '71.0.3578.119', - '73.0.3659.1', - '73.0.3659.0', - '72.0.3626.43', - '71.0.3578.118', - '73.0.3658.1', - '73.0.3658.0', - '72.0.3626.42', - '71.0.3578.117', - '73.0.3657.1', - '73.0.3657.0', - '72.0.3626.41', - '71.0.3578.116', - '73.0.3656.1', - '73.0.3656.0', - '72.0.3626.40', - '71.0.3578.115', - '73.0.3655.1', - '73.0.3655.0', - '72.0.3626.39', - '71.0.3578.114', - '73.0.3654.1', - '73.0.3654.0', - '72.0.3626.38', - '71.0.3578.113', - '73.0.3653.1', - '73.0.3653.0', - '72.0.3626.37', - '71.0.3578.112', - '73.0.3652.1', - '73.0.3652.0', - '72.0.3626.36', - '71.0.3578.111', - '73.0.3651.1', - '73.0.3651.0', - '72.0.3626.35', - '71.0.3578.110', - '73.0.3650.1', - '73.0.3650.0', - '72.0.3626.34', - '71.0.3578.109', - '73.0.3649.1', - '73.0.3649.0', - '72.0.3626.33', - '71.0.3578.108', - '73.0.3648.2', - '73.0.3648.1', - '73.0.3648.0', - '72.0.3626.32', - '71.0.3578.107', - '73.0.3647.2', - '73.0.3647.1', - '73.0.3647.0', - '72.0.3626.31', - '71.0.3578.106', - '73.0.3635.3', - '73.0.3646.2', - '73.0.3646.1', - '73.0.3646.0', - '72.0.3626.30', - '71.0.3578.105', - '72.0.3626.29', - '73.0.3645.2', - '73.0.3645.1', - '73.0.3645.0', - '72.0.3626.28', - '71.0.3578.104', - '72.0.3626.27', - '72.0.3626.26', - '72.0.3626.25', - '72.0.3626.24', - '73.0.3644.0', - '73.0.3643.2', - '72.0.3626.23', - '71.0.3578.103', - '73.0.3643.1', - '73.0.3643.0', - '72.0.3626.22', - '71.0.3578.102', - '73.0.3642.1', - '73.0.3642.0', - '72.0.3626.21', - '71.0.3578.101', - '73.0.3641.1', - '73.0.3641.0', - '72.0.3626.20', - '71.0.3578.100', - '72.0.3626.19', - '73.0.3640.1', - '73.0.3640.0', - '72.0.3626.18', - '73.0.3639.1', - '71.0.3578.99', - '73.0.3639.0', - '72.0.3626.17', - '73.0.3638.2', - '72.0.3626.16', - '73.0.3638.1', - '73.0.3638.0', - '72.0.3626.15', - '71.0.3578.98', - '73.0.3635.2', - '71.0.3578.97', - '73.0.3637.1', - '73.0.3637.0', - '72.0.3626.14', - '71.0.3578.96', - '71.0.3578.95', - '72.0.3626.13', - '71.0.3578.94', - '73.0.3636.2', - '71.0.3578.93', - '73.0.3636.1', - '73.0.3636.0', - '72.0.3626.12', - '71.0.3578.92', - '73.0.3635.1', - '73.0.3635.0', - '72.0.3626.11', - '71.0.3578.91', - '73.0.3634.2', - '73.0.3634.1', - '73.0.3634.0', - '72.0.3626.10', - '71.0.3578.90', - '71.0.3578.89', - '73.0.3633.2', - '73.0.3633.1', - '73.0.3633.0', - '72.0.3610.4', - '72.0.3626.9', - '71.0.3578.88', - '73.0.3632.5', - '73.0.3632.4', - '73.0.3632.3', - '73.0.3632.2', - '73.0.3632.1', - '73.0.3632.0', - '72.0.3626.8', - '71.0.3578.87', - '73.0.3631.2', - '73.0.3631.1', - '73.0.3631.0', - '72.0.3626.7', - '71.0.3578.86', - '72.0.3626.6', - '73.0.3630.1', - '73.0.3630.0', - '72.0.3626.5', - '71.0.3578.85', - '72.0.3626.4', - '73.0.3628.3', - '73.0.3628.2', - '73.0.3629.1', - '73.0.3629.0', - '72.0.3626.3', - '71.0.3578.84', - '73.0.3628.1', - '73.0.3628.0', - '71.0.3578.83', - '73.0.3627.1', - '73.0.3627.0', - '72.0.3626.2', - '71.0.3578.82', - '71.0.3578.81', - '71.0.3578.80', - '72.0.3626.1', - '72.0.3626.0', - '71.0.3578.79', - '70.0.3538.124', - '71.0.3578.78', - '72.0.3623.4', - '72.0.3625.2', - '72.0.3625.1', - '72.0.3625.0', - '71.0.3578.77', - '70.0.3538.123', - '72.0.3624.4', - '72.0.3624.3', - '72.0.3624.2', - '71.0.3578.76', - '72.0.3624.1', - '72.0.3624.0', - '72.0.3623.3', - '71.0.3578.75', - '70.0.3538.122', - '71.0.3578.74', - '72.0.3623.2', - '72.0.3610.3', - '72.0.3623.1', - '72.0.3623.0', - '72.0.3622.3', - '72.0.3622.2', - '71.0.3578.73', - '70.0.3538.121', - '72.0.3622.1', - '72.0.3622.0', - '71.0.3578.72', - '70.0.3538.120', - '72.0.3621.1', - '72.0.3621.0', - '71.0.3578.71', - '70.0.3538.119', - '72.0.3620.1', - '72.0.3620.0', - '71.0.3578.70', - '70.0.3538.118', - '71.0.3578.69', - '72.0.3619.1', - '72.0.3619.0', - '71.0.3578.68', - '70.0.3538.117', - '71.0.3578.67', - '72.0.3618.1', - '72.0.3618.0', - '71.0.3578.66', - '70.0.3538.116', - '72.0.3617.1', - '72.0.3617.0', - '71.0.3578.65', - '70.0.3538.115', - '72.0.3602.3', - '71.0.3578.64', - '72.0.3616.1', - '72.0.3616.0', - '71.0.3578.63', - '70.0.3538.114', - '71.0.3578.62', - '72.0.3615.1', - '72.0.3615.0', - '71.0.3578.61', - '70.0.3538.113', - '72.0.3614.1', - '72.0.3614.0', - '71.0.3578.60', - '70.0.3538.112', - '72.0.3613.1', - '72.0.3613.0', - '71.0.3578.59', - '70.0.3538.111', - '72.0.3612.2', - '72.0.3612.1', - '72.0.3612.0', - '70.0.3538.110', - '71.0.3578.58', - '70.0.3538.109', - '72.0.3611.2', - '72.0.3611.1', - '72.0.3611.0', - '71.0.3578.57', - '70.0.3538.108', - '72.0.3610.2', - '71.0.3578.56', - '71.0.3578.55', - '72.0.3610.1', - '72.0.3610.0', - '71.0.3578.54', - '70.0.3538.107', - '71.0.3578.53', - '72.0.3609.3', - '71.0.3578.52', - '72.0.3609.2', - '71.0.3578.51', - '72.0.3608.5', - '72.0.3609.1', - '72.0.3609.0', - '71.0.3578.50', - '70.0.3538.106', - '72.0.3608.4', - '72.0.3608.3', - '72.0.3608.2', - '71.0.3578.49', - '72.0.3608.1', - '72.0.3608.0', - '70.0.3538.105', - '71.0.3578.48', - '72.0.3607.1', - '72.0.3607.0', - '71.0.3578.47', - '70.0.3538.104', - '72.0.3606.2', - '72.0.3606.1', - '72.0.3606.0', - '71.0.3578.46', - '70.0.3538.103', - '70.0.3538.102', - '72.0.3605.3', - '72.0.3605.2', - '72.0.3605.1', - '72.0.3605.0', - '71.0.3578.45', - '70.0.3538.101', - '71.0.3578.44', - '71.0.3578.43', - '70.0.3538.100', - '70.0.3538.99', - '71.0.3578.42', - '72.0.3604.1', - '72.0.3604.0', - '71.0.3578.41', - '70.0.3538.98', - '71.0.3578.40', - '72.0.3603.2', - '72.0.3603.1', - '72.0.3603.0', - '71.0.3578.39', - '70.0.3538.97', - '72.0.3602.2', - '71.0.3578.38', - '71.0.3578.37', - '72.0.3602.1', - '72.0.3602.0', - '71.0.3578.36', - '70.0.3538.96', - '72.0.3601.1', - '72.0.3601.0', - '71.0.3578.35', - '70.0.3538.95', - '72.0.3600.1', - '72.0.3600.0', - '71.0.3578.34', - '70.0.3538.94', - '72.0.3599.3', - '72.0.3599.2', - '72.0.3599.1', - '72.0.3599.0', - '71.0.3578.33', - '70.0.3538.93', - '72.0.3598.1', - '72.0.3598.0', - '71.0.3578.32', - '70.0.3538.87', - '72.0.3597.1', - '72.0.3597.0', - '72.0.3596.2', - '71.0.3578.31', - '70.0.3538.86', - '71.0.3578.30', - '71.0.3578.29', - '72.0.3596.1', - '72.0.3596.0', - '71.0.3578.28', - '70.0.3538.85', - '72.0.3595.2', - '72.0.3591.3', - '72.0.3595.1', - '72.0.3595.0', - '71.0.3578.27', - '70.0.3538.84', - '72.0.3594.1', - '72.0.3594.0', - '71.0.3578.26', - '70.0.3538.83', - '72.0.3593.2', - '72.0.3593.1', - '72.0.3593.0', - '71.0.3578.25', - '70.0.3538.82', - '72.0.3589.3', - '72.0.3592.2', - '72.0.3592.1', - '72.0.3592.0', - '71.0.3578.24', - '72.0.3589.2', - '70.0.3538.81', - '70.0.3538.80', - '72.0.3591.2', - '72.0.3591.1', - '72.0.3591.0', - '71.0.3578.23', - '70.0.3538.79', - '71.0.3578.22', - '72.0.3590.1', - '72.0.3590.0', - '71.0.3578.21', - '70.0.3538.78', - '70.0.3538.77', - '72.0.3589.1', - '72.0.3589.0', - '71.0.3578.20', - '70.0.3538.76', - '71.0.3578.19', - '70.0.3538.75', - '72.0.3588.1', - '72.0.3588.0', - '71.0.3578.18', - '70.0.3538.74', - '72.0.3586.2', - '72.0.3587.0', - '71.0.3578.17', - '70.0.3538.73', - '72.0.3586.1', - '72.0.3586.0', - '71.0.3578.16', - '70.0.3538.72', - '72.0.3585.1', - '72.0.3585.0', - '71.0.3578.15', - '70.0.3538.71', - '71.0.3578.14', - '72.0.3584.1', - '72.0.3584.0', - '71.0.3578.13', - '70.0.3538.70', - '72.0.3583.2', - '71.0.3578.12', - '72.0.3583.1', - '72.0.3583.0', - '71.0.3578.11', - '70.0.3538.69', - '71.0.3578.10', - '72.0.3582.0', - '72.0.3581.4', - '71.0.3578.9', - '70.0.3538.67', - '72.0.3581.3', - '72.0.3581.2', - '72.0.3581.1', - '72.0.3581.0', - '71.0.3578.8', - '70.0.3538.66', - '72.0.3580.1', - '72.0.3580.0', - '71.0.3578.7', - '70.0.3538.65', - '71.0.3578.6', - '72.0.3579.1', - '72.0.3579.0', - '71.0.3578.5', - '70.0.3538.64', - '71.0.3578.4', - '71.0.3578.3', - '71.0.3578.2', - '71.0.3578.1', - '71.0.3578.0', - '70.0.3538.63', - '69.0.3497.128', - '70.0.3538.62', - '70.0.3538.61', - '70.0.3538.60', - '70.0.3538.59', - '71.0.3577.1', - '71.0.3577.0', - '70.0.3538.58', - '69.0.3497.127', - '71.0.3576.2', - '71.0.3576.1', - '71.0.3576.0', - '70.0.3538.57', - '70.0.3538.56', - '71.0.3575.2', - '70.0.3538.55', - '69.0.3497.126', - '70.0.3538.54', - '71.0.3575.1', - '71.0.3575.0', - '71.0.3574.1', - '71.0.3574.0', - '70.0.3538.53', - '69.0.3497.125', - '70.0.3538.52', - '71.0.3573.1', - '71.0.3573.0', - '70.0.3538.51', - '69.0.3497.124', - '71.0.3572.1', - '71.0.3572.0', - '70.0.3538.50', - '69.0.3497.123', - '71.0.3571.2', - '70.0.3538.49', - '69.0.3497.122', - '71.0.3571.1', - '71.0.3571.0', - '70.0.3538.48', - '69.0.3497.121', - '71.0.3570.1', - '71.0.3570.0', - '70.0.3538.47', - '69.0.3497.120', - '71.0.3568.2', - '71.0.3569.1', - '71.0.3569.0', - '70.0.3538.46', - '69.0.3497.119', - '70.0.3538.45', - '71.0.3568.1', - '71.0.3568.0', - '70.0.3538.44', - '69.0.3497.118', - '70.0.3538.43', - '70.0.3538.42', - '71.0.3567.1', - '71.0.3567.0', - '70.0.3538.41', - '69.0.3497.117', - '71.0.3566.1', - '71.0.3566.0', - '70.0.3538.40', - '69.0.3497.116', - '71.0.3565.1', - '71.0.3565.0', - '70.0.3538.39', - '69.0.3497.115', - '71.0.3564.1', - '71.0.3564.0', - '70.0.3538.38', - '69.0.3497.114', - '71.0.3563.0', - '71.0.3562.2', - '70.0.3538.37', - '69.0.3497.113', - '70.0.3538.36', - '70.0.3538.35', - '71.0.3562.1', - '71.0.3562.0', - '70.0.3538.34', - '69.0.3497.112', - '70.0.3538.33', - '71.0.3561.1', - '71.0.3561.0', - '70.0.3538.32', - '69.0.3497.111', - '71.0.3559.6', - '71.0.3560.1', - '71.0.3560.0', - '71.0.3559.5', - '71.0.3559.4', - '70.0.3538.31', - '69.0.3497.110', - '71.0.3559.3', - '70.0.3538.30', - '69.0.3497.109', - '71.0.3559.2', - '71.0.3559.1', - '71.0.3559.0', - '70.0.3538.29', - '69.0.3497.108', - '71.0.3558.2', - '71.0.3558.1', - '71.0.3558.0', - '70.0.3538.28', - '69.0.3497.107', - '71.0.3557.2', - '71.0.3557.1', - '71.0.3557.0', - '70.0.3538.27', - '69.0.3497.106', - '71.0.3554.4', - '70.0.3538.26', - '71.0.3556.1', - '71.0.3556.0', - '70.0.3538.25', - '71.0.3554.3', - '69.0.3497.105', - '71.0.3554.2', - '70.0.3538.24', - '69.0.3497.104', - '71.0.3555.2', - '70.0.3538.23', - '71.0.3555.1', - '71.0.3555.0', - '70.0.3538.22', - '69.0.3497.103', - '71.0.3554.1', - '71.0.3554.0', - '70.0.3538.21', - '69.0.3497.102', - '71.0.3553.3', - '70.0.3538.20', - '69.0.3497.101', - '71.0.3553.2', - '69.0.3497.100', - '71.0.3553.1', - '71.0.3553.0', - '70.0.3538.19', - '69.0.3497.99', - '69.0.3497.98', - '69.0.3497.97', - '71.0.3552.6', - '71.0.3552.5', - '71.0.3552.4', - '71.0.3552.3', - '71.0.3552.2', - '71.0.3552.1', - '71.0.3552.0', - '70.0.3538.18', - '69.0.3497.96', - '71.0.3551.3', - '71.0.3551.2', - '71.0.3551.1', - '71.0.3551.0', - '70.0.3538.17', - '69.0.3497.95', - '71.0.3550.3', - '71.0.3550.2', - '71.0.3550.1', - '71.0.3550.0', - '70.0.3538.16', - '69.0.3497.94', - '71.0.3549.1', - '71.0.3549.0', - '70.0.3538.15', - '69.0.3497.93', - '69.0.3497.92', - '71.0.3548.1', - '71.0.3548.0', - '70.0.3538.14', - '69.0.3497.91', - '71.0.3547.1', - '71.0.3547.0', - '70.0.3538.13', - '69.0.3497.90', - '71.0.3546.2', - '69.0.3497.89', - '71.0.3546.1', - '71.0.3546.0', - '70.0.3538.12', - '69.0.3497.88', - '71.0.3545.4', - '71.0.3545.3', - '71.0.3545.2', - '71.0.3545.1', - '71.0.3545.0', - '70.0.3538.11', - '69.0.3497.87', - '71.0.3544.5', - '71.0.3544.4', - '71.0.3544.3', - '71.0.3544.2', - '71.0.3544.1', - '71.0.3544.0', - '69.0.3497.86', - '70.0.3538.10', - '69.0.3497.85', - '70.0.3538.9', - '69.0.3497.84', - '71.0.3543.4', - '70.0.3538.8', - '71.0.3543.3', - '71.0.3543.2', - '71.0.3543.1', - '71.0.3543.0', - '70.0.3538.7', - '69.0.3497.83', - '71.0.3542.2', - '71.0.3542.1', - '71.0.3542.0', - '70.0.3538.6', - '69.0.3497.82', - '69.0.3497.81', - '71.0.3541.1', - '71.0.3541.0', - '70.0.3538.5', - '69.0.3497.80', - '71.0.3540.1', - '71.0.3540.0', - '70.0.3538.4', - '69.0.3497.79', - '70.0.3538.3', - '71.0.3539.1', - '71.0.3539.0', - '69.0.3497.78', - '68.0.3440.134', - '69.0.3497.77', - '70.0.3538.2', - '70.0.3538.1', - '70.0.3538.0', - '69.0.3497.76', - '68.0.3440.133', - '69.0.3497.75', - '70.0.3537.2', - '70.0.3537.1', - '70.0.3537.0', - '69.0.3497.74', - '68.0.3440.132', - '70.0.3536.0', - '70.0.3535.5', - '70.0.3535.4', - '70.0.3535.3', - '69.0.3497.73', - '68.0.3440.131', - '70.0.3532.8', - '70.0.3532.7', - '69.0.3497.72', - '69.0.3497.71', - '70.0.3535.2', - '70.0.3535.1', - '70.0.3535.0', - '69.0.3497.70', - '68.0.3440.130', - '69.0.3497.69', - '68.0.3440.129', - '70.0.3534.4', - '70.0.3534.3', - '70.0.3534.2', - '70.0.3534.1', - '70.0.3534.0', - '69.0.3497.68', - '68.0.3440.128', - '70.0.3533.2', - '70.0.3533.1', - '70.0.3533.0', - '69.0.3497.67', - '68.0.3440.127', - '70.0.3532.6', - '70.0.3532.5', - '70.0.3532.4', - '69.0.3497.66', - '68.0.3440.126', - '70.0.3532.3', - '70.0.3532.2', - '70.0.3532.1', - '69.0.3497.60', - '69.0.3497.65', - '69.0.3497.64', - '70.0.3532.0', - '70.0.3531.0', - '70.0.3530.4', - '70.0.3530.3', - '70.0.3530.2', - '69.0.3497.58', - '68.0.3440.125', - '69.0.3497.57', - '69.0.3497.56', - '69.0.3497.55', - '69.0.3497.54', - '70.0.3530.1', - '70.0.3530.0', - '69.0.3497.53', - '68.0.3440.124', - '69.0.3497.52', - '70.0.3529.3', - '70.0.3529.2', - '70.0.3529.1', - '70.0.3529.0', - '69.0.3497.51', - '70.0.3528.4', - '68.0.3440.123', - '70.0.3528.3', - '70.0.3528.2', - '70.0.3528.1', - '70.0.3528.0', - '69.0.3497.50', - '68.0.3440.122', - '70.0.3527.1', - '70.0.3527.0', - '69.0.3497.49', - '68.0.3440.121', - '70.0.3526.1', - '70.0.3526.0', - '68.0.3440.120', - '69.0.3497.48', - '69.0.3497.47', - '68.0.3440.119', - '68.0.3440.118', - '70.0.3525.5', - '70.0.3525.4', - '70.0.3525.3', - '68.0.3440.117', - '69.0.3497.46', - '70.0.3525.2', - '70.0.3525.1', - '70.0.3525.0', - '69.0.3497.45', - '68.0.3440.116', - '70.0.3524.4', - '70.0.3524.3', - '69.0.3497.44', - '70.0.3524.2', - '70.0.3524.1', - '70.0.3524.0', - '70.0.3523.2', - '69.0.3497.43', - '68.0.3440.115', - '70.0.3505.9', - '69.0.3497.42', - '70.0.3505.8', - '70.0.3523.1', - '70.0.3523.0', - '69.0.3497.41', - '68.0.3440.114', - '70.0.3505.7', - '69.0.3497.40', - '70.0.3522.1', - '70.0.3522.0', - '70.0.3521.2', - '69.0.3497.39', - '68.0.3440.113', - '70.0.3505.6', - '70.0.3521.1', - '70.0.3521.0', - '69.0.3497.38', - '68.0.3440.112', - '70.0.3520.1', - '70.0.3520.0', - '69.0.3497.37', - '68.0.3440.111', - '70.0.3519.3', - '70.0.3519.2', - '70.0.3519.1', - '70.0.3519.0', - '69.0.3497.36', - '68.0.3440.110', - '70.0.3518.1', - '70.0.3518.0', - '69.0.3497.35', - '69.0.3497.34', - '68.0.3440.109', - '70.0.3517.1', - '70.0.3517.0', - '69.0.3497.33', - '68.0.3440.108', - '69.0.3497.32', - '70.0.3516.3', - '70.0.3516.2', - '70.0.3516.1', - '70.0.3516.0', - '69.0.3497.31', - '68.0.3440.107', - '70.0.3515.4', - '68.0.3440.106', - '70.0.3515.3', - '70.0.3515.2', - '70.0.3515.1', - '70.0.3515.0', - '69.0.3497.30', - '68.0.3440.105', - '68.0.3440.104', - '70.0.3514.2', - '70.0.3514.1', - '70.0.3514.0', - '69.0.3497.29', - '68.0.3440.103', - '70.0.3513.1', - '70.0.3513.0', - '69.0.3497.28', - ) - @classmethod def _extract_urls(cls, webpage): return re.findall( r']+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' % (cls._DOMAINS, cls._EMBED_WORD), webpage) - def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + def _extract_decrypted_page(self, page_url, webpage, video_id): phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) + webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id) return webpage def _real_extract(self, url): @@ -1977,16 +396,13 @@ class OpenloadIE(InfoExtractor): video_id = mobj.group('id') url_pattern = 'https://%s/%%s/%s/' % (host, video_id) - headers = { - 'User-Agent': self._USER_AGENT_TPL % random.choice(self._CHROME_VERSIONS), - } for path in (self._EMBED_WORD, self._STREAM_WORD): page_url = url_pattern % path last = path == self._STREAM_WORD webpage = self._download_webpage( page_url, video_id, 'Downloading %s webpage' % path, - headers=headers, fatal=last) + fatal=last) if not webpage: continue if 'File not found' in webpage or 'deleted by the owner' in webpage: @@ -1995,7 +411,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True, video_id=video_id) break - webpage = self._extract_decrypted_page(page_url, webpage, video_id, headers) + webpage = self._extract_decrypted_page(page_url, webpage, video_id) for element_id in self._URL_IDS: decoded_id = get_element_by_id(element_id, webpage) if decoded_id: @@ -2026,7 +442,6 @@ class OpenloadIE(InfoExtractor): 'url': video_url, 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'), 'subtitles': subtitles, - 'http_headers': headers, } @@ -2061,5 +476,5 @@ class VerystreamIE(OpenloadIE): 'only_matching': True, }] - def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + def _extract_decrypted_page(self, page_url, webpage, video_id): return webpage # for Verystream, the webpage is already decrypted diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a1f586b80..798757241 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -81,8 +81,1592 @@ def register_socks_protocols(): # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) + +def random_user_agent(): + _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' + _CHROME_VERSIONS = ( + '74.0.3729.129', + '76.0.3780.3', + '76.0.3780.2', + '74.0.3729.128', + '76.0.3780.1', + '76.0.3780.0', + '75.0.3770.15', + '74.0.3729.127', + '74.0.3729.126', + '76.0.3779.1', + '76.0.3779.0', + '75.0.3770.14', + '74.0.3729.125', + '76.0.3778.1', + '76.0.3778.0', + '75.0.3770.13', + '74.0.3729.124', + '74.0.3729.123', + '73.0.3683.121', + '76.0.3777.1', + '76.0.3777.0', + '75.0.3770.12', + '74.0.3729.122', + '76.0.3776.4', + '75.0.3770.11', + '74.0.3729.121', + '76.0.3776.3', + '76.0.3776.2', + '73.0.3683.120', + '74.0.3729.120', + '74.0.3729.119', + '74.0.3729.118', + '76.0.3776.1', + '76.0.3776.0', + '76.0.3775.5', + '75.0.3770.10', + '74.0.3729.117', + '76.0.3775.4', + '76.0.3775.3', + '74.0.3729.116', + '75.0.3770.9', + '76.0.3775.2', + '76.0.3775.1', + '76.0.3775.0', + '75.0.3770.8', + '74.0.3729.115', + '74.0.3729.114', + '76.0.3774.1', + '76.0.3774.0', + '75.0.3770.7', + '74.0.3729.113', + '74.0.3729.112', + '74.0.3729.111', + '76.0.3773.1', + '76.0.3773.0', + '75.0.3770.6', + '74.0.3729.110', + '74.0.3729.109', + '76.0.3772.1', + '76.0.3772.0', + '75.0.3770.5', + '74.0.3729.108', + '74.0.3729.107', + '76.0.3771.1', + '76.0.3771.0', + '75.0.3770.4', + '74.0.3729.106', + '74.0.3729.105', + '75.0.3770.3', + '74.0.3729.104', + '74.0.3729.103', + '74.0.3729.102', + '75.0.3770.2', + '74.0.3729.101', + '75.0.3770.1', + '75.0.3770.0', + '74.0.3729.100', + '75.0.3769.5', + '75.0.3769.4', + '74.0.3729.99', + '75.0.3769.3', + '75.0.3769.2', + '75.0.3768.6', + '74.0.3729.98', + '75.0.3769.1', + '75.0.3769.0', + '74.0.3729.97', + '73.0.3683.119', + '73.0.3683.118', + '74.0.3729.96', + '75.0.3768.5', + '75.0.3768.4', + '75.0.3768.3', + '75.0.3768.2', + '74.0.3729.95', + '74.0.3729.94', + '75.0.3768.1', + '75.0.3768.0', + '74.0.3729.93', + '74.0.3729.92', + '73.0.3683.117', + '74.0.3729.91', + '75.0.3766.3', + '74.0.3729.90', + '75.0.3767.2', + '75.0.3767.1', + '75.0.3767.0', + '74.0.3729.89', + '73.0.3683.116', + '75.0.3766.2', + '74.0.3729.88', + '75.0.3766.1', + '75.0.3766.0', + '74.0.3729.87', + '73.0.3683.115', + '74.0.3729.86', + '75.0.3765.1', + '75.0.3765.0', + '74.0.3729.85', + '73.0.3683.114', + '74.0.3729.84', + '75.0.3764.1', + '75.0.3764.0', + '74.0.3729.83', + '73.0.3683.113', + '75.0.3763.2', + '75.0.3761.4', + '74.0.3729.82', + '75.0.3763.1', + '75.0.3763.0', + '74.0.3729.81', + '73.0.3683.112', + '75.0.3762.1', + '75.0.3762.0', + '74.0.3729.80', + '75.0.3761.3', + '74.0.3729.79', + '73.0.3683.111', + '75.0.3761.2', + '74.0.3729.78', + '74.0.3729.77', + '75.0.3761.1', + '75.0.3761.0', + '73.0.3683.110', + '74.0.3729.76', + '74.0.3729.75', + '75.0.3760.0', + '74.0.3729.74', + '75.0.3759.8', + '75.0.3759.7', + '75.0.3759.6', + '74.0.3729.73', + '75.0.3759.5', + '74.0.3729.72', + '73.0.3683.109', + '75.0.3759.4', + '75.0.3759.3', + '74.0.3729.71', + '75.0.3759.2', + '74.0.3729.70', + '73.0.3683.108', + '74.0.3729.69', + '75.0.3759.1', + '75.0.3759.0', + '74.0.3729.68', + '73.0.3683.107', + '74.0.3729.67', + '75.0.3758.1', + '75.0.3758.0', + '74.0.3729.66', + '73.0.3683.106', + '74.0.3729.65', + '75.0.3757.1', + '75.0.3757.0', + '74.0.3729.64', + '73.0.3683.105', + '74.0.3729.63', + '75.0.3756.1', + '75.0.3756.0', + '74.0.3729.62', + '73.0.3683.104', + '75.0.3755.3', + '75.0.3755.2', + '73.0.3683.103', + '75.0.3755.1', + '75.0.3755.0', + '74.0.3729.61', + '73.0.3683.102', + '74.0.3729.60', + '75.0.3754.2', + '74.0.3729.59', + '75.0.3753.4', + '74.0.3729.58', + '75.0.3754.1', + '75.0.3754.0', + '74.0.3729.57', + '73.0.3683.101', + '75.0.3753.3', + '75.0.3752.2', + '75.0.3753.2', + '74.0.3729.56', + '75.0.3753.1', + '75.0.3753.0', + '74.0.3729.55', + '73.0.3683.100', + '74.0.3729.54', + '75.0.3752.1', + '75.0.3752.0', + '74.0.3729.53', + '73.0.3683.99', + '74.0.3729.52', + '75.0.3751.1', + '75.0.3751.0', + '74.0.3729.51', + '73.0.3683.98', + '74.0.3729.50', + '75.0.3750.0', + '74.0.3729.49', + '74.0.3729.48', + '74.0.3729.47', + '75.0.3749.3', + '74.0.3729.46', + '73.0.3683.97', + '75.0.3749.2', + '74.0.3729.45', + '75.0.3749.1', + '75.0.3749.0', + '74.0.3729.44', + '73.0.3683.96', + '74.0.3729.43', + '74.0.3729.42', + '75.0.3748.1', + '75.0.3748.0', + '74.0.3729.41', + '75.0.3747.1', + '73.0.3683.95', + '75.0.3746.4', + '74.0.3729.40', + '74.0.3729.39', + '75.0.3747.0', + '75.0.3746.3', + '75.0.3746.2', + '74.0.3729.38', + '75.0.3746.1', + '75.0.3746.0', + '74.0.3729.37', + '73.0.3683.94', + '75.0.3745.5', + '75.0.3745.4', + '75.0.3745.3', + '75.0.3745.2', + '74.0.3729.36', + '75.0.3745.1', + '75.0.3745.0', + '75.0.3744.2', + '74.0.3729.35', + '73.0.3683.93', + '74.0.3729.34', + '75.0.3744.1', + '75.0.3744.0', + '74.0.3729.33', + '73.0.3683.92', + '74.0.3729.32', + '74.0.3729.31', + '73.0.3683.91', + '75.0.3741.2', + '75.0.3740.5', + '74.0.3729.30', + '75.0.3741.1', + '75.0.3741.0', + '74.0.3729.29', + '75.0.3740.4', + '73.0.3683.90', + '74.0.3729.28', + '75.0.3740.3', + '73.0.3683.89', + '75.0.3740.2', + '74.0.3729.27', + '75.0.3740.1', + '75.0.3740.0', + '74.0.3729.26', + '73.0.3683.88', + '73.0.3683.87', + '74.0.3729.25', + '75.0.3739.1', + '75.0.3739.0', + '73.0.3683.86', + '74.0.3729.24', + '73.0.3683.85', + '75.0.3738.4', + '75.0.3738.3', + '75.0.3738.2', + '75.0.3738.1', + '75.0.3738.0', + '74.0.3729.23', + '73.0.3683.84', + '74.0.3729.22', + '74.0.3729.21', + '75.0.3737.1', + '75.0.3737.0', + '74.0.3729.20', + '73.0.3683.83', + '74.0.3729.19', + '75.0.3736.1', + '75.0.3736.0', + '74.0.3729.18', + '73.0.3683.82', + '74.0.3729.17', + '75.0.3735.1', + '75.0.3735.0', + '74.0.3729.16', + '73.0.3683.81', + '75.0.3734.1', + '75.0.3734.0', + '74.0.3729.15', + '73.0.3683.80', + '74.0.3729.14', + '75.0.3733.1', + '75.0.3733.0', + '75.0.3732.1', + '74.0.3729.13', + '74.0.3729.12', + '73.0.3683.79', + '74.0.3729.11', + '75.0.3732.0', + '74.0.3729.10', + '73.0.3683.78', + '74.0.3729.9', + '74.0.3729.8', + '74.0.3729.7', + '75.0.3731.3', + '75.0.3731.2', + '75.0.3731.0', + '74.0.3729.6', + '73.0.3683.77', + '73.0.3683.76', + '75.0.3730.5', + '75.0.3730.4', + '73.0.3683.75', + '74.0.3729.5', + '73.0.3683.74', + '75.0.3730.3', + '75.0.3730.2', + '74.0.3729.4', + '73.0.3683.73', + '73.0.3683.72', + '75.0.3730.1', + '75.0.3730.0', + '74.0.3729.3', + '73.0.3683.71', + '74.0.3729.2', + '73.0.3683.70', + '74.0.3729.1', + '74.0.3729.0', + '74.0.3726.4', + '73.0.3683.69', + '74.0.3726.3', + '74.0.3728.0', + '74.0.3726.2', + '73.0.3683.68', + '74.0.3726.1', + '74.0.3726.0', + '74.0.3725.4', + '73.0.3683.67', + '73.0.3683.66', + '74.0.3725.3', + '74.0.3725.2', + '74.0.3725.1', + '74.0.3724.8', + '74.0.3725.0', + '73.0.3683.65', + '74.0.3724.7', + '74.0.3724.6', + '74.0.3724.5', + '74.0.3724.4', + '74.0.3724.3', + '74.0.3724.2', + '74.0.3724.1', + '74.0.3724.0', + '73.0.3683.64', + '74.0.3723.1', + '74.0.3723.0', + '73.0.3683.63', + '74.0.3722.1', + '74.0.3722.0', + '73.0.3683.62', + '74.0.3718.9', + '74.0.3702.3', + '74.0.3721.3', + '74.0.3721.2', + '74.0.3721.1', + '74.0.3721.0', + '74.0.3720.6', + '73.0.3683.61', + '72.0.3626.122', + '73.0.3683.60', + '74.0.3720.5', + '72.0.3626.121', + '74.0.3718.8', + '74.0.3720.4', + '74.0.3720.3', + '74.0.3718.7', + '74.0.3720.2', + '74.0.3720.1', + '74.0.3720.0', + '74.0.3718.6', + '74.0.3719.5', + '73.0.3683.59', + '74.0.3718.5', + '74.0.3718.4', + '74.0.3719.4', + '74.0.3719.3', + '74.0.3719.2', + '74.0.3719.1', + '73.0.3683.58', + '74.0.3719.0', + '73.0.3683.57', + '73.0.3683.56', + '74.0.3718.3', + '73.0.3683.55', + '74.0.3718.2', + '74.0.3718.1', + '74.0.3718.0', + '73.0.3683.54', + '74.0.3717.2', + '73.0.3683.53', + '74.0.3717.1', + '74.0.3717.0', + '73.0.3683.52', + '74.0.3716.1', + '74.0.3716.0', + '73.0.3683.51', + '74.0.3715.1', + '74.0.3715.0', + '73.0.3683.50', + '74.0.3711.2', + '74.0.3714.2', + '74.0.3713.3', + '74.0.3714.1', + '74.0.3714.0', + '73.0.3683.49', + '74.0.3713.1', + '74.0.3713.0', + '72.0.3626.120', + '73.0.3683.48', + '74.0.3712.2', + '74.0.3712.1', + '74.0.3712.0', + '73.0.3683.47', + '72.0.3626.119', + '73.0.3683.46', + '74.0.3710.2', + '72.0.3626.118', + '74.0.3711.1', + '74.0.3711.0', + '73.0.3683.45', + '72.0.3626.117', + '74.0.3710.1', + '74.0.3710.0', + '73.0.3683.44', + '72.0.3626.116', + '74.0.3709.1', + '74.0.3709.0', + '74.0.3704.9', + '73.0.3683.43', + '72.0.3626.115', + '74.0.3704.8', + '74.0.3704.7', + '74.0.3708.0', + '74.0.3706.7', + '74.0.3704.6', + '73.0.3683.42', + '72.0.3626.114', + '74.0.3706.6', + '72.0.3626.113', + '74.0.3704.5', + '74.0.3706.5', + '74.0.3706.4', + '74.0.3706.3', + '74.0.3706.2', + '74.0.3706.1', + '74.0.3706.0', + '73.0.3683.41', + '72.0.3626.112', + '74.0.3705.1', + '74.0.3705.0', + '73.0.3683.40', + '72.0.3626.111', + '73.0.3683.39', + '74.0.3704.4', + '73.0.3683.38', + '74.0.3704.3', + '74.0.3704.2', + '74.0.3704.1', + '74.0.3704.0', + '73.0.3683.37', + '72.0.3626.110', + '72.0.3626.109', + '74.0.3703.3', + '74.0.3703.2', + '73.0.3683.36', + '74.0.3703.1', + '74.0.3703.0', + '73.0.3683.35', + '72.0.3626.108', + '74.0.3702.2', + '74.0.3699.3', + '74.0.3702.1', + '74.0.3702.0', + '73.0.3683.34', + '72.0.3626.107', + '73.0.3683.33', + '74.0.3701.1', + '74.0.3701.0', + '73.0.3683.32', + '73.0.3683.31', + '72.0.3626.105', + '74.0.3700.1', + '74.0.3700.0', + '73.0.3683.29', + '72.0.3626.103', + '74.0.3699.2', + '74.0.3699.1', + '74.0.3699.0', + '73.0.3683.28', + '72.0.3626.102', + '73.0.3683.27', + '73.0.3683.26', + '74.0.3698.0', + '74.0.3696.2', + '72.0.3626.101', + '73.0.3683.25', + '74.0.3696.1', + '74.0.3696.0', + '74.0.3694.8', + '72.0.3626.100', + '74.0.3694.7', + '74.0.3694.6', + '74.0.3694.5', + '74.0.3694.4', + '72.0.3626.99', + '72.0.3626.98', + '74.0.3694.3', + '73.0.3683.24', + '72.0.3626.97', + '72.0.3626.96', + '72.0.3626.95', + '73.0.3683.23', + '72.0.3626.94', + '73.0.3683.22', + '73.0.3683.21', + '72.0.3626.93', + '74.0.3694.2', + '72.0.3626.92', + '74.0.3694.1', + '74.0.3694.0', + '74.0.3693.6', + '73.0.3683.20', + '72.0.3626.91', + '74.0.3693.5', + '74.0.3693.4', + '74.0.3693.3', + '74.0.3693.2', + '73.0.3683.19', + '74.0.3693.1', + '74.0.3693.0', + '73.0.3683.18', + '72.0.3626.90', + '74.0.3692.1', + '74.0.3692.0', + '73.0.3683.17', + '72.0.3626.89', + '74.0.3687.3', + '74.0.3691.1', + '74.0.3691.0', + '73.0.3683.16', + '72.0.3626.88', + '72.0.3626.87', + '73.0.3683.15', + '74.0.3690.1', + '74.0.3690.0', + '73.0.3683.14', + '72.0.3626.86', + '73.0.3683.13', + '73.0.3683.12', + '74.0.3689.1', + '74.0.3689.0', + '73.0.3683.11', + '72.0.3626.85', + '73.0.3683.10', + '72.0.3626.84', + '73.0.3683.9', + '74.0.3688.1', + '74.0.3688.0', + '73.0.3683.8', + '72.0.3626.83', + '74.0.3687.2', + '74.0.3687.1', + '74.0.3687.0', + '73.0.3683.7', + '72.0.3626.82', + '74.0.3686.4', + '72.0.3626.81', + '74.0.3686.3', + '74.0.3686.2', + '74.0.3686.1', + '74.0.3686.0', + '73.0.3683.6', + '72.0.3626.80', + '74.0.3685.1', + '74.0.3685.0', + '73.0.3683.5', + '72.0.3626.79', + '74.0.3684.1', + '74.0.3684.0', + '73.0.3683.4', + '72.0.3626.78', + '72.0.3626.77', + '73.0.3683.3', + '73.0.3683.2', + '72.0.3626.76', + '73.0.3683.1', + '73.0.3683.0', + '72.0.3626.75', + '71.0.3578.141', + '73.0.3682.1', + '73.0.3682.0', + '72.0.3626.74', + '71.0.3578.140', + '73.0.3681.4', + '73.0.3681.3', + '73.0.3681.2', + '73.0.3681.1', + '73.0.3681.0', + '72.0.3626.73', + '71.0.3578.139', + '72.0.3626.72', + '72.0.3626.71', + '73.0.3680.1', + '73.0.3680.0', + '72.0.3626.70', + '71.0.3578.138', + '73.0.3678.2', + '73.0.3679.1', + '73.0.3679.0', + '72.0.3626.69', + '71.0.3578.137', + '73.0.3678.1', + '73.0.3678.0', + '71.0.3578.136', + '73.0.3677.1', + '73.0.3677.0', + '72.0.3626.68', + '72.0.3626.67', + '71.0.3578.135', + '73.0.3676.1', + '73.0.3676.0', + '73.0.3674.2', + '72.0.3626.66', + '71.0.3578.134', + '73.0.3674.1', + '73.0.3674.0', + '72.0.3626.65', + '71.0.3578.133', + '73.0.3673.2', + '73.0.3673.1', + '73.0.3673.0', + '72.0.3626.64', + '71.0.3578.132', + '72.0.3626.63', + '72.0.3626.62', + '72.0.3626.61', + '72.0.3626.60', + '73.0.3672.1', + '73.0.3672.0', + '72.0.3626.59', + '71.0.3578.131', + '73.0.3671.3', + '73.0.3671.2', + '73.0.3671.1', + '73.0.3671.0', + '72.0.3626.58', + '71.0.3578.130', + '73.0.3670.1', + '73.0.3670.0', + '72.0.3626.57', + '71.0.3578.129', + '73.0.3669.1', + '73.0.3669.0', + '72.0.3626.56', + '71.0.3578.128', + '73.0.3668.2', + '73.0.3668.1', + '73.0.3668.0', + '72.0.3626.55', + '71.0.3578.127', + '73.0.3667.2', + '73.0.3667.1', + '73.0.3667.0', + '72.0.3626.54', + '71.0.3578.126', + '73.0.3666.1', + '73.0.3666.0', + '72.0.3626.53', + '71.0.3578.125', + '73.0.3665.4', + '73.0.3665.3', + '72.0.3626.52', + '73.0.3665.2', + '73.0.3664.4', + '73.0.3665.1', + '73.0.3665.0', + '72.0.3626.51', + '71.0.3578.124', + '72.0.3626.50', + '73.0.3664.3', + '73.0.3664.2', + '73.0.3664.1', + '73.0.3664.0', + '73.0.3663.2', + '72.0.3626.49', + '71.0.3578.123', + '73.0.3663.1', + '73.0.3663.0', + '72.0.3626.48', + '71.0.3578.122', + '73.0.3662.1', + '73.0.3662.0', + '72.0.3626.47', + '71.0.3578.121', + '73.0.3661.1', + '72.0.3626.46', + '73.0.3661.0', + '72.0.3626.45', + '71.0.3578.120', + '73.0.3660.2', + '73.0.3660.1', + '73.0.3660.0', + '72.0.3626.44', + '71.0.3578.119', + '73.0.3659.1', + '73.0.3659.0', + '72.0.3626.43', + '71.0.3578.118', + '73.0.3658.1', + '73.0.3658.0', + '72.0.3626.42', + '71.0.3578.117', + '73.0.3657.1', + '73.0.3657.0', + '72.0.3626.41', + '71.0.3578.116', + '73.0.3656.1', + '73.0.3656.0', + '72.0.3626.40', + '71.0.3578.115', + '73.0.3655.1', + '73.0.3655.0', + '72.0.3626.39', + '71.0.3578.114', + '73.0.3654.1', + '73.0.3654.0', + '72.0.3626.38', + '71.0.3578.113', + '73.0.3653.1', + '73.0.3653.0', + '72.0.3626.37', + '71.0.3578.112', + '73.0.3652.1', + '73.0.3652.0', + '72.0.3626.36', + '71.0.3578.111', + '73.0.3651.1', + '73.0.3651.0', + '72.0.3626.35', + '71.0.3578.110', + '73.0.3650.1', + '73.0.3650.0', + '72.0.3626.34', + '71.0.3578.109', + '73.0.3649.1', + '73.0.3649.0', + '72.0.3626.33', + '71.0.3578.108', + '73.0.3648.2', + '73.0.3648.1', + '73.0.3648.0', + '72.0.3626.32', + '71.0.3578.107', + '73.0.3647.2', + '73.0.3647.1', + '73.0.3647.0', + '72.0.3626.31', + '71.0.3578.106', + '73.0.3635.3', + '73.0.3646.2', + '73.0.3646.1', + '73.0.3646.0', + '72.0.3626.30', + '71.0.3578.105', + '72.0.3626.29', + '73.0.3645.2', + '73.0.3645.1', + '73.0.3645.0', + '72.0.3626.28', + '71.0.3578.104', + '72.0.3626.27', + '72.0.3626.26', + '72.0.3626.25', + '72.0.3626.24', + '73.0.3644.0', + '73.0.3643.2', + '72.0.3626.23', + '71.0.3578.103', + '73.0.3643.1', + '73.0.3643.0', + '72.0.3626.22', + '71.0.3578.102', + '73.0.3642.1', + '73.0.3642.0', + '72.0.3626.21', + '71.0.3578.101', + '73.0.3641.1', + '73.0.3641.0', + '72.0.3626.20', + '71.0.3578.100', + '72.0.3626.19', + '73.0.3640.1', + '73.0.3640.0', + '72.0.3626.18', + '73.0.3639.1', + '71.0.3578.99', + '73.0.3639.0', + '72.0.3626.17', + '73.0.3638.2', + '72.0.3626.16', + '73.0.3638.1', + '73.0.3638.0', + '72.0.3626.15', + '71.0.3578.98', + '73.0.3635.2', + '71.0.3578.97', + '73.0.3637.1', + '73.0.3637.0', + '72.0.3626.14', + '71.0.3578.96', + '71.0.3578.95', + '72.0.3626.13', + '71.0.3578.94', + '73.0.3636.2', + '71.0.3578.93', + '73.0.3636.1', + '73.0.3636.0', + '72.0.3626.12', + '71.0.3578.92', + '73.0.3635.1', + '73.0.3635.0', + '72.0.3626.11', + '71.0.3578.91', + '73.0.3634.2', + '73.0.3634.1', + '73.0.3634.0', + '72.0.3626.10', + '71.0.3578.90', + '71.0.3578.89', + '73.0.3633.2', + '73.0.3633.1', + '73.0.3633.0', + '72.0.3610.4', + '72.0.3626.9', + '71.0.3578.88', + '73.0.3632.5', + '73.0.3632.4', + '73.0.3632.3', + '73.0.3632.2', + '73.0.3632.1', + '73.0.3632.0', + '72.0.3626.8', + '71.0.3578.87', + '73.0.3631.2', + '73.0.3631.1', + '73.0.3631.0', + '72.0.3626.7', + '71.0.3578.86', + '72.0.3626.6', + '73.0.3630.1', + '73.0.3630.0', + '72.0.3626.5', + '71.0.3578.85', + '72.0.3626.4', + '73.0.3628.3', + '73.0.3628.2', + '73.0.3629.1', + '73.0.3629.0', + '72.0.3626.3', + '71.0.3578.84', + '73.0.3628.1', + '73.0.3628.0', + '71.0.3578.83', + '73.0.3627.1', + '73.0.3627.0', + '72.0.3626.2', + '71.0.3578.82', + '71.0.3578.81', + '71.0.3578.80', + '72.0.3626.1', + '72.0.3626.0', + '71.0.3578.79', + '70.0.3538.124', + '71.0.3578.78', + '72.0.3623.4', + '72.0.3625.2', + '72.0.3625.1', + '72.0.3625.0', + '71.0.3578.77', + '70.0.3538.123', + '72.0.3624.4', + '72.0.3624.3', + '72.0.3624.2', + '71.0.3578.76', + '72.0.3624.1', + '72.0.3624.0', + '72.0.3623.3', + '71.0.3578.75', + '70.0.3538.122', + '71.0.3578.74', + '72.0.3623.2', + '72.0.3610.3', + '72.0.3623.1', + '72.0.3623.0', + '72.0.3622.3', + '72.0.3622.2', + '71.0.3578.73', + '70.0.3538.121', + '72.0.3622.1', + '72.0.3622.0', + '71.0.3578.72', + '70.0.3538.120', + '72.0.3621.1', + '72.0.3621.0', + '71.0.3578.71', + '70.0.3538.119', + '72.0.3620.1', + '72.0.3620.0', + '71.0.3578.70', + '70.0.3538.118', + '71.0.3578.69', + '72.0.3619.1', + '72.0.3619.0', + '71.0.3578.68', + '70.0.3538.117', + '71.0.3578.67', + '72.0.3618.1', + '72.0.3618.0', + '71.0.3578.66', + '70.0.3538.116', + '72.0.3617.1', + '72.0.3617.0', + '71.0.3578.65', + '70.0.3538.115', + '72.0.3602.3', + '71.0.3578.64', + '72.0.3616.1', + '72.0.3616.0', + '71.0.3578.63', + '70.0.3538.114', + '71.0.3578.62', + '72.0.3615.1', + '72.0.3615.0', + '71.0.3578.61', + '70.0.3538.113', + '72.0.3614.1', + '72.0.3614.0', + '71.0.3578.60', + '70.0.3538.112', + '72.0.3613.1', + '72.0.3613.0', + '71.0.3578.59', + '70.0.3538.111', + '72.0.3612.2', + '72.0.3612.1', + '72.0.3612.0', + '70.0.3538.110', + '71.0.3578.58', + '70.0.3538.109', + '72.0.3611.2', + '72.0.3611.1', + '72.0.3611.0', + '71.0.3578.57', + '70.0.3538.108', + '72.0.3610.2', + '71.0.3578.56', + '71.0.3578.55', + '72.0.3610.1', + '72.0.3610.0', + '71.0.3578.54', + '70.0.3538.107', + '71.0.3578.53', + '72.0.3609.3', + '71.0.3578.52', + '72.0.3609.2', + '71.0.3578.51', + '72.0.3608.5', + '72.0.3609.1', + '72.0.3609.0', + '71.0.3578.50', + '70.0.3538.106', + '72.0.3608.4', + '72.0.3608.3', + '72.0.3608.2', + '71.0.3578.49', + '72.0.3608.1', + '72.0.3608.0', + '70.0.3538.105', + '71.0.3578.48', + '72.0.3607.1', + '72.0.3607.0', + '71.0.3578.47', + '70.0.3538.104', + '72.0.3606.2', + '72.0.3606.1', + '72.0.3606.0', + '71.0.3578.46', + '70.0.3538.103', + '70.0.3538.102', + '72.0.3605.3', + '72.0.3605.2', + '72.0.3605.1', + '72.0.3605.0', + '71.0.3578.45', + '70.0.3538.101', + '71.0.3578.44', + '71.0.3578.43', + '70.0.3538.100', + '70.0.3538.99', + '71.0.3578.42', + '72.0.3604.1', + '72.0.3604.0', + '71.0.3578.41', + '70.0.3538.98', + '71.0.3578.40', + '72.0.3603.2', + '72.0.3603.1', + '72.0.3603.0', + '71.0.3578.39', + '70.0.3538.97', + '72.0.3602.2', + '71.0.3578.38', + '71.0.3578.37', + '72.0.3602.1', + '72.0.3602.0', + '71.0.3578.36', + '70.0.3538.96', + '72.0.3601.1', + '72.0.3601.0', + '71.0.3578.35', + '70.0.3538.95', + '72.0.3600.1', + '72.0.3600.0', + '71.0.3578.34', + '70.0.3538.94', + '72.0.3599.3', + '72.0.3599.2', + '72.0.3599.1', + '72.0.3599.0', + '71.0.3578.33', + '70.0.3538.93', + '72.0.3598.1', + '72.0.3598.0', + '71.0.3578.32', + '70.0.3538.87', + '72.0.3597.1', + '72.0.3597.0', + '72.0.3596.2', + '71.0.3578.31', + '70.0.3538.86', + '71.0.3578.30', + '71.0.3578.29', + '72.0.3596.1', + '72.0.3596.0', + '71.0.3578.28', + '70.0.3538.85', + '72.0.3595.2', + '72.0.3591.3', + '72.0.3595.1', + '72.0.3595.0', + '71.0.3578.27', + '70.0.3538.84', + '72.0.3594.1', + '72.0.3594.0', + '71.0.3578.26', + '70.0.3538.83', + '72.0.3593.2', + '72.0.3593.1', + '72.0.3593.0', + '71.0.3578.25', + '70.0.3538.82', + '72.0.3589.3', + '72.0.3592.2', + '72.0.3592.1', + '72.0.3592.0', + '71.0.3578.24', + '72.0.3589.2', + '70.0.3538.81', + '70.0.3538.80', + '72.0.3591.2', + '72.0.3591.1', + '72.0.3591.0', + '71.0.3578.23', + '70.0.3538.79', + '71.0.3578.22', + '72.0.3590.1', + '72.0.3590.0', + '71.0.3578.21', + '70.0.3538.78', + '70.0.3538.77', + '72.0.3589.1', + '72.0.3589.0', + '71.0.3578.20', + '70.0.3538.76', + '71.0.3578.19', + '70.0.3538.75', + '72.0.3588.1', + '72.0.3588.0', + '71.0.3578.18', + '70.0.3538.74', + '72.0.3586.2', + '72.0.3587.0', + '71.0.3578.17', + '70.0.3538.73', + '72.0.3586.1', + '72.0.3586.0', + '71.0.3578.16', + '70.0.3538.72', + '72.0.3585.1', + '72.0.3585.0', + '71.0.3578.15', + '70.0.3538.71', + '71.0.3578.14', + '72.0.3584.1', + '72.0.3584.0', + '71.0.3578.13', + '70.0.3538.70', + '72.0.3583.2', + '71.0.3578.12', + '72.0.3583.1', + '72.0.3583.0', + '71.0.3578.11', + '70.0.3538.69', + '71.0.3578.10', + '72.0.3582.0', + '72.0.3581.4', + '71.0.3578.9', + '70.0.3538.67', + '72.0.3581.3', + '72.0.3581.2', + '72.0.3581.1', + '72.0.3581.0', + '71.0.3578.8', + '70.0.3538.66', + '72.0.3580.1', + '72.0.3580.0', + '71.0.3578.7', + '70.0.3538.65', + '71.0.3578.6', + '72.0.3579.1', + '72.0.3579.0', + '71.0.3578.5', + '70.0.3538.64', + '71.0.3578.4', + '71.0.3578.3', + '71.0.3578.2', + '71.0.3578.1', + '71.0.3578.0', + '70.0.3538.63', + '69.0.3497.128', + '70.0.3538.62', + '70.0.3538.61', + '70.0.3538.60', + '70.0.3538.59', + '71.0.3577.1', + '71.0.3577.0', + '70.0.3538.58', + '69.0.3497.127', + '71.0.3576.2', + '71.0.3576.1', + '71.0.3576.0', + '70.0.3538.57', + '70.0.3538.56', + '71.0.3575.2', + '70.0.3538.55', + '69.0.3497.126', + '70.0.3538.54', + '71.0.3575.1', + '71.0.3575.0', + '71.0.3574.1', + '71.0.3574.0', + '70.0.3538.53', + '69.0.3497.125', + '70.0.3538.52', + '71.0.3573.1', + '71.0.3573.0', + '70.0.3538.51', + '69.0.3497.124', + '71.0.3572.1', + '71.0.3572.0', + '70.0.3538.50', + '69.0.3497.123', + '71.0.3571.2', + '70.0.3538.49', + '69.0.3497.122', + '71.0.3571.1', + '71.0.3571.0', + '70.0.3538.48', + '69.0.3497.121', + '71.0.3570.1', + '71.0.3570.0', + '70.0.3538.47', + '69.0.3497.120', + '71.0.3568.2', + '71.0.3569.1', + '71.0.3569.0', + '70.0.3538.46', + '69.0.3497.119', + '70.0.3538.45', + '71.0.3568.1', + '71.0.3568.0', + '70.0.3538.44', + '69.0.3497.118', + '70.0.3538.43', + '70.0.3538.42', + '71.0.3567.1', + '71.0.3567.0', + '70.0.3538.41', + '69.0.3497.117', + '71.0.3566.1', + '71.0.3566.0', + '70.0.3538.40', + '69.0.3497.116', + '71.0.3565.1', + '71.0.3565.0', + '70.0.3538.39', + '69.0.3497.115', + '71.0.3564.1', + '71.0.3564.0', + '70.0.3538.38', + '69.0.3497.114', + '71.0.3563.0', + '71.0.3562.2', + '70.0.3538.37', + '69.0.3497.113', + '70.0.3538.36', + '70.0.3538.35', + '71.0.3562.1', + '71.0.3562.0', + '70.0.3538.34', + '69.0.3497.112', + '70.0.3538.33', + '71.0.3561.1', + '71.0.3561.0', + '70.0.3538.32', + '69.0.3497.111', + '71.0.3559.6', + '71.0.3560.1', + '71.0.3560.0', + '71.0.3559.5', + '71.0.3559.4', + '70.0.3538.31', + '69.0.3497.110', + '71.0.3559.3', + '70.0.3538.30', + '69.0.3497.109', + '71.0.3559.2', + '71.0.3559.1', + '71.0.3559.0', + '70.0.3538.29', + '69.0.3497.108', + '71.0.3558.2', + '71.0.3558.1', + '71.0.3558.0', + '70.0.3538.28', + '69.0.3497.107', + '71.0.3557.2', + '71.0.3557.1', + '71.0.3557.0', + '70.0.3538.27', + '69.0.3497.106', + '71.0.3554.4', + '70.0.3538.26', + '71.0.3556.1', + '71.0.3556.0', + '70.0.3538.25', + '71.0.3554.3', + '69.0.3497.105', + '71.0.3554.2', + '70.0.3538.24', + '69.0.3497.104', + '71.0.3555.2', + '70.0.3538.23', + '71.0.3555.1', + '71.0.3555.0', + '70.0.3538.22', + '69.0.3497.103', + '71.0.3554.1', + '71.0.3554.0', + '70.0.3538.21', + '69.0.3497.102', + '71.0.3553.3', + '70.0.3538.20', + '69.0.3497.101', + '71.0.3553.2', + '69.0.3497.100', + '71.0.3553.1', + '71.0.3553.0', + '70.0.3538.19', + '69.0.3497.99', + '69.0.3497.98', + '69.0.3497.97', + '71.0.3552.6', + '71.0.3552.5', + '71.0.3552.4', + '71.0.3552.3', + '71.0.3552.2', + '71.0.3552.1', + '71.0.3552.0', + '70.0.3538.18', + '69.0.3497.96', + '71.0.3551.3', + '71.0.3551.2', + '71.0.3551.1', + '71.0.3551.0', + '70.0.3538.17', + '69.0.3497.95', + '71.0.3550.3', + '71.0.3550.2', + '71.0.3550.1', + '71.0.3550.0', + '70.0.3538.16', + '69.0.3497.94', + '71.0.3549.1', + '71.0.3549.0', + '70.0.3538.15', + '69.0.3497.93', + '69.0.3497.92', + '71.0.3548.1', + '71.0.3548.0', + '70.0.3538.14', + '69.0.3497.91', + '71.0.3547.1', + '71.0.3547.0', + '70.0.3538.13', + '69.0.3497.90', + '71.0.3546.2', + '69.0.3497.89', + '71.0.3546.1', + '71.0.3546.0', + '70.0.3538.12', + '69.0.3497.88', + '71.0.3545.4', + '71.0.3545.3', + '71.0.3545.2', + '71.0.3545.1', + '71.0.3545.0', + '70.0.3538.11', + '69.0.3497.87', + '71.0.3544.5', + '71.0.3544.4', + '71.0.3544.3', + '71.0.3544.2', + '71.0.3544.1', + '71.0.3544.0', + '69.0.3497.86', + '70.0.3538.10', + '69.0.3497.85', + '70.0.3538.9', + '69.0.3497.84', + '71.0.3543.4', + '70.0.3538.8', + '71.0.3543.3', + '71.0.3543.2', + '71.0.3543.1', + '71.0.3543.0', + '70.0.3538.7', + '69.0.3497.83', + '71.0.3542.2', + '71.0.3542.1', + '71.0.3542.0', + '70.0.3538.6', + '69.0.3497.82', + '69.0.3497.81', + '71.0.3541.1', + '71.0.3541.0', + '70.0.3538.5', + '69.0.3497.80', + '71.0.3540.1', + '71.0.3540.0', + '70.0.3538.4', + '69.0.3497.79', + '70.0.3538.3', + '71.0.3539.1', + '71.0.3539.0', + '69.0.3497.78', + '68.0.3440.134', + '69.0.3497.77', + '70.0.3538.2', + '70.0.3538.1', + '70.0.3538.0', + '69.0.3497.76', + '68.0.3440.133', + '69.0.3497.75', + '70.0.3537.2', + '70.0.3537.1', + '70.0.3537.0', + '69.0.3497.74', + '68.0.3440.132', + '70.0.3536.0', + '70.0.3535.5', + '70.0.3535.4', + '70.0.3535.3', + '69.0.3497.73', + '68.0.3440.131', + '70.0.3532.8', + '70.0.3532.7', + '69.0.3497.72', + '69.0.3497.71', + '70.0.3535.2', + '70.0.3535.1', + '70.0.3535.0', + '69.0.3497.70', + '68.0.3440.130', + '69.0.3497.69', + '68.0.3440.129', + '70.0.3534.4', + '70.0.3534.3', + '70.0.3534.2', + '70.0.3534.1', + '70.0.3534.0', + '69.0.3497.68', + '68.0.3440.128', + '70.0.3533.2', + '70.0.3533.1', + '70.0.3533.0', + '69.0.3497.67', + '68.0.3440.127', + '70.0.3532.6', + '70.0.3532.5', + '70.0.3532.4', + '69.0.3497.66', + '68.0.3440.126', + '70.0.3532.3', + '70.0.3532.2', + '70.0.3532.1', + '69.0.3497.60', + '69.0.3497.65', + '69.0.3497.64', + '70.0.3532.0', + '70.0.3531.0', + '70.0.3530.4', + '70.0.3530.3', + '70.0.3530.2', + '69.0.3497.58', + '68.0.3440.125', + '69.0.3497.57', + '69.0.3497.56', + '69.0.3497.55', + '69.0.3497.54', + '70.0.3530.1', + '70.0.3530.0', + '69.0.3497.53', + '68.0.3440.124', + '69.0.3497.52', + '70.0.3529.3', + '70.0.3529.2', + '70.0.3529.1', + '70.0.3529.0', + '69.0.3497.51', + '70.0.3528.4', + '68.0.3440.123', + '70.0.3528.3', + '70.0.3528.2', + '70.0.3528.1', + '70.0.3528.0', + '69.0.3497.50', + '68.0.3440.122', + '70.0.3527.1', + '70.0.3527.0', + '69.0.3497.49', + '68.0.3440.121', + '70.0.3526.1', + '70.0.3526.0', + '68.0.3440.120', + '69.0.3497.48', + '69.0.3497.47', + '68.0.3440.119', + '68.0.3440.118', + '70.0.3525.5', + '70.0.3525.4', + '70.0.3525.3', + '68.0.3440.117', + '69.0.3497.46', + '70.0.3525.2', + '70.0.3525.1', + '70.0.3525.0', + '69.0.3497.45', + '68.0.3440.116', + '70.0.3524.4', + '70.0.3524.3', + '69.0.3497.44', + '70.0.3524.2', + '70.0.3524.1', + '70.0.3524.0', + '70.0.3523.2', + '69.0.3497.43', + '68.0.3440.115', + '70.0.3505.9', + '69.0.3497.42', + '70.0.3505.8', + '70.0.3523.1', + '70.0.3523.0', + '69.0.3497.41', + '68.0.3440.114', + '70.0.3505.7', + '69.0.3497.40', + '70.0.3522.1', + '70.0.3522.0', + '70.0.3521.2', + '69.0.3497.39', + '68.0.3440.113', + '70.0.3505.6', + '70.0.3521.1', + '70.0.3521.0', + '69.0.3497.38', + '68.0.3440.112', + '70.0.3520.1', + '70.0.3520.0', + '69.0.3497.37', + '68.0.3440.111', + '70.0.3519.3', + '70.0.3519.2', + '70.0.3519.1', + '70.0.3519.0', + '69.0.3497.36', + '68.0.3440.110', + '70.0.3518.1', + '70.0.3518.0', + '69.0.3497.35', + '69.0.3497.34', + '68.0.3440.109', + '70.0.3517.1', + '70.0.3517.0', + '69.0.3497.33', + '68.0.3440.108', + '69.0.3497.32', + '70.0.3516.3', + '70.0.3516.2', + '70.0.3516.1', + '70.0.3516.0', + '69.0.3497.31', + '68.0.3440.107', + '70.0.3515.4', + '68.0.3440.106', + '70.0.3515.3', + '70.0.3515.2', + '70.0.3515.1', + '70.0.3515.0', + '69.0.3497.30', + '68.0.3440.105', + '68.0.3440.104', + '70.0.3514.2', + '70.0.3514.1', + '70.0.3514.0', + '69.0.3497.29', + '68.0.3440.103', + '70.0.3513.1', + '70.0.3513.0', + '69.0.3497.28', + ) + return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) + + std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0', + 'User-Agent': random_user_agent(), 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From c5606802474822887b75af7de23de6679264c0fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 29 Jun 2019 00:33:35 +0700 Subject: [PATCH 0677/1201] [soundcloud] Update client id --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 277c3c7b4..3a8626e02 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -221,7 +221,7 @@ class SoundcloudIE(InfoExtractor): } ] - _CLIENT_ID = 'FweeGBOOEOYJWLJN3oEyToGLKhmSz0I7' + _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' @staticmethod def _extract_urls(webpage): From 5e3da0d42b3d16465a95451276f021ecd0b7bd75 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 1 Jul 2019 08:37:21 +0100 Subject: [PATCH 0678/1201] [dailymotion] add support embed with DM.player js call --- youtube_dl/extractor/dailymotion.py | 12 +++++++++--- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 1a2c1308a..3d3d78041 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -137,10 +137,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor): @staticmethod def _extract_urls(webpage): + urls = [] # Look for embedded Dailymotion player - matches = re.findall( - r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) - return list(map(lambda m: unescapeHTML(m[1]), matches)) + # https://developer.dailymotion.com/player#player-parameters + for mobj in re.finditer( + r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage): + urls.append(unescapeHTML(mobj.group('url'))) + for mobj in re.finditer( + r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P[0-9a-zA-Z]+).+?}\s*\);', webpage): + urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id')) + return urls def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index eeb0d25f6..77e217460 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2104,6 +2104,23 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': ['Failed to download MPD manifest'], }, + { + # DailyMotion embed with DM.player + 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804', + 'info_dict': { + 'id': 'k6aKkGHd9FJs4mtJN39', + 'ext': 'mp4', + 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final', + 'description': 'This video is private.', + 'uploader_id': 'x1jf30l', + 'uploader': 'beIN SPORTS USA', + 'upload_date': '20190528', + 'timestamp': 1559062971, + }, + 'params': { + 'skip_download': True, + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject From 976e1ff7f9be76588f5e6d4a569a49694072e08b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 1 Jul 2019 12:05:18 +0100 Subject: [PATCH 0679/1201] [acast] add support for URLs with episode id(closes #21444) --- youtube_dl/extractor/acast.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index c4362be88..b17c792d2 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -7,6 +7,7 @@ import functools from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, float_or_none, int_or_none, try_get, @@ -27,7 +28,7 @@ class ACastIE(InfoExtractor): ''' _TESTS = [{ 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', - 'md5': 'a02393c74f3bdb1801c3ec2695577ce0', + 'md5': '16d936099ec5ca2d5869e3a813ee8dc4', 'info_dict': { 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', 'ext': 'mp3', @@ -46,28 +47,37 @@ class ACastIE(InfoExtractor): }, { 'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22', 'only_matching': True, + }, { + 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', + 'only_matching': True, }] def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() s = self._download_json( - 'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id), - display_id)['result'] + 'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id), + display_id) media_url = s['url'] + if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id): + episode_url = s.get('episodeUrl') + if episode_url: + display_id = episode_url + else: + channel, display_id = re.match(self._VALID_URL, s['link']).groups() cast_data = self._download_json( 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)['result'] e = cast_data['episode'] - title = e['name'] + title = e.get('name') or s['title'] return { 'id': compat_str(e['id']), 'display_id': display_id, 'url': media_url, 'title': title, - 'description': e.get('description') or e.get('summary'), + 'description': e.get('summary') or clean_html(e.get('description') or s.get('description')), 'thumbnail': e.get('image'), - 'timestamp': unified_timestamp(e.get('publishingDate')), - 'duration': float_or_none(s.get('duration') or e.get('duration')), + 'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')), + 'duration': float_or_none(e.get('duration') or s.get('duration')), 'filesize': int_or_none(e.get('contentLength')), 'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str), 'series': try_get(cast_data, lambda x: x['show']['name'], compat_str), From 4e2491f066f81ee9e941c48a910982ec6ac286b5 Mon Sep 17 00:00:00 2001 From: xyssy <52385286+xyssy@users.noreply.github.com> Date: Mon, 1 Jul 2019 12:05:51 -0500 Subject: [PATCH 0680/1201] [yourporn] Fix extraction (#21585) --- youtube_dl/extractor/yourporn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index b1d1eb6b6..8a2d5f63b 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -37,7 +37,7 @@ class YourPornIE(InfoExtractor): self._search_regex( r'data-vnfo=(["\'])(?P{.+?})\1', webpage, 'data info', group='data'), - video_id)[video_id]).replace('/cdn/', '/cdn4/') + video_id)[video_id]).replace('/cdn/', '/cdn5/') title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', From 918398092c5049a6edf940ebe3c2dd46916ee93c Mon Sep 17 00:00:00 2001 From: Fai <4016742+aicest@users.noreply.github.com> Date: Tue, 2 Jul 2019 01:10:55 +0800 Subject: [PATCH 0681/1201] [xiami] Update API base URL (#21575) --- youtube_dl/extractor/xiami.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index 8333fb534..618da8382 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -7,7 +7,7 @@ from ..utils import int_or_none class XiamiBaseIE(InfoExtractor): - _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id' + _API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id' def _download_webpage_handle(self, *args, **kwargs): webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs) From 9baf69af450a90ead36af6d205cd0afc87b79253 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Mon, 1 Jul 2019 18:11:38 +0100 Subject: [PATCH 0682/1201] [openload] Add support for oload.biz (#21574) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 237b0d8fb..11e92e471 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -365,6 +365,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', 'only_matching': True, + }, { + 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From d1e41164272a2993816548beebd0d5ef4effafe8 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Mon, 1 Jul 2019 19:13:23 +0200 Subject: [PATCH 0683/1201] [vevo] Add support for embed.vevo.com URLs (#21565) --- youtube_dl/extractor/vevo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 232e05816..4ea9f1b4b 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -34,6 +34,7 @@ class VevoIE(VevoBaseIE): (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| + https?://embed\.vevo\.com/.*?[?&]isrc=| vevo:) (?P[^&?#]+)''' @@ -144,6 +145,9 @@ class VevoIE(VevoBaseIE): # Geo-restricted to Netherlands/Germany 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', 'only_matching': True, + }, { + 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=', + 'only_matching': True, }] _VERSIONS = { 0: 'youtube', # only in AuthenticateVideo videoVersions From c8343f0a4331bd2f561fd67b9b272afb60147a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Jul 2019 01:07:54 +0700 Subject: [PATCH 0684/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4ae3d6c7c..9deeb884a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core ++ [utils] Introduce random_user_agent and use as default User-Agent (#21546) + +Extractors ++ [vevo] Add support for embed.vevo.com URLs (#21565) ++ [openload] Add support for oload.biz (#21574) +* [xiami] Update API base URL (#21575) +* [yourporn] Fix extraction (#21585) ++ [acast] Add support for URLs with episode id (#21444) ++ [dailymotion] Add support for DM.player embeds +* [soundcloud] Update client id + + version 2019.06.27 Extractors From 1335bf10f69b5d2c45b386d3faf71398b9662f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Jul 2019 01:09:59 +0700 Subject: [PATCH 0685/1201] release 2019.07.02 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d7c15e85a..fb0d33b8f 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.27 + [debug] youtube-dl version 2019.07.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 741862590..3c95565a6 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 4fb035ea4..7410776d7 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 73ed62012..cc52bcca6 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.27 + [debug] youtube-dl version 2019.07.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index a9d3653e2..bbd421b1a 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 9deeb884a..5ce78b07a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.02 Core + [utils] Introduce random_user_agent and use as default User-Agent (#21546) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 01896873d..78fe54326 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.27' +__version__ = '2019.07.02' From ff0f4cfeba73d17c74caa05b55da610d903ae4d3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 2 Jul 2019 22:07:01 +0100 Subject: [PATCH 0686/1201] [arte] clean extractor(closes #15583)(closes #21614) --- youtube_dl/extractor/arte.py | 330 +++-------------------------- youtube_dl/extractor/extractors.py | 9 - 2 files changed, 29 insertions(+), 310 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index ffc321821..2bd3bfe8a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -4,17 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_str, - compat_urllib_parse_urlparse, -) +from ..compat import compat_str from ..utils import ( ExtractorError, - find_xpath_attr, - get_element_by_attribute, int_or_none, - NO_DEFAULT, qualities, try_get, unified_strdate, @@ -25,59 +18,7 @@ from ..utils import ( # add tests. -class ArteTvIE(InfoExtractor): - _VALID_URL = r'https?://videos\.arte\.tv/(?Pfr|de|en|es)/.*-(?P.*?)\.html' - IE_NAME = 'arte.tv' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - lang = mobj.group('lang') - video_id = mobj.group('id') - - ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') - ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') - ref_xml_doc = self._download_xml( - ref_xml_url, video_id, note='Downloading metadata') - config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) - config_xml_url = config_node.attrib['ref'] - config = self._download_xml( - config_xml_url, video_id, note='Downloading configuration') - - formats = [{ - 'format_id': q.attrib['quality'], - # The playpath starts at 'mp4:', if we don't manually - # split the url, rtmpdump will incorrectly parse them - 'url': q.text.split('mp4:', 1)[0], - 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], - 'ext': 'flv', - 'quality': 2 if q.attrib['quality'] == 'hd' else 1, - } for q in config.findall('./urls/url')] - self._sort_formats(formats) - - title = config.find('.//name').text - thumbnail = config.find('.//firstThumbnailUrl').text - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } - - class ArteTVBaseIE(InfoExtractor): - @classmethod - def _extract_url_info(cls, url): - mobj = re.match(cls._VALID_URL, url) - lang = mobj.group('lang') - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - if 'vid' in query: - video_id = query['vid'][0] - else: - # This is not a real id, it can be for example AJT for the news - # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal - video_id = mobj.group('id') - return video_id, lang - def _extract_from_json_url(self, json_url, video_id, lang, title=None): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] @@ -108,13 +49,15 @@ class ArteTVBaseIE(InfoExtractor): 'upload_date': unified_strdate(upload_date_str), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) + qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ']) LANGS = { 'fr': 'F', 'de': 'A', 'en': 'E[ANG]', 'es': 'E[ESP]', + 'it': 'E[ITA]', + 'pl': 'E[POL]', } langcode = LANGS.get(lang, lang) @@ -126,8 +69,8 @@ class ArteTVBaseIE(InfoExtractor): l = re.escape(langcode) # Language preference from most to least priority - # Reference: section 5.6.3 of - # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf + # Reference: section 6.8 of + # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf PREFERENCES = ( # original version in requested language, without subtitles r'VO{0}$'.format(l), @@ -193,274 +136,59 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVPlus7IE(ArteTVBaseIE): IE_NAME = 'arte.tv:+7' - _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?Pfr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?Pfr|de|en|es|it|pl)/videos/(?P\d{6}-\d{3}-[AF])' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', - 'only_matching': True, - }, { - 'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', - 'only_matching': True, - }, { - 'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn', - 'only_matching': True, + 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', + 'info_dict': { + 'id': '088501-000-A', + 'ext': 'mp4', + 'title': 'Mexico: Stealing Petrol to Survive', + 'upload_date': '20190628', + }, }] - @classmethod - def suitable(cls, url): - return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url) - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - webpage = self._download_webpage(url, video_id) - return self._extract_from_webpage(webpage, video_id, lang) - - def _extract_from_webpage(self, webpage, video_id, lang): - patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']') - ids = (video_id, '') - # some pages contain multiple videos (like - # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), - # so we first try to look for json URLs that contain the video id from - # the 'vid' parameter. - patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] - json_url = self._html_search_regex( - patterns, webpage, 'json vp url', default=None) - if not json_url: - def find_iframe_url(webpage, default=NO_DEFAULT): - return self._html_search_regex( - r']+src=(["\'])(?P.+\bjson_url=.+?)\1', - webpage, 'iframe url', group='url', default=default) - - iframe_url = find_iframe_url(webpage, None) - if not iframe_url: - embed_url = self._html_search_regex( - r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None) - if embed_url: - player = self._download_json( - embed_url, video_id, 'Downloading player page') - iframe_url = find_iframe_url(player['html']) - # en and es URLs produce react-based pages with different layout (e.g. - # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) - if not iframe_url: - program = self._search_regex( - r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', - webpage, 'program', default=None) - if program: - embed_html = self._parse_json(program, video_id) - if embed_html: - iframe_url = find_iframe_url(embed_html['embed_html']) - if iframe_url: - json_url = compat_parse_qs( - compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] - if json_url: - title = self._search_regex( - r']+title=(["\'])(?P.+?)\1', - webpage, 'title', default=None, group='title') - return self._extract_from_json_url(json_url, video_id, lang, title=title) - # Different kind of embed URL (e.g. - # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) - entries = [ - self.url_result(url) - for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)] - return self.playlist_result(entries) - - -# It also uses the arte_vp_url url from the webpage to extract the information -class ArteTVCreativeIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:creative' - _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1', - 'info_dict': { - 'id': '057405-001-A', - 'ext': 'mp4', - 'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)', - 'upload_date': '20150716', - }, - }, { - 'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion', - 'playlist_count': 11, - 'add_ie': ['Youtube'], - }, { - 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', - 'only_matching': True, - }] - - -class ArteTVInfoIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:info' - _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', - 'info_dict': { - 'id': '067528-000-A', - 'ext': 'mp4', - 'title': 'Service civique, un cache misère ?', - 'upload_date': '20160403', - }, - }] - - -class ArteTVFutureIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:future' - _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses', - 'info_dict': { - 'id': '050940-028-A', - 'ext': 'mp4', - 'title': 'Les écrevisses aussi peuvent être anxieuses', - 'upload_date': '20140902', - }, - }, { - 'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable', - 'only_matching': True, - }] - - -class ArteTVDDCIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:ddc' - _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)' - - _TESTS = [] - - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - if lang == 'folge': - lang = 'de' - elif lang == 'emission': - lang = 'fr' - webpage = self._download_webpage(url, video_id) - scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage) - script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url') - javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') - json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') - return self._extract_from_json_url(json_url, video_id, lang) - - -class ArteTVConcertIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:concert' - _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', - 'md5': '9ea035b7bd69696b67aa2ccaaa218161', - 'info_dict': { - 'id': '186', - 'ext': 'mp4', - 'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"', - 'upload_date': '20140128', - 'description': 'md5:486eb08f991552ade77439fe6d82c305', - }, - }] - - -class ArteTVCinemaIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:cinema' - _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck', - 'md5': 'a5b9dd5575a11d93daf0e3f404f45438', - 'info_dict': { - 'id': '062494-000-A', - 'ext': 'mp4', - 'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck', - 'upload_date': '20150807', - }, - }] - - -class ArteTVMagazineIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:magazine' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - # Embedded via <iframe src="http://www.arte.tv/arte_vp/index.php?json_url=..." - 'url': 'http://www.arte.tv/magazine/trepalium/fr/entretien-avec-le-realisateur-vincent-lannoo-trepalium', - 'md5': '2a9369bcccf847d1c741e51416299f25', - 'info_dict': { - 'id': '065965-000-A', - 'ext': 'mp4', - 'title': 'Trepalium - Extrait Ep.01', - 'upload_date': '20160121', - }, - }, { - # Embedded via <iframe src="http://www.arte.tv/guide/fr/embed/054813-004-A/medium" - 'url': 'http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium', - 'md5': 'fedc64fc7a946110fe311634e79782ca', - 'info_dict': { - 'id': '054813-004_PLUS7-F', - 'ext': 'mp4', - 'title': 'Trepalium (4/6)', - 'description': 'md5:10057003c34d54e95350be4f9b05cb40', - 'upload_date': '20160218', - }, - }, { - 'url': 'http://www.arte.tv/magazine/metropolis/de/frank-woeste-german-paris-metropolis', - 'only_matching': True, - }] + lang, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_from_json_url( + 'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id), + video_id, lang) class ArteTVEmbedIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:embed' _VALID_URL = r'''(?x) - http://www\.arte\.tv - /(?:playerv2/embed|arte_vp/index)\.php\?json_url= + https://www\.arte\.tv + /player/v3/index\.php\?json_url= (?P<json_url> - http://arte\.tv/papi/tvguide/videos/stream/player/ - (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]* + https?://api\.arte\.tv/api/player/v1/config/ + (?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF]) ) ''' _TESTS = [] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - lang = mobj.group('lang') - json_url = mobj.group('json_url') + json_url, lang, video_id = re.match(self._VALID_URL, url).groups() return self._extract_from_json_url(json_url, video_id, lang) -class TheOperaPlatformIE(ArteTVPlus7IE): - IE_NAME = 'theoperaplatform' - _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello', - 'md5': '970655901fa2e82e04c00b955e9afe7b', - 'info_dict': { - 'id': '060338-009-A', - 'ext': 'mp4', - 'title': 'Verdi - OTELLO', - 'upload_date': '20160927', - }, - }] - - class ArteTVPlaylistIE(ArteTVBaseIE): IE_NAME = 'arte.tv:playlist' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV', + 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'info_dict': { - 'id': 'PL-013263', - 'title': 'Areva & Uramin', - 'description': 'md5:a1dc0312ce357c262259139cfd48c9bf', + 'id': 'RC-016954', + 'title': 'Earn a Living', + 'description': 'md5:d322c55011514b3a7241f7fb80d494c2', }, 'playlist_mincount': 6, - }, { - 'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV', - 'only_matching': True, }] def _real_extract(self, url): - playlist_id, lang = self._extract_url_info(url) + lang, playlist_id = re.match(self._VALID_URL, url).groups() collection = self._download_json( 'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' % (lang, playlist_id), playlist_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 530474f3f..02f17cf0d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -58,17 +58,8 @@ from .ard import ( ARDMediathekIE, ) from .arte import ( - ArteTvIE, ArteTVPlus7IE, - ArteTVCreativeIE, - ArteTVConcertIE, - ArteTVInfoIE, - ArteTVFutureIE, - ArteTVCinemaIE, - ArteTVDDCIE, - ArteTVMagazineIE, ArteTVEmbedIE, - TheOperaPlatformIE, ArteTVPlaylistIE, ) from .asiancrush import ( From e61ac1a09c215d9efb9a65ee798a6c1d6a0863cd Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 3 Jul 2019 13:31:47 +0100 Subject: [PATCH 0687/1201] [tvland] fix extraction(closes #21384) --- youtube_dl/extractor/tvland.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py index 957cf1ea2..791144128 100644 --- a/youtube_dl/extractor/tvland.py +++ b/youtube_dl/extractor/tvland.py @@ -1,32 +1,35 @@ # coding: utf-8 from __future__ import unicode_literals -from .mtv import MTVServicesInfoExtractor +from .spike import ParamountNetworkIE -class TVLandIE(MTVServicesInfoExtractor): +class TVLandIE(ParamountNetworkIE): IE_NAME = 'tvland.com' _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.tvland.com/feeds/mrss/' _TESTS = [{ # Geo-restricted. Without a proxy metadata are still there. With a # proxy it redirects to http://m.tvland.com/app/ - 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', + 'url': 'https://www.tvland.com/episodes/s04pzf/everybody-loves-raymond-the-dog-season-1-ep-19', 'info_dict': { - 'description': 'md5:80973e81b916a324e05c14a3fb506d29', - 'title': 'The Invasion', + 'description': 'md5:84928e7a8ad6649371fbf5da5e1ad75a', + 'title': 'The Dog', }, - 'playlist': [], + 'playlist_mincount': 5, }, { - 'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies', + 'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6', 'md5': 'e2c6389401cf485df26c79c247b08713', 'info_dict': { - 'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88', + 'id': '891f7d3c-5b5b-4753-b879-b7ba1a601757', 'ext': 'mp4', - 'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies', - 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269', - 'upload_date': '20151228', - 'timestamp': 1451289600, + 'title': 'Younger|April 30, 2019|6|NO-EPISODE#|A First Look at Younger Season 6', + 'description': 'md5:595ea74578d3a888ae878dfd1c7d4ab2', + 'upload_date': '20190430', + 'timestamp': 1556658000, + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301', From 313877c6a2b5ac8b880a9c47e8038ea0cdcf3deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 3 Jul 2019 23:16:40 +0700 Subject: [PATCH 0688/1201] [vzaar] Fix videos with empty title (closes #21606) --- youtube_dl/extractor/vzaar.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index 6000671c3..3336e6c15 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -32,6 +32,10 @@ class VzaarIE(InfoExtractor): 'ext': 'mp3', 'title': 'MP3', }, + }, { + # with null videoTitle + 'url': 'https://view.vzaar.com/20313539/download', + 'only_matching': True, }] @staticmethod @@ -45,7 +49,7 @@ class VzaarIE(InfoExtractor): video_data = self._download_json( 'http://view.vzaar.com/v2/%s/video' % video_id, video_id) - title = video_data['videoTitle'] + title = video_data.get('videoTitle') or video_id formats = [] From 2da4316e48475c344be862149f744c3a8a1ab2f1 Mon Sep 17 00:00:00 2001 From: David Caldwell <david+github@porkrind.org> Date: Wed, 3 Jul 2019 09:22:23 -0700 Subject: [PATCH 0689/1201] [twitch:vod] Actualize m3u8 URL (#21538, #21607) --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index dc5ff29c3..0500e33a6 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -317,7 +317,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'Downloading %s access token' % self._ITEM_TYPE) formats = self._extract_m3u8_formats( - '%s/vod/%s?%s' % ( + '%s/vod/%s.m3u8?%s' % ( self._USHER_BASE, item_id, compat_urllib_parse_urlencode({ 'allow_source': 'true', From cdb7c7d147b19f79512d541465cb5be9a54c7950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jul 2019 02:04:23 +0700 Subject: [PATCH 0690/1201] [ted] Restrict info regex (closes #21631) --- youtube_dl/extractor/ted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 9b60cc462..db5a4f44e 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -133,7 +133,7 @@ class TEDIE(InfoExtractor): def _extract_info(self, webpage): info_json = self._search_regex( - r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>', + r'(?s)q\(\s*"\w+.init"\s*,\s*({.+?})\)\s*</script>', webpage, 'info json') return json.loads(info_json) From 5ae9b8b3a3063c97730b79ea1dfd39bc19fd56c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jul 2019 03:57:11 +0700 Subject: [PATCH 0691/1201] [adobepass] Add support for AT&T U-verse (mso ATT) (closes #13938, closes #21016) --- youtube_dl/extractor/adobepass.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 1cf2dcbf3..38dca1b0a 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -25,6 +25,11 @@ MSO_INFO = { 'username_field': 'username', 'password_field': 'password', }, + 'ATT': { + 'name': 'AT&T U-verse', + 'username_field': 'userid', + 'password_field': 'password', + }, 'ATTOTT': { 'name': 'DIRECTV NOW', 'username_field': 'email', From a30c2f40550dd1ecc52c470db8ef77ea84bfe85b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jul 2019 04:01:30 +0700 Subject: [PATCH 0692/1201] [go] Add site info for disneynow (closes #21613) --- youtube_dl/extractor/go.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 5916f9a8f..03e48f4ea 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -34,9 +34,13 @@ class GoIE(AdobePassIE): 'watchdisneyxd': { 'brand': '009', 'resource_id': 'DisneyXD', + }, + 'disneynow': { + 'brand': '011', + 'resource_id': 'Disney', } } - _VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|disneynow)\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ + _VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|(?P<sub_domain_2>disneynow))\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', @@ -83,7 +87,9 @@ class GoIE(AdobePassIE): display_id)['video'] def _real_extract(self, url): - sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + mobj = re.match(self._VALID_URL, url) + sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2') + video_id, display_id = mobj.group('id', 'display_id') site_info = self._SITE_INFO.get(sub_domain, {}) brand = site_info.get('brand') if not video_id or not site_info: From c9fa84d88ef31c847d418223c0c6eb93651ccbec Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 4 Jul 2019 15:59:25 +0100 Subject: [PATCH 0693/1201] [lecturio] add support id based URLs(closes #21630) --- youtube_dl/extractor/lecturio.py | 113 +++++++++++++++++-------------- 1 file changed, 64 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py index 24f78d928..6ed7da4ab 100644 --- a/youtube_dl/extractor/lecturio.py +++ b/youtube_dl/extractor/lecturio.py @@ -6,8 +6,8 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, determine_ext, - extract_attributes, ExtractorError, float_or_none, int_or_none, @@ -19,6 +19,7 @@ from ..utils import ( class LecturioBaseIE(InfoExtractor): + _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' _LOGIN_URL = 'https://app.lecturio.com/en/login' _NETRC_MACHINE = 'lecturio' @@ -67,51 +68,56 @@ class LecturioIE(LecturioBaseIE): _VALID_URL = r'''(?x) https:// (?: - app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture| - (?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag + app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))| + (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag ) ''' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', - 'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870', + 'md5': '9a42cf1d8282a6311bf7211bbde26fde', 'info_dict': { 'id': '39634', 'ext': 'mp4', - 'title': 'Important Concepts and Terms – Introduction to Microbiology', + 'title': 'Important Concepts and Terms — Introduction to Microbiology', }, 'skip': 'Requires lecturio account credentials', }, { 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', 'only_matching': True, + }, { + 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', + 'only_matching': True, }] _CC_LANGS = { + 'Arabic': 'ar', + 'Bulgarian': 'bg', 'German': 'de', 'English': 'en', 'Spanish': 'es', + 'Persian': 'fa', 'French': 'fr', + 'Japanese': 'ja', 'Polish': 'pl', + 'Pashto': 'ps', 'Russian': 'ru', } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') or mobj.group('id_de') - - webpage = self._download_webpage( - 'https://app.lecturio.com/en/lecture/%s/player.html' % display_id, - display_id) - - lecture_id = self._search_regex( - r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id') - - api_url = self._search_regex( - r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, - 'api url', group='url') - - video = self._download_json(api_url, display_id) - + nt = mobj.group('nt') or mobj.group('nt_de') + lecture_id = mobj.group('id') + display_id = nt or lecture_id + api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json' + video = self._download_json( + self._API_BASE_URL + api_path, display_id) title = video['title'].strip() + if not lecture_id: + pid = video.get('productId') or video.get('uid') + if pid: + spid = pid.split('_') + if spid and len(spid) == 2: + lecture_id = spid[1] formats = [] for format_ in video['content']['media']: @@ -129,24 +135,30 @@ class LecturioIE(LecturioBaseIE): continue label = str_or_none(format_.get('label')) filesize = int_or_none(format_.get('fileSize')) - formats.append({ + f = { 'url': file_url, 'format_id': label, 'filesize': float_or_none(filesize, invscale=1000) - }) + } + if label: + mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label) + if mobj: + f.update({ + 'format_id': mobj.group(2), + 'height': int(mobj.group(1)), + }) + formats.append(f) self._sort_formats(formats) subtitles = {} automatic_captions = {} - cc = self._parse_json( - self._search_regex( - r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles', - default='{}'), display_id, fatal=False) - for cc_label, cc_url in cc.items(): - cc_url = url_or_none(cc_url) + captions = video.get('captions') or [] + for cc in captions: + cc_url = cc.get('url') if not cc_url: continue - lang = self._search_regex( + cc_label = cc.get('translatedCode') + lang = cc.get('languageCode') or self._search_regex( r'/([a-z]{2})_', cc_url, 'lang', default=cc_label.split()[0] if cc_label else 'en') original_lang = self._search_regex( @@ -160,7 +172,7 @@ class LecturioIE(LecturioBaseIE): }) return { - 'id': lecture_id, + 'id': lecture_id or nt, 'title': title, 'formats': formats, 'subtitles': subtitles, @@ -169,37 +181,40 @@ class LecturioIE(LecturioBaseIE): class LecturioCourseIE(LecturioBaseIE): - _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course' - _TEST = { + _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))' + _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', 'info_dict': { 'id': 'microbiology-introduction', 'title': 'Microbiology: Introduction', + 'description': 'md5:13da8500c25880c6016ae1e6d78c386a', }, 'playlist_count': 45, 'skip': 'Requires lecturio account credentials', - } + }, { + 'url': 'https://app.lecturio.com/#/course/c/6434', + 'only_matching': True, + }] def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - + nt, course_id = re.match(self._VALID_URL, url).groups() + display_id = nt or course_id + api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json' + course = self._download_json( + self._API_BASE_URL + api_path, display_id) entries = [] - for mobj in re.finditer( - r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>', - webpage): - params = extract_attributes(mobj.group(0)) - lecture_url = urljoin(url, params.get('data-url')) - lecture_id = params.get('data-id') + for lecture in course.get('lectures', []): + lecture_id = str_or_none(lecture.get('id')) + lecture_url = lecture.get('url') + if lecture_url: + lecture_url = urljoin(url, lecture_url) + else: + lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id) entries.append(self.url_result( lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) - - title = self._search_regex( - r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage, - 'title', default=None) - - return self.playlist_result(entries, display_id, title) + return self.playlist_result( + entries, display_id, course.get('title'), + clean_html(course.get('description'))) class LecturioDeCourseIE(LecturioBaseIE): From d1850c1a975de37b28c39afdce2e5ea56dec032a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20Schr=C3=B6ter?= <Rikorose@users.noreply.github.com> Date: Fri, 5 Jul 2019 15:47:32 +0000 Subject: [PATCH 0694/1201] [mixer:vod] Relax _VALID_URL (closes #21657) (#21658) --- youtube_dl/extractor/beampro.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index e264a145f..86abdae00 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -99,7 +99,7 @@ class BeamProLiveIE(BeamProBaseIE): class BeamProVodIE(BeamProBaseIE): IE_NAME = 'Mixer:vod' - _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\w+)' + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)' _TESTS = [{ 'url': 'https://mixer.com/willow8714?vod=2259830', 'md5': 'b2431e6e8347dc92ebafb565d368b76b', @@ -122,6 +122,9 @@ class BeamProVodIE(BeamProBaseIE): }, { 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', 'only_matching': True, + }, { + 'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig', + 'only_matching': True, }] @staticmethod From d18003a1419517cad49d4c5e8acb8255dd5422df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jul 2019 00:42:54 +0700 Subject: [PATCH 0695/1201] [peertube] Detect embed URLs in generic extraction (closes #21666) --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index e03c3d1d3..b50543e32 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -168,7 +168,7 @@ class PeerTubeIE(InfoExtractor): @staticmethod def _extract_peertube_url(webpage, source_url): mobj = re.match( - r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)' + r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)' % PeerTubeIE._UUID_RE, source_url) if mobj and any(p in webpage for p in ( '<title>PeerTube<', From a6389abfd7fec786ed07031cd7f3a42d02910de3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jul 2019 23:16:38 +0700 Subject: [PATCH 0696/1201] [philharmoniedeparis] Relax _VALID_URL (closes #21672) --- youtube_dl/extractor/philharmoniedeparis.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index f723a2b3b..03da64b11 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -14,7 +14,7 @@ class PhilharmonieDeParisIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)| + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)| pad\.philharmoniedeparis\.fr/doc/CIMU/ ) (?P<id>\d+) @@ -40,6 +40,12 @@ class PhilharmonieDeParisIE(InfoExtractor): }, { 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, }] _LIVE_URL = 'https://live.philharmoniedeparis.fr' From 25d71fb058368e1d48c4ad9496d91d33378649f6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 9 Jul 2019 08:28:39 +0100 Subject: [PATCH 0697/1201] [packtpub] fix extraction(closes #21268) --- youtube_dl/extractor/packtpub.py | 111 ++++++++++++++----------------- 1 file changed, 51 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 1324137df..3d39d1b27 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -5,26 +5,27 @@ import re from .common import InfoExtractor from ..compat import ( - compat_str, + # compat_str, compat_HTTPError, ) from ..utils import ( clean_html, ExtractorError, - remove_end, + # remove_end, + str_or_none, strip_or_none, unified_timestamp, - urljoin, + # urljoin, ) class PacktPubBaseIE(InfoExtractor): - _PACKT_BASE = 'https://www.packtpub.com' - _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE + # _PACKT_BASE = 'https://www.packtpub.com' + _STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/' class PacktPubIE(PacktPubBaseIE): - _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)(?:/(?P<display_id>[^/?&#]+))?' _TESTS = [{ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', @@ -50,9 +51,9 @@ class PacktPubIE(PacktPubBaseIE): return try: self._TOKEN = self._download_json( - self._MAPT_REST + '/users/tokens', None, + 'https://services.packtpub.com/auth-v1/users/tokens', None, 'Downloading Authorization Token', data=json.dumps({ - 'email': username, + 'username': username, 'password': password, }).encode())['data']['access'] except ExtractorError as e: @@ -61,54 +62,40 @@ class PacktPubIE(PacktPubBaseIE): raise ExtractorError(message, expected=True) raise - def _handle_error(self, response): - if response.get('status') != 'success': - raise ExtractorError( - '% said: %s' % (self.IE_NAME, response['message']), - expected=True) - - def _download_json(self, *args, **kwargs): - response = super(PacktPubIE, self)._download_json(*args, **kwargs) - self._handle_error(response) - return response - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - course_id, chapter_id, video_id = mobj.group( - 'course_id', 'chapter_id', 'id') + course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups() headers = {} if self._TOKEN: headers['Authorization'] = 'Bearer ' + self._TOKEN - video = self._download_json( - '%s/users/me/products/%s/chapters/%s/sections/%s' - % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, - 'Downloading JSON video', headers=headers)['data'] + try: + video_url = self._download_json( + 'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id, + 'Downloading JSON video', headers=headers)['data'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + self.raise_login_required('This video is locked') + raise - content = video.get('content') - if not content: - self.raise_login_required('This video is locked') + # TODO: find a better way to avoid duplicating course requests + # metadata = self._download_json( + # '%s/products/%s/chapters/%s/sections/%s/metadata' + # % (self._MAPT_REST, course_id, chapter_id, video_id), + # video_id)['data'] - video_url = content['file'] - - metadata = self._download_json( - '%s/products/%s/chapters/%s/sections/%s/metadata' - % (self._MAPT_REST, course_id, chapter_id, video_id), - video_id)['data'] - - title = metadata['pageTitle'] - course_title = metadata.get('title') - if course_title: - title = remove_end(title, ' - %s' % course_title) - timestamp = unified_timestamp(metadata.get('publicationDate')) - thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) + # title = metadata['pageTitle'] + # course_title = metadata.get('title') + # if course_title: + # title = remove_end(title, ' - %s' % course_title) + # timestamp = unified_timestamp(metadata.get('publicationDate')) + # thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) return { 'id': video_id, 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'timestamp': timestamp, + 'title': display_id or video_id, # title, + # 'thumbnail': thumbnail, + # 'timestamp': timestamp, } @@ -119,6 +106,7 @@ class PacktPubCourseIE(PacktPubBaseIE): 'info_dict': { 'id': '9781787122215', 'title': 'Learn Nodejs by building 12 projects [Video]', + 'description': 'md5:489da8d953f416e51927b60a1c7db0aa', }, 'playlist_count': 90, }, { @@ -136,35 +124,38 @@ class PacktPubCourseIE(PacktPubBaseIE): url, course_id = mobj.group('url', 'id') course = self._download_json( - '%s/products/%s/metadata' % (self._MAPT_REST, course_id), - course_id)['data'] + self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id) + metadata = self._download_json( + self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id, + course_id, fatal=False) or {} entries = [] - for chapter_num, chapter in enumerate(course['tableOfContents'], 1): - if chapter.get('type') != 'chapter': - continue - children = chapter.get('children') - if not isinstance(children, list): + for chapter_num, chapter in enumerate(course['chapters'], 1): + chapter_id = str_or_none(chapter.get('id')) + sections = chapter.get('sections') + if not chapter_id or not isinstance(sections, list): continue chapter_info = { 'chapter': chapter.get('title'), 'chapter_number': chapter_num, - 'chapter_id': chapter.get('id'), + 'chapter_id': chapter_id, } - for section in children: - if section.get('type') != 'section': - continue - section_url = section.get('seoUrl') - if not isinstance(section_url, compat_str): + for section in sections: + section_id = str_or_none(section.get('id')) + if not section_id or section.get('contentType') != 'video': continue entry = { '_type': 'url_transparent', - 'url': urljoin(url + '/', section_url), + 'url': '/'.join([url, chapter_id, section_id]), 'title': strip_or_none(section.get('title')), 'description': clean_html(section.get('summary')), + 'thumbnail': metadata.get('coverImage'), + 'timestamp': unified_timestamp(metadata.get('publicationDate')), 'ie_key': PacktPubIE.ie_key(), } entry.update(chapter_info) entries.append(entry) - return self.playlist_result(entries, course_id, course.get('title')) + return self.playlist_result( + entries, course_id, metadata.get('title'), + clean_html(metadata.get('about'))) From c9b0564ac141bed3766fa65011274cb5c1c5bccb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 9 Jul 2019 11:56:16 +0100 Subject: [PATCH 0698/1201] [packtpub] Relax lesson _VALID_URL regex(closes #21695) --- youtube_dl/extractor/packtpub.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 3d39d1b27..11ad3b3b8 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -25,7 +25,7 @@ class PacktPubBaseIE(InfoExtractor): class PacktPubIE(PacktPubBaseIE): - _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)(?:/(?P<display_id>[^/?&#]+))?' + _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>[^/]+)/(?P<id>[^/]+)(?:/(?P<display_id>[^/?&#]+))?' _TESTS = [{ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', @@ -41,6 +41,9 @@ class PacktPubIE(PacktPubBaseIE): }, { 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro', 'only_matching': True, + }, { + 'url': 'https://subscription.packtpub.com/video/programming/9781838988906/p1/video1_1/business-card-project', + 'only_matching': True, }] _NETRC_MACHINE = 'packtpub' _TOKEN = None From 4b30282616c35b08308975b9d51614039b3f3d12 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 13:54:49 +0100 Subject: [PATCH 0699/1201] [funk] fix extraction(closes #17915) --- youtube_dl/extractor/funk.py | 171 +++++------------------------------ 1 file changed, 23 insertions(+), 148 deletions(-) diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py index 7e1af95e0..81d1949fd 100644 --- a/youtube_dl/extractor/funk.py +++ b/youtube_dl/extractor/funk.py @@ -1,89 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals -import itertools import re from .common import InfoExtractor from .nexx import NexxIE -from ..compat import compat_str from ..utils import ( int_or_none, - try_get, + str_or_none, ) -class FunkBaseIE(InfoExtractor): - _HEADERS = { - 'Accept': '*/*', - 'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8', - 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4', - } - _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4' - - @staticmethod - def _make_headers(referer): - headers = FunkBaseIE._HEADERS.copy() - headers['Referer'] = referer - return headers - - def _make_url_result(self, video): - return { - '_type': 'url_transparent', - 'url': 'nexx:741:%s' % video['sourceId'], - 'ie_key': NexxIE.ie_key(), - 'id': video['sourceId'], - 'title': video.get('title'), - 'description': video.get('description'), - 'duration': int_or_none(video.get('duration')), - 'season_number': int_or_none(video.get('seasonNr')), - 'episode_number': int_or_none(video.get('episodeNr')), - } - - -class FunkMixIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)' +class FunkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' _TESTS = [{ - 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten', - 'md5': '8edf617c2f2b7c9847dfda313f199009', - 'info_dict': { - 'id': '123748', - 'ext': 'mp4', - 'title': '"Die realste Kifferdoku aller Zeiten"', - 'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d', - 'timestamp': 1490274721, - 'upload_date': '20170323', - }, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - mix_id = mobj.group('id') - alias = mobj.group('alias') - - lists = self._download_json( - 'https://www.funk.net/api/v3.1/curation/curatedLists/', - mix_id, headers=self._make_headers(url), query={ - 'size': 100, - })['_embedded']['curatedListList'] - - metas = next( - l for l in lists - if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas'] - video = next( - meta['videoDataDelegate'] - for meta in metas - if try_get( - meta, lambda x: x['videoDataDelegate']['alias'], - compat_str) == alias) - - return self._make_url_result(video) - - -class FunkChannelIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2', + 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', + 'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81', 'info_dict': { 'id': '1155821', 'ext': 'mp4', @@ -92,83 +24,26 @@ class FunkChannelIE(FunkBaseIE): 'timestamp': 1514507395, 'upload_date': '20171229', }, - 'params': { - 'skip_download': True, - }, + }, { - # only available via byIdList API - 'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu', - 'info_dict': { - 'id': '205067', - 'ext': 'mp4', - 'title': 'Martin Sonneborn erklärt die EU', - 'description': 'md5:050f74626e4ed87edf4626d2024210c0', - 'timestamp': 1494424042, - 'upload_date': '20170510', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/', + 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - alias = mobj.group('alias') - - headers = self._make_headers(url) - - video = None - - # Id-based channels are currently broken on their side: webplayer - # tries to process them via byChannelAlias endpoint and fails - # predictably. - for page_num in itertools.count(): - by_channel_alias = self._download_json( - 'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s' - % channel_id, - 'Downloading byChannelAlias JSON page %d' % (page_num + 1), - headers=headers, query={ - 'filterFsk': 'false', - 'sort': 'creationDate,desc', - 'size': 100, - 'page': page_num, - }, fatal=False) - if not by_channel_alias: - break - video_list = try_get( - by_channel_alias, lambda x: x['_embedded']['videoList'], list) - if not video_list: - break - try: - video = next(r for r in video_list if r.get('alias') == alias) - break - except StopIteration: - pass - if not try_get( - by_channel_alias, lambda x: x['_links']['next']): - break - - if not video: - by_id_list = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/byIdList', - channel_id, 'Downloading byIdList JSON', headers=headers, - query={ - 'ids': alias, - }, fatal=False) - if by_id_list: - video = try_get(by_id_list, lambda x: x['result'][0], dict) - - if not video: - results = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/filter', - channel_id, 'Downloading filter JSON', headers=headers, query={ - 'channelId': channel_id, - 'size': 100, - })['result'] - video = next(r for r in results if r.get('alias') == alias) - - return self._make_url_result(video) + display_id, nexx_id = re.match(self._VALID_URL, url).groups() + video = self._download_json( + 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id) + return { + '_type': 'url_transparent', + 'url': 'nexx:741:' + nexx_id, + 'ie_key': NexxIE.ie_key(), + 'id': nexx_id, + 'title': video.get('title'), + 'description': video.get('description'), + 'duration': int_or_none(video.get('duration')), + 'channel_id': str_or_none(video.get('channelId')), + 'display_id': display_id, + 'tags': video.get('tags'), + 'thumbnail': video.get('imageUrlLandscape'), + } From 253289656f6c49f9bed7d043c9806ce4def4973f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 13:57:43 +0100 Subject: [PATCH 0700/1201] [extractors] update funk.net import --- youtube_dl/extractor/extractors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 02f17cf0d..68b9b9b25 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -395,10 +395,7 @@ from .frontendmasters import ( FrontendMastersCourseIE ) from .funimation import FunimationIE -from .funk import ( - FunkMixIE, - FunkChannelIE, -) +from .funk import FunkIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE From cfe781d4faf910b8f687ce39480456d97f0946cf Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 15:45:00 +0100 Subject: [PATCH 0701/1201] [gameinformer] fix extraction(closes #8895)(closes #15363)(closes #17206) --- youtube_dl/extractor/gameinformer.py | 34 ++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py index a2920a793..f1b96c172 100644 --- a/youtube_dl/extractor/gameinformer.py +++ b/youtube_dl/extractor/gameinformer.py @@ -1,12 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals +from .brightcove import BrightcoveNewIE from .common import InfoExtractor +from ..utils import ( + clean_html, + get_element_by_class, + get_element_by_id, +) class GameInformerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)' + _TESTS = [{ + # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url 'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx', 'md5': '292f26da1ab4beb4c9099f1304d2b071', 'info_dict': { @@ -18,16 +25,25 @@ class GameInformerIE(InfoExtractor): 'upload_date': '20150928', 'uploader_id': '694940074001', }, - } + }, { + # Brightcove id inside unique element with field--name-field-brightcove-video-id class + 'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue', + 'info_dict': { + 'id': '6057111913001', + 'ext': 'mp4', + 'title': 'New Gameplay Today – Streets Of Rogue', + 'timestamp': 1562699001, + 'upload_date': '20190709', + 'uploader_id': '694940074001', + + }, + }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s' def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage( url, display_id, headers=self.geo_verification_headers()) - brightcove_id = self._search_regex( - [r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"], - webpage, 'brightcove id') - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', - brightcove_id) + brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage)) + brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage) + return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id) From e4d53148f506cfcfab8559d86b40c72b7db87a6f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 16:47:37 +0100 Subject: [PATCH 0702/1201] [funnyordie] move extraction to VoxMedia extractor and improve vox volume embed extraction(closes #16846) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/funnyordie.py | 162 ----------------------------- youtube_dl/extractor/voxmedia.py | 101 ++++++++++++------ 3 files changed, 67 insertions(+), 197 deletions(-) delete mode 100644 youtube_dl/extractor/funnyordie.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 68b9b9b25..555fadfaf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -396,7 +396,6 @@ from .frontendmasters import ( ) from .funimation import FunimationIE from .funk import FunkIE -from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE from .gaia import GaiaIE diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py deleted file mode 100644 index f85e7de14..000000000 --- a/youtube_dl/extractor/funnyordie.py +++ /dev/null @@ -1,162 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - unified_timestamp, -) - - -class FunnyOrDieIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|articles|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])' - _TESTS = [{ - 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', - 'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', - 'info_dict': { - 'id': '0732f586d7', - 'ext': 'mp4', - 'title': 'Heart-Shaped Box: Literal Video Version', - 'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', - 'thumbnail': r're:^http:.*\.jpg$', - 'uploader': 'DASjr', - 'timestamp': 1317904928, - 'upload_date': '20111006', - 'duration': 318.3, - }, - }, { - 'url': 'http://www.funnyordie.com/embed/e402820827', - 'info_dict': { - 'id': 'e402820827', - 'ext': 'mp4', - 'title': 'Please Use This Song (Jon Lajoie)', - 'description': 'Please use this to sell something. www.jonlajoie.com', - 'thumbnail': r're:^http:.*\.jpg$', - 'timestamp': 1398988800, - 'upload_date': '20140502', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - - links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage) - if not links: - raise ExtractorError('No media links available for %s' % video_id) - - links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0) - - m3u8_url = self._search_regex( - r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\1', - webpage, 'm3u8 url', group='url') - - formats = [] - - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False) - source_formats = list(filter( - lambda f: f.get('vcodec') != 'none', m3u8_formats)) - - bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] - bitrates.sort() - - if source_formats: - self._sort_formats(source_formats) - - for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)): - for path, ext in links: - ff = f.copy() - if ff: - if ext != 'mp4': - ff = dict( - [(k, v) for k, v in ff.items() - if k in ('height', 'width', 'format_id')]) - ff.update({ - 'format_id': ff['format_id'].replace('hls', ext), - 'ext': ext, - 'protocol': 'http', - }) - else: - ff.update({ - 'format_id': '%s-%d' % (ext, bitrate), - 'vbr': bitrate, - }) - ff['url'] = self._proto_relative_url( - '%s%d.%s' % (path, bitrate, ext)) - formats.append(ff) - self._check_formats(formats, video_id) - - formats.extend(m3u8_formats) - self._sort_formats( - formats, field_preference=('height', 'width', 'tbr', 'format_id')) - - subtitles = {} - for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage): - subtitles[src_lang] = [{ - 'ext': src.split('/')[-1], - 'url': 'http://www.funnyordie.com%s' % src, - }] - - timestamp = unified_timestamp(self._html_search_meta( - 'uploadDate', webpage, 'timestamp', default=None)) - - uploader = self._html_search_regex( - r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h', - webpage, 'uploader', default=None) - - title, description, thumbnail, duration = [None] * 4 - - medium = self._parse_json( - self._search_regex( - r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium', - default='{}'), - video_id, fatal=False) - if medium: - title = medium.get('title') - duration = float_or_none(medium.get('duration')) - if not timestamp: - timestamp = unified_timestamp(medium.get('publishDate')) - - post = self._parse_json( - self._search_regex( - r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details', - default='{}'), - video_id, fatal=False) - if post: - if not title: - title = post.get('name') - description = post.get('description') - thumbnail = post.get('picture') - - if not title: - title = self._og_search_title(webpage) - if not description: - description = self._og_search_description(webpage) - if not duration: - duration = int_or_none(self._html_search_meta( - ('video:duration', 'duration'), webpage, 'duration', default=False)) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index c7a0a88fe..b318e15d4 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from .once import OnceIE from ..compat import compat_urllib_parse_unquote -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, +) class VoxMediaVolumeIE(OnceIE): @@ -13,18 +16,43 @@ class VoxMediaVolumeIE(OnceIE): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id) + + setup = self._parse_json(self._search_regex( + r'setup\s*=\s*({.+});', webpage, 'setup'), video_id) + video_data = setup.get('video') or {} + info = { + 'id': video_id, + 'title': video_data.get('title_short'), + 'description': video_data.get('description_long') or video_data.get('description_short'), + 'thumbnail': video_data.get('brightcove_thumbnail') + } + asset = setup.get('asset') or setup.get('params') or {} + + formats = [] + hls_url = asset.get('hls_url') + if hls_url: + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + mp4_url = asset.get('mp4_url') + if mp4_url: + tbr = self._search_regex(r'-(\d+)k\.', mp4_url, 'bitrate', default=None) + format_id = 'http' + if tbr: + format_id += '-' + tbr + formats.append({ + 'format_id': format_id, + 'url': mp4_url, + 'tbr': int_or_none(tbr), + }) + if formats: + self._sort_formats(formats) + info['formats'] = formats + return info + for provider_video_type in ('ooyala', 'youtube', 'brightcove'): provider_video_id = video_data.get('%s_id' % provider_video_type) if not provider_video_id: continue - info = { - 'id': video_id, - 'title': video_data.get('title_short'), - 'description': video_data.get('description_long') or video_data.get('description_short'), - 'thumbnail': video_data.get('brightcove_thumbnail') - } if provider_video_type == 'brightcove': info['formats'] = self._extract_once_formats(provider_video_id) self._sort_formats(info['formats']) @@ -39,46 +67,49 @@ class VoxMediaVolumeIE(OnceIE): class VoxMediaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked|funnyordie)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)' _TESTS = [{ + # Volume embed, Youtube 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', 'info_dict': { - 'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe', + 'id': 'j4mLW6x17VM', 'ext': 'mp4', - 'title': 'Google\'s new material design direction', - 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', + 'title': 'Material world: how Google discovered what software is made of', + 'description': 'md5:dfc17e7715e3b542d66e33a109861382', + 'upload_date': '20190710', + 'uploader_id': 'TheVerge', + 'uploader': 'The Verge', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Youtube'], }, { - # data-ooyala-id + # Volume embed, Youtube 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet', - 'md5': 'd744484ff127884cd2ba09e3fa604e4b', + 'md5': '4c8f4a0937752b437c3ebc0ed24802b5', 'info_dict': { - 'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B', + 'id': 'Gy8Md3Eky38', 'ext': 'mp4', 'title': 'The Nexus 6: hands-on with Google\'s phablet', - 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', + 'description': 'md5:d9f0216e5fb932dd2033d6db37ac3f1d', + 'uploader_id': 'TheVerge', + 'upload_date': '20141021', + 'uploader': 'The Verge', }, - 'add_ie': ['Ooyala'], - 'skip': 'Video Not Found', + 'add_ie': ['Youtube'], + 'skip': 'similar to the previous test', }, { - # volume embed + # Volume embed, Youtube 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', 'info_dict': { - 'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b', + 'id': 'YCjDnX-Xzhg', 'ext': 'mp4', - 'title': 'The new frontier of LGBTQ civil rights, explained', - 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', + 'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination", + 'description': 'md5:fc1317922057de31cd74bce91eb1c66c', + 'uploader_id': 'voxdotcom', + 'upload_date': '20150915', + 'uploader': 'Vox', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Youtube'], + 'skip': 'similar to the previous test', }, { # youtube embed 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance', @@ -93,6 +124,7 @@ class VoxMediaIE(InfoExtractor): 'uploader': 'Vox', }, 'add_ie': ['Youtube'], + 'skip': 'Page no longer contain videos', }, { # SBN.VideoLinkset.entryGroup multiple ooyala embeds 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', @@ -118,10 +150,11 @@ class VoxMediaIE(InfoExtractor): 'description': 'md5:e02d56b026d51aa32c010676765a690d', }, }], + 'skip': 'Page no longer contain videos', }, { # volume embed, Brightcove Once 'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya', - 'md5': '01571a896281f77dc06e084138987ea2', + 'md5': '2dbc77b8b0bff1894c2fce16eded637d', 'info_dict': { 'id': '1231c973d', 'ext': 'mp4', From 5fc0896168a9ff155475bfb0b7b66504c7077605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Jul 2019 23:37:09 +0700 Subject: [PATCH 0703/1201] [beeg] Add support for api/v6 v2 URLs without t argument (closes #21701) --- youtube_dl/extractor/beeg.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c15a0ac8f..5788d13ba 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -32,6 +32,10 @@ class BeegIE(InfoExtractor): # api/v6 v2 'url': 'https://beeg.com/1941093077?t=911-1391', 'only_matching': True, + }, { + # api/v6 v2 w/o t + 'url': 'https://beeg.com/1277207756', + 'only_matching': True, }, { 'url': 'https://beeg.porn/video/5416503', 'only_matching': True, @@ -49,14 +53,17 @@ class BeegIE(InfoExtractor): r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', default='1546225636701') - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - t = qs.get('t', [''])[0].split('-') - if len(t) > 1: + if len(video_id) >= 10: query = { 'v': 2, - 's': t[0], - 'e': t[1], } + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query.update({ + 's': t[0], + 'e': t[1], + }) else: query = {'v': 1} From 4dcd4b7b163feddc07959ca34cbb29815b354c25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 00:04:25 +0700 Subject: [PATCH 0704/1201] [mgtv] Pass Referer HTTP header for format URLs (closes #21726) --- youtube_dl/extractor/mgtv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 84137df50..7ae2e3c3b 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -79,6 +79,9 @@ class MGTVIE(InfoExtractor): 'ext': 'mp4', 'tbr': tbr, 'protocol': 'm3u8_native', + 'http_headers': { + 'Referer': url, + }, }) self._sort_formats(formats) From 7612406bf9de825e569b9d2d1faee3d82fd60f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 00:34:03 +0700 Subject: [PATCH 0705/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5ce78b07a..ad99a64d6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +version <unreleased> + +Core ++ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016) + +Extractors ++ [mgtv] Pass Referer HTTP header for format URLs (#21726) ++ [beeg] Add support for api/v6 v2 URLs without t argument (#21701) +* [voxmedia:volume] Improvevox embed extraction (#16846) +* [funnyordie] Move extraction to VoxMedia extractor (#16846) +* [gameinformer] Fix extraction (#8895, #15363, #17206) +* [funk] Fix extraction (#17915) +* [packtpub] Relax lesson URL regular expression (#21695) +* [packtpub] Fix extraction (#21268) +* [philharmoniedeparis] Relax URL regular expression (#21672) +* [peertube] Detect embed URLs in generic extraction (#21666) +* [mixer:vod] Relax URL regular expression (#21657, #21658) ++ [lecturio] Add support id based URLs (#21630) ++ [go] Add site info for disneynow (#21613) +* [ted] Restrict info regular expression (#21631) +* [twitch:vod] Actualize m3u8 URL (#21538, #21607) +* [vzaar] Fix videos with empty title (#21606) +* [tvland] Fix extraction (#21384) +* [arte] Clean extractor (#15583, #21614) + + version 2019.07.02 Core From 0d1f4af39dab36138a3809e31a8c09ee588e1b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 00:43:54 +0700 Subject: [PATCH 0706/1201] release 2019.07.12 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 13 +------------ youtube_dl/version.py | 2 +- 8 files changed, 15 insertions(+), 26 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index fb0d33b8f..fcfadeb1f 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.02 + [debug] youtube-dl version 2019.07.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 3c95565a6..7e1a3d1c0 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 7410776d7..71782a104 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index cc52bcca6..6bcfde1f8 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.02 + [debug] youtube-dl version 2019.07.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index bbd421b1a..89d9c63aa 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index ad99a64d6..45cb2746e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.07.12 Core + [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 55ae43144..4e664336d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -58,16 +58,8 @@ - **ARD:mediathek** - **ARDBetaMediathek** - **Arkena** - - **arte.tv** - **arte.tv:+7** - - **arte.tv:cinema** - - **arte.tv:concert** - - **arte.tv:creative** - - **arte.tv:ddc** - **arte.tv:embed** - - **arte.tv:future** - - **arte.tv:info** - - **arte.tv:magazine** - **arte.tv:playlist** - **AsianCrush** - **AsianCrushPlaylist** @@ -313,9 +305,7 @@ - **FrontendMastersCourse** - **FrontendMastersLesson** - **Funimation** - - **FunkChannel** - - **FunkMix** - - **FunnyOrDie** + - **Funk** - **Fusion** - **Fux** - **FXNetworks** @@ -896,7 +886,6 @@ - **TF1** - **TFO** - **TheIntercept** - - **theoperaplatform** - **ThePlatform** - **ThePlatformFeed** - **TheScene** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 78fe54326..18bcb33e2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.02' +__version__ = '2019.07.12' From baf67a604d912722b0fe03a40e9dc5349a2208cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 02:26:05 +0700 Subject: [PATCH 0707/1201] [youtube] Fix authentication (closes #11270) --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b570d5bae..9f661a84f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -116,6 +116,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'f.req': json.dumps(f_req), 'flowName': 'GlifWebSignIn', 'flowEntry': 'ServiceLogin', + # TODO: reverse actual botguard identifier generation algo + 'bgRequest': '["identifier",""]', }) return self._download_json( url, None, note=note, errnote=errnote, From 27019dbb4b4829b5e1910c6b714f904ce8fad680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 03:45:58 +0700 Subject: [PATCH 0708/1201] [youtube] Fix is_live extraction (closes #21734) --- youtube_dl/extractor/youtube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9f661a84f..8a3c502ba 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -27,6 +27,7 @@ from ..compat import ( compat_str, ) from ..utils import ( + bool_or_none, clean_html, dict_get, error_to_compat_str, @@ -1890,6 +1891,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if view_count is None and video_details: view_count = int_or_none(video_details.get('viewCount')) + if is_live is None: + is_live = bool_or_none(dict_get( + video_details, ('isLive', 'isLiveContent'), + skip_false_values=False)) + # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True) From 0dd58a523fffd06c126c006722850bab36bd3aa2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 11 Jul 2019 23:09:09 +0100 Subject: [PATCH 0709/1201] [fivetv] relax video URL regex and support https URLs --- youtube_dl/extractor/fivetv.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py index 9f9863746..c4c0f1b3d 100644 --- a/youtube_dl/extractor/fivetv.py +++ b/youtube_dl/extractor/fivetv.py @@ -9,7 +9,7 @@ from ..utils import int_or_none class FiveTVIE(InfoExtractor): _VALID_URL = r'''(?x) - http:// + https?:// (?:www\.)?5-tv\.ru/ (?: (?:[^/]+/)+(?P<id>\d+)| @@ -39,6 +39,7 @@ class FiveTVIE(InfoExtractor): 'duration': 180, }, }, { + # redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/ 'url': 'http://www.5-tv.ru/glavnoe/#itemDetails', 'info_dict': { 'id': 'glavnoe', @@ -46,6 +47,7 @@ class FiveTVIE(InfoExtractor): 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$', 'thumbnail': r're:^https?://.*\.jpg$', }, + 'skip': 'redirect to «Известия. Главное» project page', }, { 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/', 'only_matching': True, @@ -70,7 +72,7 @@ class FiveTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - [r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"', + [r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"', r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'], webpage, 'video url') From 16d3672ad73802043a9cccd1505909949e2ce71f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 11 Jul 2019 23:37:34 +0100 Subject: [PATCH 0710/1201] [espn] fix fivethirtyeight.com extraction --- youtube_dl/extractor/abcnews.py | 9 ++++++--- youtube_dl/extractor/espn.py | 16 ++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index cd29aca77..8b407bf9c 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -15,10 +15,13 @@ class AbcNewsVideoIE(AMPIE): IE_NAME = 'abcnews:video' _VALID_URL = r'''(?x) https?:// - abcnews\.go\.com/ (?: - [^/]+/video/(?P<display_id>[0-9a-z-]+)-| - video/embed\?.*?\bid= + abcnews\.go\.com/ + (?: + [^/]+/video/(?P<display_id>[0-9a-z-]+)-| + video/embed\?.*?\bid= + )| + fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/ ) (?P<id>\d+) ''' diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 8cc9bd165..6cf05e6da 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -216,17 +216,14 @@ class FiveThirtyEightIE(InfoExtractor): _TEST = { 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', 'info_dict': { - 'id': '21846851', - 'ext': 'mp4', + 'id': '56032156', + 'ext': 'flv', 'title': 'FiveThirtyEight: The Raiders can still make the playoffs', 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', - 'timestamp': 1513960621, - 'upload_date': '20171222', }, 'params': { 'skip_download': True, }, - 'expected_warnings': ['Unable to download f4m manifest'], } def _real_extract(self, url): @@ -234,9 +231,8 @@ class FiveThirtyEightIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_id = self._search_regex( - r'data-video-id=["\'](?P<id>\d+)', - webpage, 'video id', group='id') + embed_url = self._search_regex( + r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)', + webpage, 'embed url') - return self.url_result( - 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) + return self.url_result(embed_url, 'AbcNewsVideo') From d4ece5d359bfe5c6b87e0ef19f2b351e408e510c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 21:56:49 +0100 Subject: [PATCH 0711/1201] [bleacherreport] fix Bleacher Report CMS extraction --- youtube_dl/extractor/bleacherreport.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py index e829974ff..dc60224d0 100644 --- a/youtube_dl/extractor/bleacherreport.py +++ b/youtube_dl/extractor/bleacherreport.py @@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor): video = article_data.get('video') if video: video_type = video['type'] - if video_type == 'cms.bleacherreport.com': + if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] elif video_type == 'ooyala.com': info['url'] = 'ooyala:%s' % video['id'] @@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor): class BleacherReportCMSIE(AMPIE): - _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})' + _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})' _TESTS = [{ - 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', + 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms', 'md5': '2e4b0a997f9228ffa31fada5c53d1ed1', 'info_dict': { 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', @@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE): def _real_extract(self, url): video_id = self._match_id(url) - info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id) + info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) info['id'] = video_id return info From 82f68e4a0113f00144b55c5d2a1951793ac78818 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 22:02:06 +0100 Subject: [PATCH 0712/1201] [facebook] fallback to twitter:image meta for thumbnail extraction(closes #21224) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 789dd79d5..a3dcdca3e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -428,7 +428,7 @@ class FacebookIE(InfoExtractor): timestamp = int_or_none(self._search_regex( r'<abbr[^>]+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) - thumbnail = self._og_search_thumbnail(webpage) + thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage) view_count = parse_count(self._search_regex( r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', From 0441d6266ca275b1b4a2ad4efdb4b3f54e318e88 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 22:31:11 +0100 Subject: [PATCH 0713/1201] [rudo] remove extractor(closes #18430)(closes #18474) Covered by generic extractor --- youtube_dl/extractor/biobiochiletv.py | 19 ++++++---- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/rudo.py | 53 --------------------------- 3 files changed, 12 insertions(+), 61 deletions(-) delete mode 100644 youtube_dl/extractor/rudo.py diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py index b92031c8a..dc86c57c5 100644 --- a/youtube_dl/extractor/biobiochiletv.py +++ b/youtube_dl/extractor/biobiochiletv.py @@ -6,7 +6,6 @@ from ..utils import ( ExtractorError, remove_end, ) -from .rudo import RudoIE class BioBioChileTVIE(InfoExtractor): @@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor): }, { 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', 'info_dict': { - 'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos', + 'id': 'b4xd0LK3SK', 'ext': 'mp4', - 'uploader': '(none)', - 'upload_date': '20160708', - 'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos', + # TODO: fix url_transparent information overriding + # 'uploader': 'Juan Pablo Echenique', + 'title': 'Comentario Oscar Cáceres', + }, + 'params': { + # empty m3u8 manifest + 'skip_download': True, }, }, { 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', @@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - rudo_url = RudoIE._extract_url(webpage) + rudo_url = self._search_regex( + r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage, 'embed URL', None, group='url') if not rudo_url: raise ExtractorError('No videos found') @@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( - r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', + r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', webpage, 'uploader', fatal=False) return { diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 555fadfaf..e88ad34a8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -967,7 +967,6 @@ from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rtvs import RTVSIE -from .rudo import RudoIE from .ruhd import RUHDIE from .rutube import ( RutubeIE, diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py deleted file mode 100644 index f036f6757..000000000 --- a/youtube_dl/extractor/rudo.py +++ /dev/null @@ -1,53 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - js_to_json, - get_element_by_class, - unified_strdate, -) - - -class RudoIE(InfoExtractor): - _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' - - _TEST = { - 'url': 'http://rudo.video/vod/oTzw0MGnyG', - 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', - 'info_dict': { - 'id': 'oTzw0MGnyG', - 'ext': 'mp4', - 'title': 'Comentario Tomás Mosciatti', - 'upload_date': '20160617', - }, - } - - @classmethod - def _extract_url(cls, webpage): - mobj = re.search( - r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', - webpage) - if mobj: - return mobj.group('url') - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') - - jwplayer_data = self._parse_json(self._search_regex( - r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, - transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) - - info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') - - info_dict.update({ - 'title': self._og_search_title(webpage), - 'upload_date': unified_strdate(get_element_by_class('date', webpage)), - }) - - return info_dict From 57227618fe2708f9e4f1c87fdd08c49c7122c13b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 22:50:37 +0100 Subject: [PATCH 0714/1201] [spike] fix Bellator extraction --- youtube_dl/extractor/spike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 21b93a5b3..7c11ea7aa 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -22,7 +22,7 @@ class BellatorIE(MTVServicesInfoExtractor): 'only_matching': True, }] - _FEED_URL = 'http://www.spike.com/feeds/mrss/' + _FEED_URL = 'http://www.bellator.com/feeds/mrss/' _GEO_COUNTRIES = ['US'] From 272355c17265e8dc921d7f1518606b15fd800112 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 23:26:46 +0100 Subject: [PATCH 0715/1201] [dbtv] fix extraction --- youtube_dl/extractor/dbtv.py | 51 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index f232f0dc5..aaedf2e3d 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -7,50 +7,51 @@ from .common import InfoExtractor class DBTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?' + _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})' _TESTS = [{ - 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', - 'md5': '2e24f67936517b143a234b4cadf792ec', + 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/', + 'md5': 'b8f850ba1860adbda668d367f9b77699', 'info_dict': { - 'id': '3649835190001', - 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', + 'id': 'PynxJnNWChE', 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', - 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', + 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', 'thumbnail': r're:https?://.*\.jpg', - 'timestamp': 1404039863, - 'upload_date': '20140629', - 'duration': 69.544, - 'uploader_id': '1027729757001', + 'upload_date': '20160916', + 'duration': 69, + 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', + 'uploader': 'Dagbladet', }, - 'add_ie': ['BrightcoveNew'] + 'add_ie': ['Youtube'] }, { - 'url': 'http://dbtv.no/3649835190001', + 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 'only_matching': True, }, { - 'url': 'http://www.dbtv.no/lazyplayer/4631135248001', - 'only_matching': True, - }, { - 'url': 'http://dbtv.no/vice/5000634109001', - 'only_matching': True, - }, { - 'url': 'http://dbtv.no/filmtrailer/3359293614001', + 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', 'only_matching': True, }] @staticmethod def _extract_urls(webpage): return [url for _, url in re.findall( - r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1', + r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1', webpage)] def _real_extract(self, url): - video_id, display_id = re.match(self._VALID_URL, url).groups() - - return { + display_id, video_id = re.match(self._VALID_URL, url).groups() + info = { '_type': 'url_transparent', - 'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id, 'id': video_id, 'display_id': display_id, - 'ie_key': 'BrightcoveNew', } + if len(video_id) == 11: + info.update({ + 'url': video_id, + 'ie_key': 'Youtube', + }) + else: + info.update({ + 'url': 'jwplatform:' + video_id, + 'ie_key': 'JWPlatform', + }) + return info From c72dc20d099bbe1dc4ede83e8f94a7bc42d81532 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 13 Jul 2019 10:13:07 +0100 Subject: [PATCH 0716/1201] [roosterteeth] fix free episode extraction(#16094) --- youtube_dl/extractor/roosterteeth.py | 97 ++++++++++++++-------------- 1 file changed, 47 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 857434540..d3eeeba62 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -4,11 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( ExtractorError, int_or_none, - strip_or_none, - unescapeHTML, + str_or_none, urlencode_postdata, ) @@ -21,15 +24,14 @@ class RoosterTeethIE(InfoExtractor): 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'md5': 'e2bd7764732d785ef797700a2489f212', 'info_dict': { - 'id': '26576', + 'id': '9156', 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'ext': 'mp4', - 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement', - 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', + 'title': 'Million Dollars, But... The Game Announcement', + 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', 'thumbnail': r're:^https?://.*\.png$', 'series': 'Million Dollars, But...', 'episode': 'Million Dollars, But... The Game Announcement', - 'comment_count': int, }, }, { 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', @@ -89,60 +91,55 @@ class RoosterTeethIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id - webpage = self._download_webpage(url, display_id) - - episode = strip_or_none(unescapeHTML(self._search_regex( - (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', - r'<title>(?P<title>[^<]+)'), webpage, 'title', - default=None, group='title'))) - - title = strip_or_none(self._og_search_title( - webpage, default=None)) or episode - - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?Phttp.+?\.m3u8.*?)\1', - webpage, 'm3u8 url', default=None, group='url') - - if not m3u8_url: - if re.search(r']+class=["\']non-sponsor', webpage): - self.raise_login_required( - '%s is only available for FIRST members' % display_id) - - if re.search(r']+class=["\']golive-gate', webpage): - self.raise_login_required('%s is not available yet' % display_id) - - raise ExtractorError('Unable to extract m3u8 URL') + try: + m3u8_url = self._download_json( + api_episode_url + '/videos', display_id, + 'Downloading video JSON metadata')['data'][0]['attributes']['url'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: + self.raise_login_required( + '%s is only available for FIRST members' % display_id) + raise formats = self._extract_m3u8_formats( - m3u8_url, display_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls') + m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') self._sort_formats(formats) - description = strip_or_none(self._og_search_description(webpage)) - thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage)) + episode = self._download_json( + api_episode_url, display_id, + 'Downloading episode JSON metadata')['data'][0] + attributes = episode['attributes'] + title = attributes.get('title') or attributes['display_title'] + video_id = compat_str(episode['id']) - series = self._search_regex( - (r'

    More ([^<]+)

    ', r']+>See All ([^<]+) Videos<'), - webpage, 'series', fatal=False) - - comment_count = int_or_none(self._search_regex( - r'>Comments \((\d+)\)<', webpage, - 'comment count', fatal=False)) - - video_id = self._search_regex( - (r'containerId\s*=\s*["\']episode-(\d+)\1', - r' Date: Sat, 13 Jul 2019 12:47:02 +0100 Subject: [PATCH 0717/1201] [livejournal] Add new extractor(closes #21526) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/livejournal.py | 42 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/livejournal.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e88ad34a8..75a53f54b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -579,6 +579,7 @@ from .linkedin import ( ) from .linuxacademy import LinuxAcademyIE from .litv import LiTVIE +from .livejournal import LiveJournalIE from .liveleak import ( LiveLeakIE, LiveLeakEmbedIE, diff --git a/youtube_dl/extractor/livejournal.py b/youtube_dl/extractor/livejournal.py new file mode 100644 index 000000000..3a9f4553f --- /dev/null +++ b/youtube_dl/extractor/livejournal.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class LiveJournalIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P\d+)' + _TEST = { + 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272', + 'md5': 'adaf018388572ced8a6f301ace49d4b2', + 'info_dict': { + 'id': '1263729', + 'ext': 'mp4', + 'title': 'Истребители против БПЛА', + 'upload_date': '20190624', + 'timestamp': 1561406715, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + record = self._parse_json(self._search_regex( + r'Site\.page\s*=\s*({.+?});', webpage, + 'page data'), video_id)['video']['record'] + storage_id = compat_str(record['storageid']) + title = record.get('name') + if title: + # remove filename extension(.mp4, .mov, etc...) + title = title.rsplit('.', 1)[0] + return { + '_type': 'url_transparent', + 'id': video_id, + 'title': title, + 'thumbnail': record.get('thumbnail'), + 'timestamp': int_or_none(record.get('timecreate')), + 'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id, + 'ie_key': 'EaglePlatform', + } From 4a71ef6da677bfd08b39d5cb6f584be9f6b2fc27 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Jul 2019 13:08:19 +0100 Subject: [PATCH 0718/1201] [dlive] Add new extractor(closes #18080) --- youtube_dl/extractor/dlive.py | 94 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 ++ 2 files changed, 98 insertions(+) create mode 100644 youtube_dl/extractor/dlive.py diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py new file mode 100644 index 000000000..b81eaecce --- /dev/null +++ b/youtube_dl/extractor/dlive.py @@ -0,0 +1,94 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class DLiveVODIE(InfoExtractor): + IE_NAME = 'dlive:vod' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[a-zA-Z0-9]+)' + _TEST = { + 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR', + 'info_dict': { + 'id': '3mTzOl4WR', + 'ext': 'mp4', + 'title': 'Minecraft with james charles epic', + 'upload_date': '20190701', + 'timestamp': 1562011015, + 'uploader_id': 'pdp', + } + } + + def _real_extract(self, url): + uploader_id, vod_id = re.match(self._VALID_URL, url).groups() + broadcast = self._download_json( + 'https://graphigo.prd.dlive.tv/', vod_id, + data=json.dumps({'query': '''query { + pastBroadcast(permlink:"%s+%s") { + content + createdAt + length + playbackUrl + title + thumbnailUrl + viewCount + } +}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast'] + title = broadcast['title'] + formats = self._extract_m3u8_formats( + broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native') + self._sort_formats(formats) + return { + 'id': vod_id, + 'title': title, + 'uploader_id': uploader_id, + 'formats': formats, + 'description': broadcast.get('content'), + 'thumbnail': broadcast.get('thumbnailUrl'), + 'timestamp': int_or_none(broadcast.get('createdAt'), 1000), + 'view_count': int_or_none(broadcast.get('viewCount')), + } + + +class DLiveStreamIE(InfoExtractor): + IE_NAME = 'dlive:stream' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?P[\w.-]+)' + + def _real_extract(self, url): + display_name = self._match_id(url) + user = self._download_json( + 'https://graphigo.prd.dlive.tv/', display_name, + data=json.dumps({'query': '''query { + userByDisplayName(displayname:"%s") { + livestream { + content + createdAt + title + thumbnailUrl + watchingCount + } + username + } +}''' % display_name}).encode())['data']['userByDisplayName'] + livestream = user['livestream'] + title = livestream['title'] + username = user['username'] + formats = self._extract_m3u8_formats( + 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username, + display_name, 'mp4') + self._sort_formats(formats) + return { + 'id': display_name, + 'title': self._live_title(title), + 'uploader': display_name, + 'uploader_id': username, + 'formats': formats, + 'description': livestream.get('content'), + 'thumbnail': livestream.get('thumbnailUrl'), + 'is_live': True, + 'timestamp': int_or_none(livestream.get('createdAt'), 1000), + 'view_count': int_or_none(livestream.get('watchingCount')), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 75a53f54b..15f54a214 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1255,6 +1255,10 @@ from .udn import UDNEmbedIE from .ufctv import UFCTVIE from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE +from .dlive import ( + DLiveVODIE, + DLiveStreamIE, +) from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE From b99f11a56b64b647366a00c335a4a55f3e9e1854 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Jul 2019 14:11:57 +0100 Subject: [PATCH 0719/1201] [dlive] restrict DLive Stream _VALID_URL regex --- youtube_dl/extractor/dlive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py index b81eaecce..8787f15a6 100644 --- a/youtube_dl/extractor/dlive.py +++ b/youtube_dl/extractor/dlive.py @@ -55,7 +55,7 @@ class DLiveVODIE(InfoExtractor): class DLiveStreamIE(InfoExtractor): IE_NAME = 'dlive:stream' - _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?P[\w.-]+)' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P[\w.-]+)' def _real_extract(self, url): display_name = self._match_id(url) From 5f562bd4bbc780e535e187efb36659247b41d6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 00:09:39 +0700 Subject: [PATCH 0720/1201] [spankbang] Fix extraction (closes #21763, closes #21764) --- youtube_dl/extractor/spankbang.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index f11d728ca..eb0919e3a 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -106,6 +106,8 @@ class SpankBangIE(InfoExtractor): for format_id, format_url in stream.items(): if format_id.startswith(STREAM_URL_PREFIX): + if format_url and isinstance(format_url, list): + format_url = format_url[0] extract_format( format_id[len(STREAM_URL_PREFIX):], format_url) From f9eeeda31c1a643aced8283440983f3a45208840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 00:21:39 +0700 Subject: [PATCH 0721/1201] [spankbang] Fix and improve metadata extraction --- youtube_dl/extractor/spankbang.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index eb0919e3a..e040ada29 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + merge_dicts, orderedSet, parse_duration, parse_resolution, @@ -26,6 +27,8 @@ class SpankBangIE(InfoExtractor): 'description': 'dillion harper masturbates on a bed', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', + 'timestamp': 1422571989, + 'upload_date': '20150129', 'age_limit': 18, } }, { @@ -113,26 +116,29 @@ class SpankBangIE(InfoExtractor): self._sort_formats(formats) + info = self._search_json_ld(webpage, video_id, default={}) + title = self._html_search_regex( - r'(?s)]*>(.+?)', webpage, 'title') + r'(?s)]*>(.+?)', webpage, 'title', default=None) description = self._search_regex( r']+\bclass=["\']bottom[^>]+>\s*

    [^<]*

    \s*

    ([^<]+)', - webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) - uploader = self._search_regex( - r'class="user"[^>]*>]+>([^<]+)', + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail(webpage, default=None) + uploader = self._html_search_regex( + (r'(?s)]+class=["\']profile[^>]+>(.+?)', + r'class="user"[^>]*>]+>([^<]+)'), webpage, 'uploader', default=None) duration = parse_duration(self._search_regex( r']+\bclass=["\']right_side[^>]+>\s*([^<]+)', - webpage, 'duration', fatal=False)) + webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( - r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False)) + r'([\d,.]+)\s+plays', webpage, 'view count', default=None)) age_limit = self._rta_search(webpage) - return { + return merge_dicts({ 'id': video_id, - 'title': title, + 'title': title or video_id, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, @@ -140,7 +146,8 @@ class SpankBangIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': age_limit, - } + }, info + ) class SpankBangPlaylistIE(InfoExtractor): From b7ef93f0ab2963047953be1472a5a108d92b621c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 01:19:17 +0700 Subject: [PATCH 0722/1201] [twitter] Improve uploader id extraction (closes #21705) --- youtube_dl/extractor/twitter.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 41d0b6be8..cebb6238c 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -428,11 +428,22 @@ class TwitterIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, { + 'url': 'https://twitter.com/foobar/status/1087791357756956680', + 'info_dict': { + 'id': '1087791357756956680', + 'ext': 'mp4', + 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'md5:66d493500c013e3e2d434195746a7f78', + 'uploader': 'Twitter', + 'uploader_id': 'Twitter', + 'duration': 61.567, + }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - user_id = mobj.group('user_id') twid = mobj.group('id') webpage, urlh = self._download_webpage_handle( @@ -441,8 +452,13 @@ class TwitterIE(InfoExtractor): if 'twitter.com/account/suspended' in urlh.geturl(): raise ExtractorError('Account suspended by Twitter.', expected=True) - if user_id is None: - mobj = re.match(self._VALID_URL, urlh.geturl()) + user_id = None + + redirect_mobj = re.match(self._VALID_URL, urlh.geturl()) + if redirect_mobj: + user_id = redirect_mobj.group('user_id') + + if not user_id: user_id = mobj.group('user_id') username = remove_end(self._og_search_title(webpage), ' on Twitter') From ba036333bf4ebcba70deb51e77e81dca723fb54d Mon Sep 17 00:00:00 2001 From: geditorit <52565706+geditorit@users.noreply.github.com> Date: Sun, 14 Jul 2019 01:23:22 +0700 Subject: [PATCH 0723/1201] [youtube] Add more invidious instances to _VALID_URL (#21694) --- youtube_dl/extractor/youtube.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8a3c502ba..762611b89 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -371,10 +371,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| + # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances (?:(?:www|dev)\.)?invidio\.us/| - (?:www\.)?invidiou\.sh/| - (?:www\.)?invidious\.snopyta\.org/| + (?:(?:www|no)\.)?invidiou\.sh/| + (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/| (?:www\.)?invidious\.kabi\.tk/| + (?:www\.)?invidious\.enkirton\.net/| + (?:www\.)?invidious\.13ad\.de/| + (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls From d89a0a8026e0010a96a1309d70f8fcc2164dd5a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 01:43:31 +0700 Subject: [PATCH 0724/1201] [lynda] Handle missing subtitles (closes #20490, closes #20513) --- youtube_dl/extractor/lynda.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 3084c6dff..b3d8653d0 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -117,6 +117,10 @@ class LyndaIE(LyndaBaseIE): }, { 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html', 'only_matching': True, + }, { + # Status="NotFound", Message="Transcript not found" + 'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html', + 'only_matching': True, }] def _raise_unavailable(self, video_id): @@ -247,12 +251,17 @@ class LyndaIE(LyndaBaseIE): def _get_subtitles(self, video_id): url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id - subs = self._download_json(url, None, False) + subs = self._download_webpage( + url, video_id, 'Downloading subtitles JSON', fatal=False) + if not subs or 'Status="NotFound"' in subs: + return {} + subs = self._parse_json(subs, video_id, fatal=False) + if not subs: + return {} fixed_subs = self._fix_subtitles(subs) if fixed_subs: return {'en': [{'ext': 'srt', 'data': fixed_subs}]} - else: - return {} + return {} class LyndaCourseIE(LyndaBaseIE): From c452790a796730113dd62db0e743b11045606e27 Mon Sep 17 00:00:00 2001 From: aerworker Date: Sat, 13 Jul 2019 22:38:47 +0300 Subject: [PATCH 0725/1201] [yandexmusic] Add support for multi disk albums and extract track number and disk number (closes #21420) (#21421) * [yandexmusic] extract tracks from all volumes of an album (closes #21420) * [yandexmusic] extract genre, disk_number and track_number * [yandexmusic] extract decomposed artist names * Update yandexmusic.py * Update yandexmusic.py * Update yandexmusic.py --- youtube_dl/extractor/yandexmusic.py | 63 +++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 1dfee59e9..fea817419 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -51,23 +51,43 @@ class YandexMusicTrackIE(YandexMusicBaseIE): IE_DESC = 'Яндекс.Музыка - Трек' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/track/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508/track/4878838', 'md5': 'f496818aa2f60b6c0062980d2e00dc20', 'info_dict': { 'id': '4878838', 'ext': 'mp3', - 'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', + 'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', 'filesize': 4628061, 'duration': 193.04, 'track': 'Gypsy Eyes 1', 'album': 'Gypsy Soul', 'album_artist': 'Carlo Ambrosio', - 'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari', + 'artist': 'Carlo Ambrosio & Fabio Di Bari', 'release_year': 2009, }, 'skip': 'Travis CI servers blocked by YandexMusic', - } + }, { + # multiple disks + 'url': 'http://music.yandex.ru/album/3840501/track/705105', + 'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e', + 'info_dict': { + 'id': '705105', + 'ext': 'mp3', + 'title': 'Hooverphonic - Sometimes', + 'filesize': 5743386, + 'duration': 239.27, + 'track': 'Sometimes', + 'album': 'The Best of Hooverphonic', + 'album_artist': 'Hooverphonic', + 'artist': 'Hooverphonic', + 'release_year': 2016, + 'genre': 'pop', + 'disc_number': 2, + 'track_number': 9, + }, + 'skip': 'Travis CI servers blocked by YandexMusic', + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -110,9 +130,21 @@ class YandexMusicTrackIE(YandexMusicBaseIE): 'abr': int_or_none(download_data.get('bitrate')), } + def extract_artist_name(artist): + decomposed = artist.get('decomposed') + if not isinstance(decomposed, list): + return artist['name'] + parts = [artist['name']] + for element in decomposed: + if isinstance(element, dict) and element.get('name'): + parts.append(element['name']) + elif isinstance(element, compat_str): + parts.append(element) + return ''.join(parts) + def extract_artist(artist_list): if artist_list and isinstance(artist_list, list): - artists_names = [a['name'] for a in artist_list if a.get('name')] + artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')] if artists_names: return ', '.join(artists_names) @@ -121,10 +153,17 @@ class YandexMusicTrackIE(YandexMusicBaseIE): album = albums[0] if isinstance(album, dict): year = album.get('year') + disc_number = int_or_none(try_get( + album, lambda x: x['trackPosition']['volume'])) + track_number = int_or_none(try_get( + album, lambda x: x['trackPosition']['index'])) track_info.update({ 'album': album.get('title'), 'album_artist': extract_artist(album.get('artists')), 'release_year': int_or_none(year), + 'genre': album.get('genre'), + 'disc_number': disc_number, + 'track_number': track_number, }) track_artist = extract_artist(track.get('artists')) @@ -152,7 +191,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): IE_DESC = 'Яндекс.Музыка - Альбом' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/?(\?|$)' - _TEST = { + _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508', 'info_dict': { 'id': '540508', @@ -160,7 +199,15 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): }, 'playlist_count': 50, 'skip': 'Travis CI servers blocked by YandexMusic', - } + }, { + 'url': 'https://music.yandex.ru/album/3840501', + 'info_dict': { + 'id': '3840501', + 'title': 'Hooverphonic - The Best of Hooverphonic (2016)', + }, + 'playlist_count': 33, + 'skip': 'Travis CI servers blocked by YandexMusic', + }] def _real_extract(self, url): album_id = self._match_id(url) @@ -169,7 +216,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id, album_id, 'Downloading album JSON') - entries = self._build_playlist(album['volumes'][0]) + entries = self._build_playlist([track for volume in album['volumes'] for track in volume]) title = '%s - %s' % (album['artists'][0]['name'], album['title']) year = album.get('year') From 2fe074a960773c2ec6f0a94a8c5fab5af8714651 Mon Sep 17 00:00:00 2001 From: hrimfaxi Date: Sun, 14 Jul 2019 03:57:44 +0800 Subject: [PATCH 0726/1201] [porn91] Fix extraction (#21312) --- youtube_dl/extractor/porn91.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 24c3600fe..20eac647a 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -39,7 +39,12 @@ class Porn91IE(InfoExtractor): r'

    ([^<]+)
    ', webpage, 'title') title = title.replace('\n', '') - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + video_link_url = self._search_regex( + r']+id=["\']fm-video_link[^>]+>([^<]+)', + webpage, 'video link') + videopage = self._download_webpage(video_link_url, video_id) + + info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0] duration = parse_duration(self._search_regex( r'时长:\s*\s*(\d+:\d+)', webpage, 'duration', fatal=False)) From 364a2cb658a0db069a746ca5e25c8b589b3c509d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 03:07:02 +0700 Subject: [PATCH 0727/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 45cb2746e..bc722b73c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +version + +Extractors +* [porn91] Fix extraction (#21312) ++ [yandexmusic] Extract track number and disk number (#21421) ++ [yandexmusic] Add support for multi disk albums (#21420, #21421) +* [lynda] Handle missing subtitles (#20490, #20513) ++ [youtube] Add more invidious instances to URL regular expression (#21694) +* [twitter] Improve uploader id extraction (#21705) +* [spankbang] Fix and improve metadata extraction +* [spankbang] Fix extraction (#21763, #21764) ++ [dlive] Add support for dlive.tv (#18080) ++ [livejournal] Add support for livejournal.com (#21526) +* [roosterteeth] Fix free episode extraction (#16094) +* [dbtv] Fix extraction +* [bellator] Fix extraction +- [rudo] Remove extractor (#18430, #18474) +* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224) +* [bleacherreport] Fix Bleacher Report CMS extraction +* [espn] Fix fivethirtyeight.com extraction +* [5tv] Relax video URL regular expression and support https URLs +* [youtube] Fix is_live extraction (#21734) +* [youtube] Fix authentication (#11270) + + version 2019.07.12 Core From 0250161c5272e2794f33085f9f8c3f464d8ee996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 03:09:16 +0700 Subject: [PATCH 0728/1201] [yandexmusic] Add missing import --- youtube_dl/extractor/yandexmusic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index fea817419..08d35e04c 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -10,6 +10,7 @@ from ..utils import ( ExtractorError, int_or_none, float_or_none, + try_get, ) From ce80cacefd70a2c268de2fb1d5838ce66ac9a683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 03:10:49 +0700 Subject: [PATCH 0729/1201] release 2019.07.14 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 4 +++- youtube_dl/version.py | 2 +- 8 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index fcfadeb1f..80ca6d5f1 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.12 + [debug] youtube-dl version 2019.07.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 7e1a3d1c0..a4f3c4dd9 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 71782a104..9d82e1cd9 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 6bcfde1f8..ff82a7435 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.12 + [debug] youtube-dl version 2019.07.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 89d9c63aa..f692c663d 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index bc722b73c..be1606586 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.14 Extractors * [porn91] Fix extraction (#21312) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 4e664336d..9ae6e5c96 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -223,6 +223,8 @@ - **DiscoveryNetworksDe** - **DiscoveryVR** - **Disney** + - **dlive:stream** + - **dlive:vod** - **Dotsub** - **DouyuShow** - **DouyuTV**: 斗鱼 @@ -448,6 +450,7 @@ - **linkedin:learning:course** - **LinuxAcademy** - **LiTV** + - **LiveJournal** - **LiveLeak** - **LiveLeakEmbed** - **livestream** @@ -754,7 +757,6 @@ - **rtve.es:television** - **RTVNH** - **RTVS** - - **Rudo** - **RUHD** - **rutube**: Rutube videos - **rutube:channel**: Rutube channels diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 18bcb33e2..8a7f4d733 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.12' +__version__ = '2019.07.14' From 898238e9f82e29ef139ff934f6949ddf574bd4d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 20:30:05 +0700 Subject: [PATCH 0730/1201] [youtube] Restrict is_live extraction (closes #21782) --- youtube_dl/extractor/youtube.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 762611b89..43a3fad9f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1896,9 +1896,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): view_count = int_or_none(video_details.get('viewCount')) if is_live is None: - is_live = bool_or_none(dict_get( - video_details, ('isLive', 'isLiveContent'), - skip_false_values=False)) + is_live = bool_or_none(video_details.get('isLive')) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: From 2adedc477ee4c87709ca8d1c9bdfac3c31b1a57b Mon Sep 17 00:00:00 2001 From: Gary <35942108+LameLemon@users.noreply.github.com> Date: Mon, 15 Jul 2019 18:53:20 +0300 Subject: [PATCH 0731/1201] [gfycat] Extend _VALID_URL (closes #21779) (#21780) --- youtube_dl/extractor/gfycat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index eb6f85836..bbe3cb283 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -11,7 +11,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P[^-/?#]+)' + _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#]+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor): 'categories': list, 'age_limit': 0, } + }, { + 'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish', + 'only_matching': True }, { 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull', 'only_matching': True From 791d2e81172826ef645b62c6961c65f8c2cb2a4f Mon Sep 17 00:00:00 2001 From: geditorit <52565706+geditorit@users.noreply.github.com> Date: Mon, 15 Jul 2019 22:54:22 +0700 Subject: [PATCH 0732/1201] [youtube] Add support for invidious.mastodon.host (#21777) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 43a3fad9f..a87a46b3b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -378,6 +378,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?invidious\.kabi\.tk/| (?:www\.)?invidious\.enkirton\.net/| (?:www\.)?invidious\.13ad\.de/| + (?:www\.)?invidious\.mastodon\.host/| (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains From f2a213d02596b603dea5be65f4778591101db5a2 Mon Sep 17 00:00:00 2001 From: tlonic Date: Mon, 15 Jul 2019 11:58:55 -0400 Subject: [PATCH 0733/1201] [einthusan] Add support for einthusan.com (closes #21748) (#21775) --- youtube_dl/extractor/einthusan.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 4485bf8c1..1fb00c9b0 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..compat import ( @@ -18,7 +19,7 @@ from ..utils import ( class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?Peinthusan\.(?:tv|com))/movie/watch/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://einthusan.tv/movie/watch/9097/', 'md5': 'ff0f7f2065031b8a2cf13a933731c035', @@ -32,6 +33,9 @@ class EinthusanIE(InfoExtractor): }, { 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi', 'only_matching': True, + }, { + 'url': 'https://einthusan.com/movie/watch/9097/', + 'only_matching': True, }] # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js @@ -41,7 +45,9 @@ class EinthusanIE(InfoExtractor): )).decode('utf-8'), video_id) def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) @@ -53,7 +59,7 @@ class EinthusanIE(InfoExtractor): page_id = self._html_search_regex( ']+data-pageid="([^"]+)"', webpage, 'page ID') video_data = self._download_json( - 'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id, + 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id, data=urlencode_postdata({ 'xEvent': 'UIVideoPlayer.PingOutcome', 'xJson': json.dumps({ From 7d4dd3e5b444c43c1cc19b53689514e8deaf3849 Mon Sep 17 00:00:00 2001 From: chien-yu <32920873+chien-yu@users.noreply.github.com> Date: Mon, 15 Jul 2019 09:03:03 -0700 Subject: [PATCH 0734/1201] [ctsnews] Fix YouTube embeds extraction (#21678) --- youtube_dl/extractor/ctsnews.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index d565335cf..dcda7e89d 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import unified_timestamp - +from .youtube import YoutubeIE class CtsNewsIE(InfoExtractor): IE_DESC = '華視新聞' @@ -14,8 +14,8 @@ class CtsNewsIE(InfoExtractor): 'info_dict': { 'id': '201501291578109', 'ext': 'mp4', - 'title': '以色列.真主黨交火 3人死亡', - 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...', + 'title': '以色列.真主黨交火 3人死亡 - 華視新聞網', + 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...', 'timestamp': 1422528540, 'upload_date': '20150129', } @@ -26,7 +26,7 @@ class CtsNewsIE(InfoExtractor): 'info_dict': { 'id': '201309031304098', 'ext': 'mp4', - 'title': '韓國31歲童顏男 貌如十多歲小孩', + 'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網', 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1378205880, @@ -62,8 +62,7 @@ class CtsNewsIE(InfoExtractor): video_url = mp4_feed['source_url'] else: self.to_screen('Not CTSPlayer video, trying Youtube...') - youtube_url = self._search_regex( - r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url') + youtube_url = YoutubeIE._extract_url(page) return self.url_result(youtube_url, ie='Youtube') From 799756a3b3c794284ca52b9af482e1f03fc46833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jul 2019 23:47:10 +0700 Subject: [PATCH 0735/1201] [kaltura] Check source format URL (#21290) --- youtube_dl/extractor/kaltura.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 639d73837..0a733424c 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -103,6 +103,11 @@ class KalturaIE(InfoExtractor): { 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', 'only_matching': True, + }, + { + # unavailable source format + 'url': 'kaltura:513551:1_66x4rg7o', + 'only_matching': True, } ] @@ -306,12 +311,17 @@ class KalturaIE(InfoExtractor): f['fileExt'] = 'mp4' video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) + format_id = '%(fileExt)s-%(bitrate)s' % f + # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o) + if f.get('isOriginal') is True and not self._is_valid_url( + video_url, entry_id, format_id): + continue # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g # -f mp4-56) vcodec = 'none' if 'videoCodecId' not in f and f.get( 'frameRate') == 0 else f.get('videoCodecId') formats.append({ - 'format_id': '%(fileExt)s-%(bitrate)s' % f, + 'format_id': format_id, 'ext': f.get('fileExt'), 'tbr': int_or_none(f['bitrate']), 'fps': int_or_none(f.get('frameRate')), From f61496863d2207718a2a6cb1591dcbc7abc282de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jul 2019 23:56:05 +0700 Subject: [PATCH 0736/1201] [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv (closes #21281, closes #21290) --- youtube_dl/extractor/asiancrush.py | 80 +++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py index 6d71c5ad5..0348e680c 100644 --- a/youtube_dl/extractor/asiancrush.py +++ b/youtube_dl/extractor/asiancrush.py @@ -5,14 +5,12 @@ import re from .common import InfoExtractor from .kaltura import KalturaIE -from ..utils import ( - extract_attributes, - remove_end, -) +from ..utils import extract_attributes class AsianCrushIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P\d+)v\b' + _VALID_URL_BASE = r'https?://(?:www\.)?(?P(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))' + _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P\d+)v\b' % _VALID_URL_BASE _TESTS = [{ 'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/', 'md5': 'c3b740e48d0ba002a42c0b72857beae6', @@ -20,7 +18,7 @@ class AsianCrushIE(InfoExtractor): 'id': '1_y4tmjm5r', 'ext': 'mp4', 'title': 'Women Who Flirt', - 'description': 'md5:3db14e9186197857e7063522cb89a805', + 'description': 'md5:7e986615808bcfb11756eb503a751487', 'timestamp': 1496936429, 'upload_date': '20170608', 'uploader_id': 'craig@crifkin.com', @@ -28,10 +26,27 @@ class AsianCrushIE(InfoExtractor): }, { 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', 'only_matching': True, + }, { + 'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/', + 'only_matching': True, + }, { + 'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/video/010400v/drifters/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/', + 'only_matching': True, + }, { + 'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) @@ -51,7 +66,7 @@ class AsianCrushIE(InfoExtractor): r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id') player = self._download_webpage( - 'https://api.asiancrush.com/embeddedVideoPlayer', video_id, + 'https://api.%s/embeddedVideoPlayer' % host, video_id, query={'id': entry_id}) kaltura_id = self._search_regex( @@ -63,15 +78,23 @@ class AsianCrushIE(InfoExtractor): r'/p(?:artner_id)?/(\d+)', player, 'partner id', default='513551') - return self.url_result( - 'kaltura:%s:%s' % (partner_id, kaltura_id), - ie=KalturaIE.ie_key(), video_id=kaltura_id, - video_title=title) + description = self._html_search_regex( + r'(?s)]+\bclass=["\']description["\'][^>]*>(.+?)
    ', + webpage, 'description', fatal=False) + + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), + 'ie_key': KalturaIE.ie_key(), + 'id': video_id, + 'title': title, + 'description': description, + } class AsianCrushPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P\d+)s\b' - _TEST = { + _VALID_URL = r'%s/series/0+(?P\d+)s\b' % AsianCrushIE._VALID_URL_BASE + _TESTS = [{ 'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/', 'info_dict': { 'id': '12481', @@ -79,7 +102,16 @@ class AsianCrushPlaylistIE(InfoExtractor): 'description': 'md5:7addd7c5132a09fd4741152d96cce886', }, 'playlist_count': 20, - } + }, { + 'url': 'https://www.yuyutv.com/series/013920s/peep-show/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/series/016375s/mononoke/', + 'only_matching': True, + }, { + 'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/', + 'only_matching': True, + }] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -96,15 +128,15 @@ class AsianCrushPlaylistIE(InfoExtractor): entries.append(self.url_result( mobj.group('url'), ie=AsianCrushIE.ie_key())) - title = remove_end( - self._html_search_regex( - r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, - 'title', default=None) or self._og_search_title( - webpage, default=None) or self._html_search_meta( - 'twitter:title', webpage, 'title', - default=None) or self._search_regex( - r'([^<]+)', webpage, 'title', fatal=False), - ' | AsianCrush') + title = self._html_search_regex( + r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, + 'title', default=None) or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', + default=None) or self._search_regex( + r'([^<]+)', webpage, 'title', fatal=False) + if title: + title = re.sub(r'\s*\|\s*.+?$', '', title) description = self._og_search_description( webpage, default=None) or self._html_search_meta( From 8b4a0ebf10376e89daa9da3cd9570a3f16f8f375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jul 2019 23:59:23 +0700 Subject: [PATCH 0737/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index be1606586..a67b0595a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +version + +Extractors ++ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv + (#21281, #21290) +* [kaltura] Check source format URL (#21290) +* [ctsnews] Fix YouTube embeds extraction (#21678) ++ [einthusan] Add support for einthusan.com (#21748, #21775) ++ [youtube] Add support for invidious.mastodon.host (#21777) ++ [gfycat] Extend URL regular expression (#21779, #21780) +* [youtube] Restrict is_live extraction (#21782) + + version 2019.07.14 Extractors From 2f1991ff1461b11134d2b09d6e8d681ce51d93d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Jul 2019 00:01:46 +0700 Subject: [PATCH 0738/1201] release 2019.07.16 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 80ca6d5f1..89001802b 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.14 + [debug] youtube-dl version 2019.07.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index a4f3c4dd9..4cc58fa42 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 9d82e1cd9..f38760b77 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index ff82a7435..e4133dc4e 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.14 + [debug] youtube-dl version 2019.07.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index f692c663d..0bb6543e3 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index a67b0595a..fe0ca7164 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.16 Extractors + [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8a7f4d733..b0f5a6b47 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.14' +__version__ = '2019.07.16' From 1824bfdcdff1af4bfc4f7f6ed885d45ee7e8c376 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Jul 2019 22:51:10 +0100 Subject: [PATCH 0739/1201] [vrv] fix CMS signing query extraction(closes #21809) --- youtube_dl/extractor/vrv.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index c814a8a4a..6e51469b0 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -64,7 +64,15 @@ class VRVBaseIE(InfoExtractor): def _call_cms(self, path, video_id, note): if not self._CMS_SIGNING: - self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing'] + index = self._call_api('index', video_id, 'CMS Signing') + self._CMS_SIGNING = index.get('cms_signing') or {} + if not self._CMS_SIGNING: + for signing_policy in index.get('signing_policies', []): + signing_path = signing_policy.get('path') + if signing_path and signing_path.startswith('/cms/'): + name, value = signing_policy.get('name'), signing_policy.get('value') + if name and value: + self._CMS_SIGNING[name] = value return self._download_json( self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) From 5e1c39ac853bfe4da7feda2a48544cb5811873d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Wed, 17 Jul 2019 17:47:53 +0200 Subject: [PATCH 0740/1201] [extractor/common] Fix typo in thumbnails resolution description (#21817) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c3e9eec6..859786617 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -220,7 +220,7 @@ class InfoExtractor(object): * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) - * "resolution" (optional, string "{width}x{height"}, + * "resolution" (optional, string "{width}x{height}", deprecated) * "filesize" (optional, int) thumbnail: Full URL to a video thumbnail image. From 9c1da4a9f9fc17cffc2fa2261030c66d2a032a58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 20 Jul 2019 23:08:26 +0700 Subject: [PATCH 0741/1201] [extractor/generic] Restrict --default-search schemeless URLs detection pattern (closes #21842) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 77e217460..d34fc4b15 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2226,7 +2226,7 @@ class GenericIE(InfoExtractor): default_search = 'fixup_error' if default_search in ('auto', 'auto_warning', 'fixup_error'): - if '/' in url: + if re.match(r'^[^\s/]+\.[^\s/]+/', url): self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) elif default_search != 'fixup_error': From 2e18adec98a44ca839cbaaed7ce27d8d07f54cfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 20 Jul 2019 23:46:34 +0700 Subject: [PATCH 0742/1201] [youtube:playlist] Relax _VIDEO_RE (closes #21844) --- youtube_dl/extractor/youtube.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a87a46b3b..aa316ba88 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2432,7 +2432,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (%(playlist_id)s) )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' - _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})&[^"]*?index=(?P\d+)(?:[^>]+>(?P[^<]+))?' + _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?' IE_NAME = 'youtube:playlist' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', @@ -2556,6 +2556,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'noplaylist': True, 'skip_download': True, }, + }, { + # https://github.com/ytdl-org/youtube-dl/issues/21844 + 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'info_dict': { + 'title': 'Data Analysis with Dr Mike Pound', + 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'uploader_id': 'Computerphile', + 'uploader': 'Computerphile', + }, + 'playlist_mincount': 11, }, { 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21', 'only_matching': True, From 13a75688a55f32cde316b0f7d5992ff4a1f6d279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Jul 2019 00:01:46 +0700 Subject: [PATCH 0743/1201] [youtube] Fix some tests --- youtube_dl/extractor/youtube.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index aa316ba88..b2c714505 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2455,6 +2455,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': '29C3: Not my department', 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', + 'uploader': 'Christiaan008', + 'uploader_id': 'ChRiStIaAn008', }, 'playlist_count': 95, }, { @@ -2463,6 +2465,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': '[OLD]Team Fortress 2 (Class-based LP)', 'id': 'PLBB231211A4F62143', + 'uploader': 'Wickydoo', + 'uploader_id': 'Wickydoo', }, 'playlist_mincount': 26, }, { @@ -2471,6 +2475,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': 'Uploads from Cauchemar', 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', + 'uploader': 'Cauchemar', + 'uploader_id': 'Cauchemar89', }, 'playlist_mincount': 799, }, { @@ -2488,13 +2494,17 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': 'JODA15', 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', + 'uploader': 'milan', + 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw', } }, { 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', 'playlist_mincount': 485, 'info_dict': { - 'title': '2017 華語最新單曲 (2/24更新)', + 'title': '2018 Chinese New Singles (11/6 updated)', 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', + 'uploader': 'LBK', + 'uploader_id': 'sdragonfang', } }, { 'note': 'Embedded SWF player', @@ -2503,13 +2513,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': 'JODA7', 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ', - } + }, + 'skip': 'This playlist does not exist', }, { 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', 'info_dict': { 'title': 'Uploads from Interstellar Movie', 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', + 'uploader': 'Interstellar Movie', + 'uploader_id': 'InterstellarMovie1', }, 'playlist_mincount': 21, }, { @@ -2534,6 +2547,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This video is not available.', 'add_ie': [YoutubeIE.ie_key()], }, { 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5', @@ -2545,7 +2559,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'uploader_id': 'backuspagemuseum', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum', 'upload_date': '20161008', - 'license': 'Standard YouTube License', 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a', 'categories': ['Nonprofits & Activism'], 'tags': list, @@ -2732,6 +2745,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w', 'title': 'Uploads from lex will', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', } }, { 'note': 'Age restricted channel', @@ -2741,6 +2756,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', + 'uploader': 'Deus Ex', + 'uploader_id': 'DeusExOfficial', }, }, { 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', @@ -2825,6 +2842,8 @@ class YoutubeUserIE(YoutubeChannelIE): 'info_dict': { 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ', 'title': 'Uploads from The Linux Foundation', + 'uploader': 'The Linux Foundation', + 'uploader_id': 'TheLinuxFoundation', } }, { # Only available via https://www.youtube.com/c/12minuteathlete/videos @@ -2834,6 +2853,8 @@ class YoutubeUserIE(YoutubeChannelIE): 'info_dict': { 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ', 'title': 'Uploads from 12 Minute Athlete', + 'uploader': '12 Minute Athlete', + 'uploader_id': 'the12minuteathlete', } }, { 'url': 'ytuser:phihag', @@ -2927,7 +2948,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): 'playlist_mincount': 4, 'info_dict': { 'id': 'ThirstForScience', - 'title': 'Thirst for Science', + 'title': 'ThirstForScience', }, }, { # with "Load more" button @@ -2944,6 +2965,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): 'id': 'UCiU1dHvZObB2iP6xkJ__Icw', 'title': 'Chem Player', }, + 'skip': 'Blocked', }] From 3b446ab3519948980630e3328b971385826ffba8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 20 Jul 2019 20:20:30 +0100 Subject: [PATCH 0744/1201] [discovery] add support go.discovery.com URLs --- youtube_dl/extractor/discovery.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index b70c307a7..9003545ce 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -19,9 +19,9 @@ from ..compat import compat_HTTPError class DiscoveryIE(DiscoveryGoBaseIE): _VALID_URL = r'''(?x)https?:// (?P<site> + (?:(?:www|go)\.)?discovery| (?:www\.)? (?: - discovery| investigationdiscovery| discoverylife| animalplanet| @@ -56,6 +56,9 @@ class DiscoveryIE(DiscoveryGoBaseIE): }, { 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', 'only_matching': True, + }, { + 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', + 'only_matching': True, }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False From ab794a553c36ddd690e2243450653c3ede43e606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Jul 2019 13:20:21 +0700 Subject: [PATCH 0745/1201] [ctsnews] PEP 8 --- youtube_dl/extractor/ctsnews.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index dcda7e89d..679f1d92e 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..utils import unified_timestamp from .youtube import YoutubeIE + class CtsNewsIE(InfoExtractor): IE_DESC = '華視新聞' _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' From 608b8a4300fab7792e637fd9b7045adf1c0cb2aa Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Mon, 22 Jul 2019 02:59:36 +0900 Subject: [PATCH 0746/1201] [yahoo:japannews] Add extractor (closes #21698) (#21265) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/yahoo.py | 131 +++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 15f54a214..06de556b7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1448,6 +1448,7 @@ from .yahoo import ( YahooSearchIE, YahooGyaOPlayerIE, YahooGyaOIE, + YahooJapanNewsIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic import ( diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index a3b5f00c8..e5ebdd180 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals +import hashlib import itertools import json import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( + compat_str, compat_urllib_parse, compat_urlparse, ) @@ -18,7 +20,9 @@ from ..utils import ( int_or_none, mimetype2ext, smuggle_url, + try_get, unescapeHTML, + url_or_none, ) from .brightcove import ( @@ -556,3 +560,130 @@ class YahooGyaOIE(InfoExtractor): 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'), YahooGyaOPlayerIE.ie_key(), video_id)) return self.playlist_result(entries, program_id) + + +class YahooJapanNewsIE(InfoExtractor): + IE_NAME = 'yahoo:japannews' + IE_DESC = 'Yahoo! Japan News' + _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?' + _GEO_COUNTRIES = ['JP'] + _TESTS = [{ + 'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int', + 'info_dict': { + 'id': '1736242', + 'ext': 'mp4', + 'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース', + 'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))', + 'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$', + }, + 'params': { + 'skip_download': True, + }, + }, { + # geo restricted + 'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04', + 'only_matching': True, + }, { + 'url': 'https://headlines.yahoo.co.jp/videonews/', + 'only_matching': True, + }, { + 'url': 'https://news.yahoo.co.jp', + 'only_matching': True, + }, { + 'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/', + 'only_matching': True, + }, { + 'url': 'https://news.yahoo.co.jp/feature/1356', + 'only_matching': True + }] + + def _extract_formats(self, json_data, content_id): + formats = [] + + video_data = try_get( + json_data, + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) + for vid in video_data or []: + delivery = vid.get('delivery') + url = url_or_none(vid.get('Url')) + if not delivery or not url: + continue + elif delivery == 'hls': + formats.extend( + self._extract_m3u8_formats( + url, content_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': url, + 'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')), + 'height': int_or_none(vid.get('height')), + 'width': int_or_none(vid.get('width')), + 'tbr': int_or_none(vid.get('bitrate')), + }) + self._remove_duplicate_formats(formats) + self._sort_formats(formats) + + return formats + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + display_id = mobj.group('id') or host + + webpage = self._download_webpage(url, display_id) + + title = self._html_search_meta( + ['og:title', 'twitter:title'], webpage, 'title', default=None + ) or self._html_search_regex('<title>([^<]+)', webpage, 'title') + + if display_id == host: + # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...) + stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage) + entries = [ + self.url_result( + smuggle_url( + 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id, + {'geo_countries': ['JP']}), + ie='BrightcoveNew', video_id=plist_id) + for plist_id in stream_plists] + return self.playlist_result(entries, playlist_title=title) + + # Article page + description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'twitter:image', webpage, 'thumbnail', default=None) + space_id = self._search_regex([ + r']+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)', + r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)', + r' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.16 + [debug] youtube-dl version 2019.07.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 4cc58fa42..aeca69974 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index f38760b77..e232df726 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index e4133dc4e..8608a085c 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.16 + [debug] youtube-dl version 2019.07.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 0bb6543e3..64864a3b7 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 08e58524e..32070b8d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.27 Extractors + [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9ae6e5c96..7cf60eefe 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1117,6 +1117,7 @@ - **Yahoo**: Yahoo screen and movies - **yahoo:gyao** - **yahoo:gyao:player** + - **yahoo:japannews**: Yahoo! Japan News - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b0f5a6b47..e3e37b8c5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.16' +__version__ = '2019.07.27' From 8dbf751aa241475dd8a7a6d3040713b5874fd057 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 30 Jul 2019 00:13:33 +0100 Subject: [PATCH 0750/1201] [youtube] improve title and description extraction(closes #21934) --- youtube_dl/extractor/youtube.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b2c714505..9a182fcf6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1820,16 +1820,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_details = try_get( player_response, lambda x: x['videoDetails'], dict) or {} - # title - if 'title' in video_info: - video_title = video_info['title'][0] - elif 'title' in player_response: - video_title = video_details['title'] - else: + video_title = video_info.get('title', [None])[0] or video_details.get('title') + if not video_title: self._downloader.report_warning('Unable to extract video title') video_title = '_' - # description description_original = video_description = get_element_by_id("eow-description", video_webpage) if video_description: @@ -1854,11 +1849,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ''', replace_url, video_description) video_description = clean_html(video_description) else: - fd_mobj = re.search(r' Date: Tue, 30 Jul 2019 09:41:23 +0700 Subject: [PATCH 0751/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 32070b8d5..0dbfc4dbf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [youtube] Fix and improve title and description extraction (#21934) + + version 2019.07.27 Extractors From 85c2c4b4abea4618be8013d41f6ba9e95c4e5e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Jul 2019 09:43:47 +0700 Subject: [PATCH 0752/1201] release 2019.07.30 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 06322bb2f..ccd033716 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.27 + [debug] youtube-dl version 2019.07.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index aeca69974..8709937ad 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index e232df726..c3a555ed3 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8608a085c..07042a466 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.27 + [debug] youtube-dl version 2019.07.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 64864a3b7..4cf75a2eb 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 0dbfc4dbf..f6f1f7e38 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.30 Extractors * [youtube] Fix and improve title and description extraction (#21934) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e3e37b8c5..04dc83605 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.27' +__version__ = '2019.07.30' From c2d125d99f81aa33429b2158acd9a90524575378 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Jul 2019 00:14:08 +0700 Subject: [PATCH 0753/1201] [youtube] Improve metadata extraction for age gate content (closes #21943) --- youtube_dl/extractor/youtube.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9a182fcf6..1aee0e465 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1700,6 +1700,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_token(v_info): return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token')) + def extract_player_response(player_response, video_id): + pl_response = str_or_none(player_response) + if not pl_response: + return + pl_response = self._parse_json(pl_response, video_id, fatal=False) + if isinstance(pl_response, dict): + add_dash_mpd_pr(pl_response) + return pl_response + player_response = {} # Get video info @@ -1722,7 +1731,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): note='Refetching age-gated info webpage', errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) + pl_response = video_info.get('player_response', [None])[0] + player_response = extract_player_response(pl_response, video_id) add_dash_mpd(video_info) + view_count = extract_view_count(video_info) else: age_gate = False video_info = None @@ -1745,11 +1757,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): is_live = True sts = ytplayer_config.get('sts') if not player_response: - pl_response = str_or_none(args.get('player_response')) - if pl_response: - pl_response = self._parse_json(pl_response, video_id, fatal=False) - if isinstance(pl_response, dict): - player_response = pl_response + player_response = extract_player_response(args.get('player_response'), video_id) if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): add_dash_mpd_pr(player_response) # We also try looking in get_video_info since it may contain different dashmpd @@ -1781,9 +1789,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): get_video_info = compat_parse_qs(video_info_webpage) if not player_response: pl_response = get_video_info.get('player_response', [None])[0] - if isinstance(pl_response, dict): - player_response = pl_response - add_dash_mpd_pr(player_response) + player_response = extract_player_response(pl_response, video_id) add_dash_mpd(get_video_info) if view_count is None: view_count = extract_view_count(get_video_info) From 2c8b1a21e8901904ab674264f5eda118bca992a5 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Tue, 30 Jul 2019 19:40:50 +0100 Subject: [PATCH 0754/1201] [openload] Add support for oload.best (#21913) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 11e92e471..030355257 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -368,6 +368,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', 'only_matching': True, + }, { + 'url': 'https://oload.best/embed/kkz9JgVZeWc/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From 07ab44c420a79d1faae09d00323242746e522c4c Mon Sep 17 00:00:00 2001 From: CeruleanSky Date: Tue, 30 Jul 2019 14:43:49 -0400 Subject: [PATCH 0755/1201] [dlive] Relax _VALID_URL (#21909) --- youtube_dl/extractor/dlive.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py index 8787f15a6..d95c67a5b 100644 --- a/youtube_dl/extractor/dlive.py +++ b/youtube_dl/extractor/dlive.py @@ -9,8 +9,8 @@ from ..utils import int_or_none class DLiveVODIE(InfoExtractor): IE_NAME = 'dlive:vod' - _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[a-zA-Z0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[^/?#&]+)' + _TESTS = [{ 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR', 'info_dict': { 'id': '3mTzOl4WR', @@ -20,7 +20,10 @@ class DLiveVODIE(InfoExtractor): 'timestamp': 1562011015, 'uploader_id': 'pdp', } - } + }, { + 'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg', + 'only_matching': True, + }] def _real_extract(self, url): uploader_id, vod_id = re.match(self._VALID_URL, url).groups() From 72791634127cc3093592c807225ec684af1cfcc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Jul 2019 02:31:19 +0700 Subject: [PATCH 0756/1201] [tvn24] Fix metadata extraction (closes #21833, closes #21834) --- youtube_dl/extractor/tvn24.py | 42 +++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 6590e1fd0..39f57ae6b 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + NO_DEFAULT, unescapeHTML, ) @@ -20,6 +21,18 @@ class TVN24IE(InfoExtractor): 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', 'thumbnail': 're:https?://.*[.]jpeg', } + }, { + # different layout + 'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html', + 'info_dict': { + 'id': '1771763', + 'ext': 'mp4', + 'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)', + 'thumbnail': 're:https?://.*', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html', 'only_matching': True, @@ -35,18 +48,21 @@ class TVN24IE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) - title = self._og_search_title(webpage) + title = self._og_search_title( + webpage, default=None) or self._search_regex( + r']+class=["\']magazineItemHeader[^>]+>(.+?)(?!\1).+?)\1' % attr, webpage, - name, group='json', fatal=fatal) or '{}', - video_id, transform_source=unescapeHTML, fatal=fatal) + name, group='json', default=default, fatal=fatal) or '{}', + display_id, transform_source=unescapeHTML, fatal=fatal) quality_data = extract_json('data-quality', 'formats') @@ -59,16 +75,24 @@ class TVN24IE(InfoExtractor): }) self._sort_formats(formats) - description = self._og_search_description(webpage) + description = self._og_search_description(webpage, default=None) thumbnail = self._og_search_thumbnail( webpage, default=None) or self._html_search_regex( r'\bdata-poster=(["\'])(?P(?!\1).+?)\1', webpage, 'thumbnail', group='url') + video_id = None + share_params = extract_json( - 'data-share-params', 'share params', fatal=False) + 'data-share-params', 'share params', default=None) if isinstance(share_params, dict): - video_id = share_params.get('id') or video_id + video_id = share_params.get('id') + + if not video_id: + video_id = self._search_regex( + r'data-vid-id=["\'](\d+)', webpage, 'video id', + default=None) or self._search_regex( + r',(\d+)\.html', url, 'video id', default=display_id) return { 'id': video_id, From 766c4f6090fdea635f50597a3c5d60643e3a2913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Jul 2019 02:32:02 +0700 Subject: [PATCH 0757/1201] [tvn24] Fix test --- youtube_dl/extractor/tvn24.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 39f57ae6b..de0fb5063 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor): 'id': '1584444', 'ext': 'mp4', 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', - 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', + 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.', 'thumbnail': 're:https?://.*[.]jpeg', } }, { From 9a37ff82f17383336251afcd80821620dd86ee95 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Wed, 31 Jul 2019 13:45:02 -0700 Subject: [PATCH 0758/1201] [mgtv] Extract format_note (#21881) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit format_note should now show 标清, 高清, 超清, 蓝光, etc. --- youtube_dl/extractor/mgtv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 7ae2e3c3b..71fc3ec56 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -82,6 +82,7 @@ class MGTVIE(InfoExtractor): 'http_headers': { 'Referer': url, }, + 'format_note': stream.get('name'), }) self._sort_formats(formats) From 826dcff99cd0a44ec5fa94f0e0201f5115d097ef Mon Sep 17 00:00:00 2001 From: cantandwont <52587695+cantandwont@users.noreply.github.com> Date: Thu, 1 Aug 2019 06:54:39 +1000 Subject: [PATCH 0759/1201] Output batch filename when it could not be read (#21915) --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 165c975dd..9a659fc65 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -94,7 +94,7 @@ def _real_main(argv=None): if opts.verbose: write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: - sys.exit('ERROR: batch file could not be read') + sys.exit('ERROR: batch file %s could not be read' % opts.batchfile) all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] From 535111657b507d4f4454160aaf2587e7ce6b9936 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Aug 2019 22:44:38 +0100 Subject: [PATCH 0760/1201] [discovery] use API call for video data extraction(#21808) --- youtube_dl/extractor/discovery.py | 59 ++++++++++++++----------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 9003545ce..c4b90cd90 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -5,14 +5,8 @@ import re import string from .discoverygo import DiscoveryGoBaseIE -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) -from ..utils import ( - ExtractorError, - try_get, -) +from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError from ..compat import compat_HTTPError @@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE): cookingchanneltv| motortrend ) - )\.com(?P/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+))''' + )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+)''' _TESTS = [{ - 'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley', + 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', 'info_dict': { - 'id': '5a2d9b4d6b66d17a5026e1fd', + 'id': '5a2f35ce6b66d17a5026e29e', 'ext': 'mp4', - 'title': 'Dave Foley', - 'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f', - 'duration': 608, + 'title': 'Riding with Matthew Perry', + 'description': 'md5:a34333153e79bc4526019a5129e7f878', + 'duration': 84, }, 'params': { 'skip_download': True, # requires ffmpeg @@ -62,17 +56,10 @@ class DiscoveryIE(DiscoveryGoBaseIE): }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False + _API_BASE_URL = 'https://api.discovery.com/v1/' def _real_extract(self, url): - site, path, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) - - react_data = self._parse_json(self._search_regex( - r'window\.__reactTransmitPacket\s*=\s*({.+?});', - webpage, 'react data'), display_id) - content_blocks = react_data['layout'][path]['contentBlocks'] - video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0] - video_id = video['id'] + site, display_id = re.match(self._VALID_URL, url).groups() access_token = None cookies = self._get_cookies(url) @@ -82,27 +69,33 @@ class DiscoveryIE(DiscoveryGoBaseIE): if auth_storage_cookie and auth_storage_cookie.value: auth_storage = self._parse_json(compat_urllib_parse_unquote( compat_urllib_parse_unquote(auth_storage_cookie.value)), - video_id, fatal=False) or {} + display_id, fatal=False) or {} access_token = auth_storage.get('a') or auth_storage.get('access_token') if not access_token: access_token = self._download_json( - 'https://%s.com/anonymous' % site, display_id, query={ + 'https://%s.com/anonymous' % site, display_id, + 'Downloading token JSON metadata', query={ 'authRel': 'authorization', - 'client_id': try_get( - react_data, lambda x: x['application']['apiClientId'], - compat_str) or '3020a40c2356a645b4b4', + 'client_id': '3020a40c2356a645b4b4', 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site, })['access_token'] - try: - headers = self.geo_verification_headers() - headers['Authorization'] = 'Bearer ' + access_token + headers = self.geo_verification_headers() + headers['Authorization'] = 'Bearer ' + access_token + try: + video = self._download_json( + self._API_BASE_URL + 'content/videos', + display_id, 'Downloading content JSON metadata', + headers=headers, query={ + 'slug': display_id, + })[0] + video_id = video['id'] stream = self._download_json( - 'https://api.discovery.com/v1/streaming/video/' + video_id, - display_id, headers=headers) + self._API_BASE_URL + 'streaming/video/' + video_id, + display_id, 'Downloading streaming JSON metadata', headers=headers) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): e_description = self._parse_json( From 07f3a05c87619d01c195cad8cd57ec72291ad78d Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Fri, 2 Aug 2019 06:49:01 +0900 Subject: [PATCH 0761/1201] [CONTRIBUTING.md] Add some more coding conventions (#21939) --- CONTRIBUTING.md | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd9ccbe96..d0e0a5637 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -366,3 +366,67 @@ duration = float_or_none(video.get('durationMs'), scale=1000) view_count = int_or_none(video.get('views')) ``` +### Inline values + +Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. + +#### Example + +Correct: + +```python +title = self._html_search_regex(r'([^<]+)', webpage, 'title') +``` + +Incorrect: + +```python +TITLE_RE = r'([^<]+)' +# ...some lines of code... +title = self._html_search_regex(TITLE_RE, webpage, 'title') +``` + +### Collapse fallbacks + +Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of meta values. + +#### Example + +Good: + +```python +description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) +``` + +Unwieldy: + +```python +description = ( + self._og_search_description(webpage, default=None) + or self._html_search_meta('description', webpage, default=None) + or self._html_search_meta('twitter:description', webpage, default=None)) +``` + +### Trailing parentheses + +Always move trailing parentheses after the last argument. + +#### Example + +Correct: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) +``` + +Incorrect: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list, +) +``` + From 33b529fabd282a371d3a4c21ee861badd20dae28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:03:25 +0700 Subject: [PATCH 0762/1201] [yandexvideo] Add support for DASH formats (#21971) --- youtube_dl/extractor/yandexvideo.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yandexvideo.py b/youtube_dl/extractor/yandexvideo.py index 1aea95383..46529be05 100644 --- a/youtube_dl/extractor/yandexvideo.py +++ b/youtube_dl/extractor/yandexvideo.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, url_or_none, ) @@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor): # episode, sports 'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d', 'only_matching': True, + }, { + # DASH with DRM + 'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8', + 'only_matching': True, }] def _real_extract(self, url): @@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor): 'disable_trackings': 1, })['content'] - m3u8_url = url_or_none(content.get('content_url')) or url_or_none( + content_url = url_or_none(content.get('content_url')) or url_or_none( content['streams'][0]['url']) title = content.get('title') or content.get('computed_title') - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + ext = determine_ext(content_url) + + if ext == 'm3u8': + formats = self._extract_m3u8_formats( + content_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + elif ext == 'mpd': + formats = self._extract_mpd_formats( + content_url, video_id, mpd_id='dash') + else: + formats = [{'url': content_url}] + self._sort_formats(formats) description = content.get('description') From be306d6a313903a3ebdb8a8ff055bb6b58c9f818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:25:01 +0700 Subject: [PATCH 0763/1201] [tvigle] Fix extraction and add support for HLS and DASH formats (closes #21967) --- youtube_dl/extractor/tvigle.py | 53 +++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index 3475ef4c3..180259aba 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -9,6 +9,8 @@ from ..utils import ( float_or_none, int_or_none, parse_age_limit, + try_get, + url_or_none, ) @@ -23,11 +25,10 @@ class TvigleIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.tvigle.ru/video/sokrat/', - 'md5': '36514aed3657d4f70b4b2cef8eb520cd', 'info_dict': { 'id': '1848932', 'display_id': 'sokrat', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Сократ', 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17', 'duration': 6586, @@ -37,7 +38,6 @@ class TvigleIE(InfoExtractor): }, { 'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/', - 'md5': 'e7efe5350dd5011d0de6550b53c3ba7b', 'info_dict': { 'id': '5142516', 'ext': 'flv', @@ -62,7 +62,7 @@ class TvigleIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._html_search_regex( (r']+class=["\']player["\'][^>]+id=["\'](\d+)', - r'var\s+cloudId\s*=\s*["\'](\d+)', + r'cloudId\s*=\s*["\'](\d+)', r'class="video-preview current_playing" id="(\d+)"'), webpage, 'video id') @@ -90,21 +90,40 @@ class TvigleIE(InfoExtractor): age_limit = parse_age_limit(item.get('ageRestrictions')) formats = [] - for vcodec, fmts in item['videos'].items(): + for vcodec, url_or_fmts in item['videos'].items(): if vcodec == 'hls': - continue - for format_id, video_url in fmts.items(): - if format_id == 'm3u8': + m3u8_url = url_or_none(url_or_fmts) + if not m3u8_url: continue - height = self._search_regex( - r'^(\d+)[pP]$', format_id, 'height', default=None) - formats.append({ - 'url': video_url, - 'format_id': '%s-%s' % (vcodec, format_id), - 'vcodec': vcodec, - 'height': int_or_none(height), - 'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)), - }) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif vcodec == 'dash': + mpd_url = url_or_none(url_or_fmts) + if not mpd_url: + continue + formats.extend(self._extract_mpd_formats( + mpd_url, video_id, mpd_id='dash', fatal=False)) + else: + if not isinstance(url_or_fmts, dict): + continue + for format_id, video_url in url_or_fmts.items(): + if format_id == 'm3u8': + continue + video_url = url_or_none(video_url) + if not video_url: + continue + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) + filesize = int_or_none(try_get( + item, lambda x: x['video_files_size'][vcodec][format_id])) + formats.append({ + 'url': video_url, + 'format_id': '%s-%s' % (vcodec, format_id), + 'vcodec': vcodec, + 'height': int_or_none(height), + 'filesize': filesize, + }) self._sort_formats(formats) return { From 2e9522b06173f2c5cfb2ba020958242d2a93feb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:36:32 +0700 Subject: [PATCH 0764/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index f6f1f7e38..c650e25d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +version + +Extractors ++ [tvigle] Add support for HLS and DASH formats (#21967) +* [tvigle] Fix extraction (#21967) ++ [yandexvideo] Add support for DASH formats (#21971) +* [discovery] Use API call for video data extraction (#21808) ++ [mgtv] Extract format_note (#21881) +* [tvn24] Fix metadata extraction (#21833, #21834) +* [dlive] Relax URL regular expression (#21909) ++ [openload] Add support for oload.best (#21913) +* [youtube] Improve metadata extraction for age gate content (#21943) + + version 2019.07.30 Extractors From 4f2d735803f723a8d8d6ffbbb1dd6b203f71af58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:37:54 +0700 Subject: [PATCH 0765/1201] release 2019.08.02 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +- .../ISSUE_TEMPLATE/2_site_support_request.md | 4 +- .../ISSUE_TEMPLATE/3_site_feature_request.md | 4 +- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 +- CONTRIBUTING.md | 64 ------------------- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 8 files changed, 14 insertions(+), 78 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index ccd033716..4d3894ad3 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.30 + [debug] youtube-dl version 2019.08.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 8709937ad..796e11e54 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index c3a555ed3..aa2348548 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 07042a466..5b2501a65 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.30 + [debug] youtube-dl version 2019.08.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 4cf75a2eb..d1758a95c 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d0e0a5637..cd9ccbe96 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -366,67 +366,3 @@ duration = float_or_none(video.get('durationMs'), scale=1000) view_count = int_or_none(video.get('views')) ``` -### Inline values - -Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. - -#### Example - -Correct: - -```python -title = self._html_search_regex(r'([^<]+)', webpage, 'title') -``` - -Incorrect: - -```python -TITLE_RE = r'([^<]+)' -# ...some lines of code... -title = self._html_search_regex(TITLE_RE, webpage, 'title') -``` - -### Collapse fallbacks - -Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of meta values. - -#### Example - -Good: - -```python -description = self._html_search_meta( - ['og:description', 'description', 'twitter:description'], - webpage, 'description', default=None) -``` - -Unwieldy: - -```python -description = ( - self._og_search_description(webpage, default=None) - or self._html_search_meta('description', webpage, default=None) - or self._html_search_meta('twitter:description', webpage, default=None)) -``` - -### Trailing parentheses - -Always move trailing parentheses after the last argument. - -#### Example - -Correct: - -```python - lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], - list) -``` - -Incorrect: - -```python - lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], - list, -) -``` - diff --git a/ChangeLog b/ChangeLog index c650e25d5..7db147498 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.08.02 Extractors + [tvigle] Add support for HLS and DASH formats (#21967) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 04dc83605..0f7fdb23d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.30' +__version__ = '2019.08.02' From d9d3a5a816253f14ee33623662690293365013e0 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Fri, 2 Aug 2019 05:54:56 +0700 Subject: [PATCH 0766/1201] [README.md] Move code from #21939 to the right place --- README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/README.md b/README.md index 8c48a3012..c39b13616 100644 --- a/README.md +++ b/README.md @@ -1216,6 +1216,72 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` +### Inline values + +Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. + +#### Example + +Correct: + +```python +title = self._html_search_regex(r'([^<]+)', webpage, 'title') +``` + +Incorrect: + +```python +TITLE_RE = r'([^<]+)' +# ...some lines of code... +title = self._html_search_regex(TITLE_RE, webpage, 'title') +``` + +### Collapse fallbacks + +Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns. + +#### Example + +Good: + +```python +description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) +``` + +Unwieldy: + +```python +description = ( + self._og_search_description(webpage, default=None) + or self._html_search_meta('description', webpage, default=None) + or self._html_search_meta('twitter:description', webpage, default=None)) +``` + +Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`. + +### Trailing parentheses + +Always move trailing parentheses after the last argument. + +#### Example + +Correct: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) +``` + +Incorrect: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list, +) +``` + ### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. From 995f319b0605188d145c78b88319d38b69130132 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 2 Aug 2019 18:08:26 +0100 Subject: [PATCH 0767/1201] [discovery] limit video data by show slug(closes #21980) --- youtube_dl/extractor/discovery.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index c4b90cd90..6287ca685 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -34,7 +34,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): cookingchanneltv| motortrend ) - )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+)''' + )\.com/tv-shows/(?P[^/]+)/(?:video|full-episode)s/(?P[^./?#]+)''' _TESTS = [{ 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', 'info_dict': { @@ -53,13 +53,17 @@ class DiscoveryIE(DiscoveryGoBaseIE): }, { 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', 'only_matching': True, + }, { + # using `show_slug` is important to get the correct video data + 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special', + 'only_matching': True, }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False _API_BASE_URL = 'https://api.discovery.com/v1/' def _real_extract(self, url): - site, display_id = re.match(self._VALID_URL, url).groups() + site, show_slug, display_id = re.match(self._VALID_URL, url).groups() access_token = None cookies = self._get_cookies(url) @@ -91,6 +95,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): display_id, 'Downloading content JSON metadata', headers=headers, query={ 'slug': display_id, + 'show_slug': show_slug, })[0] video_id = video['id'] stream = self._download_json( From 5efbc1366f4e4d9d4969cbfb404657349a5b3f99 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 2 Aug 2019 19:38:35 +0100 Subject: [PATCH 0768/1201] [roosterteeth] add support for watch URLs --- youtube_dl/extractor/roosterteeth.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index d3eeeba62..8d88ee499 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -17,7 +17,7 @@ from ..utils import ( class RoosterTeethIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P[^/?#&]+)' _LOGIN_URL = 'https://roosterteeth.com/login' _NETRC_MACHINE = 'roosterteeth' _TESTS = [{ @@ -49,6 +49,9 @@ class RoosterTeethIE(InfoExtractor): # only available for FIRST members 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', 'only_matching': True, + }, { + 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'only_matching': True, }] def _login(self): From eb9c9c74a6a2f9e13d0efaef304416b30354e5a3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 3 Aug 2019 10:29:20 +0100 Subject: [PATCH 0769/1201] [vimeo] fix album extraction closes #1933 closes #15704 closes #15855 closes #18967 closes #21986 --- youtube_dl/extractor/vimeo.py | 58 +++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b5b44a79a..ddf375c6c 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -2,12 +2,14 @@ from __future__ import unicode_literals import base64 +import functools import json import re import itertools from .common import InfoExtractor from ..compat import ( + compat_kwargs, compat_HTTPError, compat_str, compat_urlparse, @@ -19,6 +21,7 @@ from ..utils import ( int_or_none, merge_dicts, NO_DEFAULT, + OnDemandPagedList, parse_filesize, qualities, RegexNotFoundError, @@ -98,6 +101,13 @@ class VimeoBaseInfoExtractor(InfoExtractor): webpage, 'vuid', group='vuid') return xsrft, vuid + def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): + vimeo_config = self._search_regex( + r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', + webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) + if vimeo_config: + return self._parse_json(vimeo_config, video_id) + def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) @@ -253,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor): \. )? vimeo(?Ppro)?\.com/ - (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) + (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:.*?/)? (?: (?: @@ -580,11 +590,9 @@ class VimeoIE(VimeoBaseInfoExtractor): # and latter we extract those that are Vimeo specific. self.report_extraction(video_id) - vimeo_config = self._search_regex( - r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage, - 'vimeo config', default=None) + vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) if vimeo_config: - seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {}) + seed_status = vimeo_config.get('seed_status', {}) if seed_status.get('state') == 'failed': raise ExtractorError( '%s said: %s' % (self.IE_NAME, seed_status['title']), @@ -905,7 +913,7 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'https://vimeo\.com/album/(?P\d+)(?:$|[?#]|/(?!video))' + _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P\d+)(?:$|[?#]|/(?!video))' _TITLE_RE = r'
    ', webpage, 'description', default=None) or self._og_search_description(webpage) From 2906631e1230617883cdef8e227b369a9c98c9fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Oct 2019 23:16:46 +0700 Subject: [PATCH 0834/1201] [viewlift] Fix URL matching --- youtube_dl/extractor/viewlift.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py index 391419d9e..851ad936c 100644 --- a/youtube_dl/extractor/viewlift.py +++ b/youtube_dl/extractor/viewlift.py @@ -179,6 +179,10 @@ class ViewLiftIE(ViewLiftBaseIE): 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url) + def _real_extract(self, url): domain, display_id = re.match(self._VALID_URL, url).groups() From 74bc299453884bc4e802ca225815d3134b9510cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Oct 2019 02:03:22 +0700 Subject: [PATCH 0835/1201] [teachable] Skip login when already logged in (closes #22572) --- youtube_dl/extractor/teachable.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index c1a9deafe..7d2e34b3b 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -48,6 +48,16 @@ class TeachableBaseIE(InfoExtractor): 'https://%s/sign_in' % site, None, 'Downloading %s login page' % site) + def is_logged(webpage): + return any(re.search(p, webpage) for p in ( + r'class=["\']user-signout', + r']+\bhref=["\']/sign_out', + r'Log\s+[Oo]ut\s*<')) + + if is_logged(login_page): + self._logged_in = True + return + login_url = compat_str(urlh.geturl()) login_form = self._hidden_inputs(login_page) @@ -78,10 +88,7 @@ class TeachableBaseIE(InfoExtractor): 'Go to https://%s/ and accept.' % (site, site), expected=True) # Successful login - if any(re.search(p, response) for p in ( - r'class=["\']user-signout', - r']+\bhref=["\']/sign_out', - r'>\s*Log out\s*<')): + if is_logged(response): self._logged_in = True return From 25e911a968f6675a2c06f0d60a09a86972aadc40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Oct 2019 00:53:07 +0700 Subject: [PATCH 0836/1201] [extractor/common] Make _is_valid_url more relaxed --- youtube_dl/extractor/common.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 859786617..50d48c40d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1424,12 +1424,10 @@ class InfoExtractor(object): try: self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers) return True - except ExtractorError as e: - if isinstance(e.cause, compat_urllib_error.URLError): - self.to_screen( - '%s: %s URL is invalid, skipping' % (video_id, item)) - return False - raise + except ExtractorError: + self.to_screen( + '%s: %s URL is invalid, skipping' % (video_id, item)) + return False def http_scheme(self): """ Either "http:" or "https:", depending on the user's preferences """ From aaf9d904aa77bfe60714393c0ab413c32cca8a39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Oct 2019 00:55:46 +0700 Subject: [PATCH 0837/1201] [orf:tvthek] Make manifest requests non fatal (refs #22578) --- youtube_dl/extractor/orf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 499be0029..3425f7602 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -86,12 +86,13 @@ class ORFTVthekIE(InfoExtractor): if value: format_id_list.append(value) format_id = '-'.join(format_id_list) - if determine_ext(fd['src']) == 'm3u8': + ext = determine_ext(src) + if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - fd['src'], video_id, 'mp4', m3u8_id=format_id)) - elif determine_ext(fd['src']) == 'f4m': + src, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': formats.extend(self._extract_f4m_formats( - fd['src'], video_id, f4m_id=format_id)) + src, video_id, f4m_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, From 894b3826f5a2e1742010c20554a6a1b9e98a51ee Mon Sep 17 00:00:00 2001 From: sofutru <54445344+sofutru@users.noreply.github.com> Date: Fri, 4 Oct 2019 18:52:15 +0700 Subject: [PATCH 0838/1201] [youtube] Add support for yt.lelux.fi (#22597) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a3364a14e..6bd56f340 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -391,6 +391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| (?:www\.)?yt\.elukerio\.org/| + (?:www\.)?yt\.lelux\.fi/| (?:www\.)?kgg2m7yk5aybusll\.onion/| (?:www\.)?qklhadlycap4cnod\.onion/| (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/| From ca20b1304818a8d2d8eadfbe6f5387284e7ebc4d Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Fri, 4 Oct 2019 13:57:18 +0200 Subject: [PATCH 0839/1201] [nrktv:seriebase] Fix extraction (#22596) --- youtube_dl/extractor/nrk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 5f43e692f..60933f069 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -406,7 +406,7 @@ class NRKTVSerieBaseIE(InfoExtractor): def _extract_series(self, webpage, display_id, fatal=True): config = self._parse_json( self._search_regex( - (r'INITIAL_DATA_*\s*=\s*({.+?})\s*;', + (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;', r'({.+?})\s*,\s*"[^"]+"\s*\)\s*'), webpage, 'config', default='{}' if not fatal else NO_DEFAULT), display_id, fatal=False) From 9679a62a283f1384c7572ec78f7996e1276d5d7a Mon Sep 17 00:00:00 2001 From: kr4ssi <44404263+kr4ssi@users.noreply.github.com> Date: Fri, 4 Oct 2019 13:57:51 +0200 Subject: [PATCH 0840/1201] [openload] Add support for oload.monster (#22592) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 1fe581780..66e38cdb4 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -246,7 +246,7 @@ class OpenloadIE(InfoExtractor): _DOMAINS = r''' (?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|press|pw|life|live|space|services|website|vip)| + oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)| oladblock\.(?:services|xyz|me)|openloed\.co ) ''' @@ -365,6 +365,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.online/f/W8o2UfN1vNY/', 'only_matching': True, + }, { + 'url': 'https://oload.monster/f/W8o2UfN1vNY/', + 'only_matching': True, }, { 'url': 'https://oload.press/embed/drTBl1aOTvk/', 'only_matching': True, From 76e510b92c4a1c4b0001f892504ba2cbb4b8d486 Mon Sep 17 00:00:00 2001 From: sofutru <54445344+sofutru@users.noreply.github.com> Date: Fri, 4 Oct 2019 19:01:03 +0700 Subject: [PATCH 0841/1201] [youtube] Remove support for invidious.enkirton.net (#22543) --- youtube_dl/extractor/youtube.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6bd56f340..5e397324b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -383,7 +383,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:(?:www|no)\.)?invidiou\.sh/| (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/| (?:www\.)?invidious\.kabi\.tk/| - (?:www\.)?invidious\.enkirton\.net/| (?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.mastodon\.host/| (?:www\.)?invidious\.nixnet\.xyz/| From 4e72d02f39f0d8e9ae9bbe8233c157bef3b58bdf Mon Sep 17 00:00:00 2001 From: Stephan Date: Fri, 4 Oct 2019 14:05:35 +0200 Subject: [PATCH 0842/1201] [xvideos] Extend _VALID_URL (#22471) --- youtube_dl/extractor/xvideos.py | 39 ++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 166bcf443..8fc64914c 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -17,7 +17,8 @@ class XVideosIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?xvideos\.com/video| + (?:[^/]+\.)?xvideos2?\.com/video| + (?:www\.)?xvideos\.es/video| flashservice\.xvideos\.com/embedframe/| static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= ) @@ -39,6 +40,42 @@ class XVideosIE(InfoExtractor): }, { 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', 'only_matching': True, + }, { + 'url': 'http://xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True }] def _real_extract(self, url): From c2915de82e0ee793330d553899347ec54a4b834e Mon Sep 17 00:00:00 2001 From: Patrice Levesque Date: Fri, 4 Oct 2019 08:14:31 -0400 Subject: [PATCH 0843/1201] [telequebec] Add support for coucou.telequebec.tv (#22482) --- youtube_dl/extractor/telequebec.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 6965c127b..911385d01 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -22,7 +22,13 @@ class TeleQuebecBaseIE(InfoExtractor): class TeleQuebecIE(TeleQuebecBaseIE): - _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P\d+)' + _VALID_URL = r'''(?x) + https?:// + (?: + zonevideo\.telequebec\.tv/media| + coucou\.telequebec\.tv/videos + )/(?P\d+) + ''' _TESTS = [{ # available till 01.01.2023 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane', @@ -41,6 +47,9 @@ class TeleQuebecIE(TeleQuebecBaseIE): # no description 'url': 'http://zonevideo.telequebec.tv/media/30261', 'only_matching': True, + }, { + 'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain', + 'only_matching': True, }] def _real_extract(self, url): From b64045cd2a564bb44ef917803678ca362f412eb4 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 4 Oct 2019 13:17:16 +0100 Subject: [PATCH 0844/1201] [peertube] Update instances (#22414) --- youtube_dl/extractor/peertube.py | 397 +++++++++++++++++++++++++++---- 1 file changed, 347 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index b50543e32..d3a83ea2b 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -18,81 +18,385 @@ from ..utils import ( class PeerTubeIE(InfoExtractor): _INSTANCES_RE = r'''(?: # Taken from https://instances.joinpeertube.org/instances + peertube\.rainbowswingers\.net| + tube\.stanisic\.nl| + peer\.suiri\.us| + medias\.libox\.fr| + videomensoif\.ynh\.fr| + peertube\.travelpandas\.eu| + peertube\.rachetjay\.fr| + peertube\.montecsys\.fr| + tube\.eskuero\.me| + peer\.tube| + peertube\.umeahackerspace\.se| + tube\.nx-pod\.de| + video\.monsieurbidouille\.fr| tube\.openalgeria\.org| - peertube\.pointsecu\.fr| + vid\.lelux\.fi| + video\.anormallostpod\.ovh| + tube\.crapaud-fou\.org| + peertube\.stemy\.me| + lostpod\.space| + exode\.me| + peertube\.snargol\.com| + vis\.ion\.ovh| + videosdulib\.re| + v\.mbius\.io| + videos\.judrey\.eu| + peertube\.osureplayviewer\.xyz| + peertube\.mathieufamily\.ovh| + www\.videos-libr\.es| + fightforinfo\.com| + peertube\.fediverse\.ru| + peertube\.oiseauroch\.fr| + video\.nesven\.eu| + v\.bearvideo\.win| + video\.qoto\.org| + justporn\.cc| + video\.vny\.fr| + peervideo\.club| + tube\.taker\.fr| + peertube\.chantierlibre\.org| + tube\.ipfixe\.info| + tube\.kicou\.info| + tube\.dodsorf\.as| + videobit\.cc| + video\.yukari\.moe| + videos\.elbinario\.net| + hkvideo\.live| + pt\.tux\.tf| + www\.hkvideo\.live| + FIGHTFORINFO\.com| + pt\.765racing\.com| + peertube\.gnumeria\.eu\.org| + nordenmedia\.com| + peertube\.co\.uk| + tube\.darfweb\.eu| + tube\.kalah-france\.org| + 0ch\.in| + vod\.mochi\.academy| + film\.node9\.org| + peertube\.hatthieves\.es| + video\.fitchfamily\.org| + peertube\.ddns\.net| + video\.ifuncle\.kr| + video\.fdlibre\.eu| + tube\.22decembre\.eu| + peertube\.harmoniescreatives\.com| + tube\.fabrigli\.fr| + video\.thedwyers\.co| + video\.bruitbruit\.com| + peertube\.foxfam\.club| + peer\.philoxweb\.be| + videos\.bugs\.social| + peertube\.malbert\.xyz| + peertube\.bilange\.ca| + libretube\.net| + diytelevision\.com| + peertube\.fedilab\.app| + libre\.video| + video\.mstddntfdn\.online| + us\.tv| + peertube\.sl-network\.fr| + peertube\.dynlinux\.io| + peertube\.david\.durieux\.family| + peertube\.linuxrocks\.online| + peerwatch\.xyz| + v\.kretschmann\.social| + tube\.otter\.sh| + yt\.is\.nota\.live| + tube\.dragonpsi\.xyz| + peertube\.boneheadmedia\.com| + videos\.funkwhale\.audio| + watch\.44con\.com| + peertube\.gcaillaut\.fr| + peertube\.icu| + pony\.tube| + spacepub\.space| + tube\.stbr\.io| + v\.mom-gay\.faith| + tube\.port0\.xyz| + peertube\.simounet\.net| + play\.jergefelt\.se| + peertube\.zeteo\.me| + tube\.danq\.me| + peertube\.kerenon\.com| + tube\.fab-l3\.org| + tube\.calculate\.social| + peertube\.mckillop\.org| + tube\.netzspielplatz\.de| + vod\.ksite\.de| + peertube\.laas\.fr| + tube\.govital\.net| + peertube\.stephenson\.cc| + bistule\.nohost\.me| + peertube\.kajalinifi\.de| + video\.ploud\.jp| + video\.omniatv\.com| + peertube\.ffs2play\.fr| + peertube\.leboulaire\.ovh| + peertube\.tronic-studio\.com| + peertube\.public\.cat| + peertube\.metalbanana\.net| + video\.1000i100\.fr| + peertube\.alter-nativ-voll\.de| + tube\.pasa\.tf| + tube\.worldofhauru\.xyz| + pt\.kamp\.site| + peertube\.teleassist\.fr| + videos\.mleduc\.xyz| + conf\.tube| + media\.privacyinternational\.org| + pt\.forty-two\.nl| + video\.halle-leaks\.de| + video\.grosskopfgames\.de| + peertube\.schaeferit\.de| + peertube\.jackbot\.fr| + tube\.extinctionrebellion\.fr| + peertube\.f-si\.org| + video\.subak\.ovh| + videos\.koweb\.fr| + peertube\.zergy\.net| + peertube\.roflcopter\.fr| + peertube\.floss-marketing-school\.com| + vloggers\.social| + peertube\.iriseden\.eu| + videos\.ubuntu-paris\.org| + peertube\.mastodon\.host| + armstube\.com| + peertube\.s2s\.video| + peertube\.lol| + tube\.open-plug\.eu| + open\.tube| + peertube\.ch| + peertube\.normandie-libre\.fr| + peertube\.slat\.org| + video\.lacaveatonton\.ovh| + peertube\.uno| + peertube\.servebeer\.com| + peertube\.fedi\.quebec| + tube\.h3z\.jp| + tube\.plus200\.com| + peertube\.eric\.ovh| + tube\.metadocs\.cc| + tube\.unmondemeilleur\.eu| + gouttedeau\.space| + video\.antirep\.net| + nrop\.cant\.at| + tube\.ksl-bmx\.de| + tube\.plaf\.fr| + tube\.tchncs\.de| + video\.devinberg\.com| + hitchtube\.fr| + peertube\.kosebamse\.com| + yunopeertube\.myddns\.me| + peertube\.varney\.fr| + peertube\.anon-kenkai\.com| + tube\.maiti\.info| + tubee\.fr| + videos\.dinofly\.com| + toobnix\.org| + videotape\.me| + voca\.tube| + video\.heromuster\.com| + video\.lemediatv\.fr| + video\.up\.edu\.ph| + balafon\.video| + video\.ivel\.fr| + thickrips\.cloud| + pt\.laurentkruger\.fr| + video\.monarch-pass\.net| + peertube\.artica\.center| + video\.alternanet\.fr| + indymotion\.fr| + fanvid\.stopthatimp\.net| + video\.farci\.org| + v\.lesterpig\.com| + video\.okaris\.de| + tube\.pawelko\.net| + peertube\.mablr\.org| + tube\.fede\.re| + pytu\.be| + evertron\.tv| + devtube\.dev-wiki\.de| + raptube\.antipub\.org| + video\.selea\.se| + peertube\.mygaia\.org| + video\.oh14\.de| + peertube\.livingutopia\.org| + peertube\.the-penguin\.de| + tube\.thechangebook\.org| + tube\.anjara\.eu| + pt\.pube\.tk| + video\.samedi\.pm| + mplayer\.demouliere\.eu| + widemus\.de| + peertube\.me| + peertube\.zapashcanon\.fr| + video\.latavernedejohnjohn\.fr| + peertube\.pcservice46\.fr| + peertube\.mazzonetto\.eu| + video\.irem\.univ-paris-diderot\.fr| + video\.livecchi\.cloud| + alttube\.fr| + video\.coop\.tools| + video\.cabane-libre\.org| + peertube\.openstreetmap\.fr| + videos\.alolise\.org| + irrsinn\.video| + video\.antopie\.org| + scitech\.video| + tube2\.nemsia\.org| + video\.amic37\.fr| + peertube\.freeforge\.eu| + video\.arbitrarion\.com| + video\.datsemultimedia\.com| + stoptrackingus\.tv| + peertube\.ricostrongxxx\.com| + docker\.videos\.lecygnenoir\.info| + peertube\.togart\.de| + tube\.postblue\.info| + videos\.domainepublic\.net| + peertube\.cyber-tribal\.com| + video\.gresille\.org| + peertube\.dsmouse\.net| + cinema\.yunohost\.support| + tube\.theocevaer\.fr| + repro\.video| + tube\.4aem\.com| + quaziinc\.com| + peertube\.metawurst\.space| + videos\.wakapo\.com| + video\.ploud\.fr| + video\.freeradical\.zone| + tube\.valinor\.fr| + refuznik\.video| + pt\.kircheneuenburg\.de| + peertube\.asrun\.eu| + peertube\.lagob\.fr| + videos\.side-ways\.net| + 91video\.online| + video\.valme\.io| + video\.taboulisme\.com| + videos-libr\.es| + tv\.mooh\.fr| + nuage\.acostey\.fr| + video\.monsieur-a\.fr| + peertube\.librelois\.fr| + videos\.pair2jeux\.tube| + videos\.pueseso\.club| + peer\.mathdacloud\.ovh| + media\.assassinate-you\.net| + vidcommons\.org| + ptube\.rousset\.nom\.fr| + tube\.cyano\.at| + videos\.squat\.net| + video\.iphodase\.fr| + peertube\.makotoworkshop\.org| + peertube\.serveur\.slv-valbonne\.fr| + vault\.mle\.party| + hostyour\.tv| + videos\.hack2g2\.fr| + libre\.tube| + pire\.artisanlogiciel\.net| + videos\.numerique-en-commun\.fr| + video\.netsyms\.com| + video\.die-partei\.social| + video\.writeas\.org| + peertube\.swarm\.solvingmaz\.es| + tube\.pericoloso\.ovh| + watching\.cypherpunk\.observer| + videos\.adhocmusic\.com| + tube\.rfc1149\.net| + peertube\.librelabucm\.org| + videos\.numericoop\.fr| + peertube\.koehn\.com| + peertube\.anarchmusicall\.net| + tube\.kampftoast\.de| + vid\.y-y\.li| + peertube\.xtenz\.xyz| + diode\.zone| + tube\.egf\.mn| + peertube\.nomagic\.uk| + visionon\.tv| + videos\.koumoul\.com| + video\.rastapuls\.com| + video\.mantlepro\.com| + video\.deadsuperhero\.com| + peertube\.musicstudio\.pro| + peertube\.we-keys\.fr| + artitube\.artifaille\.fr| + peertube\.ethernia\.net| + tube\.midov\.pl| + peertube\.fr| + watch\.snoot\.tube| + peertube\.donnadieu\.fr| + argos\.aquilenet\.fr| + tube\.nemsia\.org| + tube\.bruniau\.net| + videos\.darckoune\.moe| + tube\.traydent\.info| + dev\.videos\.lecygnenoir\.info| + peertube\.nayya\.org| + peertube\.live| + peertube\.mofgao\.space| + video\.lequerrec\.eu| + peertube\.amicale\.net| + aperi\.tube| + tube\.ac-lyon\.fr| + video\.lw1\.at| + www\.yiny\.org| + videos\.pofilo\.fr| + tube\.lou\.lt| + choob\.h\.etbus\.ch| + tube\.hoga\.fr| + peertube\.heberge\.fr| + video\.obermui\.de| + videos\.cloudfrancois\.fr| + betamax\.video| + video\.typica\.us| + tube\.piweb\.be| + video\.blender\.org| + peertube\.cat| + tube\.kdy\.ch| + pe\.ertu\.be| + peertube\.social| + videos\.lescommuns\.org| + tv\.datamol\.org| + videonaute\.fr| + dialup\.express| peertube\.nogafa\.org| - peertube\.pl| megatube\.lilomoino\.fr| peertube\.tamanoir\.foucry\.net| - peertube\.inapurna\.org| - peertube\.netzspielplatz\.de| - video\.deadsuperhero\.com| peertube\.devosi\.org| peertube\.1312\.media| - tube\.worldofhauru\.xyz| tube\.bootlicker\.party| skeptikon\.fr| - peertube\.geekshell\.fr| - tube\.opportunis\.me| - peertube\.peshane\.net| video\.blueline\.mg| tube\.homecomputing\.fr| - videos\.cloudfrancois\.fr| - peertube\.viviers-fibre\.net| tube\.ouahpiti\.info| video\.tedomum\.net| video\.g3l\.org| fontube\.fr| peertube\.gaialabs\.ch| - peertube\.extremely\.online| - peertube\.public-infrastructure\.eu| tube\.kher\.nl| peertube\.qtg\.fr| - tube\.22decembre\.eu| - facegirl\.me| video\.migennes\.net| - janny\.moe| tube\.p2p\.legal| - video\.atlanti\.se| troll\.tv| - peertube\.geekael\.fr| - vid\.leotindall\.com| - video\.anormallostpod\.ovh| - p-tube\.h3z\.jp| - tube\.darfweb\.eu| videos\.iut-orsay\.fr| peertube\.solidev\.net| - videos\.symphonie-of-code\.fr| - testtube\.ortg\.de| videos\.cemea\.org| - peertube\.gwendalavir\.eu| video\.passageenseine\.fr| videos\.festivalparminous\.org| peertube\.touhoppai\.moe| - peertube\.duckdns\.org| sikke\.fi| - peertube\.mastodon\.host| - firedragonvideos\.com| - vidz\.dou\.bet| - peertube\.koehn\.com| peer\.hostux\.social| share\.tube| peertube\.walkingmountains\.fr| - medias\.libox\.fr| - peertube\.moe| - peertube\.xyz| - jp\.peertube\.network| videos\.benpro\.fr| - tube\.otter\.sh| - peertube\.angristan\.xyz| peertube\.parleur\.net| - peer\.ecutsa\.fr| peertube\.heraut\.eu| - peertube\.tifox\.fr| - peertube\.maly\.io| - vod\.mochi\.academy| - exode\.me| - coste\.video| tube\.aquilenet\.fr| peertube\.gegeweb\.eu| framatube\.org| @@ -100,18 +404,11 @@ class PeerTubeIE(InfoExtractor): tube\.conferences-gesticulees\.net| peertube\.datagueule\.tv| video\.lqdn\.fr| - meilleurtube\.delire\.party| tube\.mochi\.academy| - peertube\.dav\.li| media\.zat\.im| - pytu\.be| - peertube\.valvin\.fr| - peertube\.nsa\.ovh| video\.colibris-outilslibres\.org| - video\.hispagatos\.org| tube\.svnet\.fr| peertube\.video| - videos\.lecygnenoir\.info| peertube3\.cpy\.re| peertube2\.cpy\.re| videos\.tcit\.fr| @@ -126,7 +423,7 @@ class PeerTubeIE(InfoExtractor): (?P%s) ''' % (_INSTANCES_RE, _UUID_RE) _TESTS = [{ - 'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', + 'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', 'md5': '80f24ff364cc9d333529506a263e7feb', 'info_dict': { 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c', From fd4db1ebc231b65bea91add4cd55ce564b05eee3 Mon Sep 17 00:00:00 2001 From: axelerometer <54915681+axelerometer@users.noreply.github.com> Date: Fri, 4 Oct 2019 15:22:01 +0300 Subject: [PATCH 0845/1201] [chaturbate] Extend _VALID_URL (#22309) --- youtube_dl/extractor/chaturbate.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index e2b828d8a..656e715ae 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class ChaturbateIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P[^/?#]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P[^/?&#]+)' _TESTS = [{ 'url': 'https://www.chaturbate.com/siswet19/', 'info_dict': { @@ -21,6 +21,9 @@ class ChaturbateIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Room is offline', + }, { + 'url': 'https://chaturbate.com/fullvideo/?b=caylin', + 'only_matching': True, }, { 'url': 'https://en.chaturbate.com/siswet19/', 'only_matching': True, @@ -32,7 +35,8 @@ class ChaturbateIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - url, video_id, headers=self.geo_verification_headers()) + 'https://chaturbate.com/%s/' % video_id, video_id, + headers=self.geo_verification_headers()) m3u8_urls = [] From 0b87beefe60fb6ae52529603fd5826364146dfb7 Mon Sep 17 00:00:00 2001 From: Anh Nhan Nguyen Date: Fri, 4 Oct 2019 14:27:58 +0200 Subject: [PATCH 0846/1201] [gfycat] Extend _VALID_URL (#22225) --- youtube_dl/extractor/gfycat.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index bbe3cb283..18a30fe67 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -11,7 +11,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#]+)' + _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#\.]+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -53,6 +53,12 @@ class GfycatIE(InfoExtractor): }, { 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball', 'only_matching': True + }, { + 'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif', + 'only_matching': True + }, { + 'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4', + 'only_matching': True }] def _real_extract(self, url): From 3a37f2c3be16bb75a12d0617b5bc80ee6cab0f61 Mon Sep 17 00:00:00 2001 From: bitraid Date: Fri, 4 Oct 2019 15:48:20 +0300 Subject: [PATCH 0847/1201] [wimp] Remove extractor (closes #22088) (#22091) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/wimp.py | 54 ------------------------------ 2 files changed, 55 deletions(-) delete mode 100644 youtube_dl/extractor/wimp.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 44120cae2..a2d6e5314 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1413,7 +1413,6 @@ from .weibo import ( WeiboMobileIE ) from .weiqitv import WeiqiTVIE -from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE from .wsj import ( diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py deleted file mode 100644 index ea234e3c5..000000000 --- a/youtube_dl/extractor/wimp.py +++ /dev/null @@ -1,54 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from .youtube import YoutubeIE - - -class WimpIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P[^/]+)' - _TESTS = [{ - 'url': 'http://www.wimp.com/maru-is-exhausted/', - 'md5': 'ee21217ffd66d058e8b16be340b74883', - 'info_dict': { - 'id': 'maru-is-exhausted', - 'ext': 'mp4', - 'title': 'Maru is exhausted.', - 'description': 'md5:57e099e857c0a4ea312542b684a869b8', - } - }, { - 'url': 'http://www.wimp.com/clowncar/', - 'md5': '5c31ad862a90dc5b1f023956faec13fe', - 'info_dict': { - 'id': 'cG4CEr2aiSg', - 'ext': 'webm', - 'title': 'Basset hound clown car...incredible!', - 'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom', - 'upload_date': '20140303', - 'uploader': 'Gretchen Hoey', - 'uploader_id': 'gretchenandjeff1', - }, - 'add_ie': ['Youtube'], - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - youtube_id = self._search_regex( - (r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']", - r'data-id=["\']([0-9A-Za-z_-]{11})'), - webpage, 'video URL', default=None) - if youtube_id: - return self.url_result(youtube_id, YoutubeIE.ie_key()) - - info_dict = self._extract_jwplayer_data( - webpage, video_id, require_title=False) - - info_dict.update({ - 'id': video_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - }) - - return info_dict From 05446d483d089d0bc7fa3037900dadc856d3e687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Oct 2019 20:14:45 +0700 Subject: [PATCH 0848/1201] [telequebec:squat] Add support for squat.telequebec.tv (closes #18503) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/telequebec.py | 47 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a2d6e5314..8d3e433c3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1129,6 +1129,7 @@ from .telegraaf import TelegraafIE from .telemb import TeleMBIE from .telequebec import ( TeleQuebecIE, + TeleQuebecSquatIE, TeleQuebecEmissionIE, TeleQuebecLiveIE, ) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 911385d01..ae9f66787 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -7,6 +7,7 @@ from ..utils import ( int_or_none, smuggle_url, try_get, + unified_timestamp, ) @@ -70,6 +71,52 @@ class TeleQuebecIE(TeleQuebecBaseIE): return info +class TeleQuebecSquatIE(InfoExtractor): + _VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P\d+)' + _TESTS = [{ + 'url': 'https://squat.telequebec.tv/videos/9314', + 'info_dict': { + 'id': 'd59ae78112d542e793d83cc9d3a5b530', + 'ext': 'mp4', + 'title': 'Poupeflekta', + 'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75', + 'duration': 1351, + 'timestamp': 1569057600, + 'upload_date': '20190921', + 'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir', + 'season': 'Saison 3', + 'season_number': 3, + 'episode_number': 57, + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'https://squat.api.telequebec.tv/v1/videos/%s' % video_id, + video_id) + + media_id = video['sourceId'] + + return { + '_type': 'url_transparent', + 'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id, + 'ie_key': TeleQuebecIE.ie_key(), + 'id': media_id, + 'title': video.get('titre'), + 'description': video.get('description'), + 'timestamp': unified_timestamp(video.get('datePublication')), + 'series': video.get('container'), + 'season': video.get('saison'), + 'season_number': int_or_none(video.get('noSaison')), + 'episode_number': int_or_none(video.get('episode')), + } + + class TeleQuebecEmissionIE(TeleQuebecBaseIE): _VALID_URL = r'''(?x) https?:// From 4bf568d36cf516b38e4634e07bd8b4c3d33324f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 5 Oct 2019 21:43:31 +0700 Subject: [PATCH 0849/1201] [pornhub:uservideos:upload] Fix extraction (closes #22619) --- youtube_dl/extractor/pornhub.py | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 11b8cfcf7..ba0ad7da2 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -403,6 +403,15 @@ class PornHubUserIE(PornHubPlaylistBaseIE): class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + @staticmethod + def _has_more(webpage): + return re.search( + r'''(?x) + ]+\bclass=["\']page_next| + ]+\brel=["\']next| + ]+\bid=["\']moreDataBtn + ''', webpage) is not None + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') @@ -411,13 +420,11 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): page = int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) - page_url = self._make_page_url(url) - entries = [] for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( - page_url, item_id, 'Downloading page %d' % page_num, + url, item_id, 'Downloading page %d' % page_num, query={'page': page_num}) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: @@ -547,18 +554,6 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) else super(PornHubPagedVideoListIE, cls).suitable(url)) - def _make_page_url(self, url): - return url - - @staticmethod - def _has_more(webpage): - return re.search( - r'''(?x) - ]+\bclass=["\']page_next| - ]+\brel=["\']next| - ]+\bid=["\']moreDataBtn - ''', webpage) is not None - class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos/upload)' @@ -572,11 +567,3 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', 'only_matching': True, }] - - def _make_page_url(self, url): - mobj = re.match(self._VALID_URL, url) - return '%s/ajax' % mobj.group('url') - - @staticmethod - def _has_more(webpage): - return True From 560d3b7d7c86a0bfff36d59cb977fd3c01b10ad8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 5 Oct 2019 22:04:49 +0700 Subject: [PATCH 0850/1201] [redtube] Improve metadata extraction (closes #22492, closes #22615) --- youtube_dl/extractor/redtube.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 10311a81a..5c84028ef 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + merge_dicts, str_to_int, unified_strdate, url_or_none, @@ -45,11 +46,14 @@ class RedTubeIE(InfoExtractor): if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): raise ExtractorError('Video %s has been removed' % video_id, expected=True) - title = self._html_search_regex( - (r']+class="(?:video_title_text|videoTitle)[^"]*">(?P(?:(?!\1).)+)</h\1>', - r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), - webpage, 'title', group='title', - default=None) or self._og_search_title(webpage) + info = self._search_json_ld(webpage, video_id, default={}) + + if not info.get('title'): + info['title'] = self._html_search_regex( + (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', + r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), + webpage, 'title', group='title', + default=None) or self._og_search_title(webpage) formats = [] sources = self._parse_json( @@ -88,28 +92,28 @@ class RedTubeIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._search_regex( - r'<span[^>]+>ADDED ([^<]+)<', - webpage, 'upload date', fatal=False)) + r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<', + webpage, 'upload date', default=None)) duration = int_or_none(self._og_search_property( 'video:duration', webpage, default=None) or self._search_regex( r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', - r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'), - webpage, 'view count', fatal=False)) + r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)', + r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'), + webpage, 'view count', default=None)) # No self-labeling, but they describe themselves as # "Home of Videos Porno" age_limit = 18 - return { + return merge_dicts(info, { 'id': video_id, 'ext': 'mp4', - 'title': title, 'thumbnail': thumbnail, 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, 'age_limit': age_limit, 'formats': formats, - } + }) From d4bb825b83a87813f54d007febd79d2f3dcee7b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 9 Oct 2019 11:07:46 +0100 Subject: [PATCH 0851/1201] [globo] fix format extraction(closes #20319) --- youtube_dl/extractor/globo.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index fb8f7679b..b9c400a57 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -96,6 +96,8 @@ class GloboIE(InfoExtractor): video = self._download_json( 'http://api.globovideos.com/videos/%s/playlist' % video_id, video_id)['videos'][0] + if video.get('encrypted') is True: + raise ExtractorError('This video is DRM protected.', expected=True) title = video['title'] @@ -109,8 +111,8 @@ class GloboIE(InfoExtractor): security = self._download_json( 'http://security.video.globo.com/videos/%s/hash' % video_id, video_id, 'Downloading security hash for %s' % resource_id, query={ - 'player': 'flash', - 'version': '17.0.0.132', + 'player': 'desktop', + 'version': '5.19.1', 'resource_id': resource_id, }) @@ -122,19 +124,18 @@ class GloboIE(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, message), expected=True) continue - hash_code = security_hash[:2] - received_time = security_hash[2:12] - received_random = security_hash[12:22] - received_md5 = security_hash[22:] + assert security_hash[:2] in ('04', '14') + received_time = security_hash[3:13] + received_md5 = security_hash[24:] sign_time = compat_str(int(received_time) + 86400) padding = '%010d' % random.randint(1, 10000000000) - md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode() + md5_data = (received_md5 + sign_time + padding + '0xAC10FD').encode() signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') - signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5 + signed_hash = security_hash[:23] + sign_time + padding + signed_md5 - signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') + signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '') if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): formats.extend(self._extract_m3u8_formats( signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', From 1907f06e7b0689840b75810e5ad2683581f83924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Oct 2019 00:11:41 +0700 Subject: [PATCH 0852/1201] [kaltura] Fix embed info strip (refs #22658) --- youtube_dl/extractor/kaltura.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 0a733424c..1c486c038 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -151,7 +151,8 @@ class KalturaIE(InfoExtractor): if mobj: embed_info = mobj.groupdict() for k, v in embed_info.items(): - embed_info[k] = v.strip() + if v: + embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info escaped_pid = re.escape(embed_info['partner_id']) service_url = re.search( From 07b50f616e407c8b7b2c183298acbb58e2ddf09b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Oct 2019 00:24:03 +0700 Subject: [PATCH 0853/1201] [kaltura] Fix service URL extraction (closes #22658) --- youtube_dl/extractor/kaltura.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 1c486c038..2d38b758b 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -155,11 +155,11 @@ class KalturaIE(InfoExtractor): embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info escaped_pid = re.escape(embed_info['partner_id']) - service_url = re.search( - r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), + service_mobj = re.search( + r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), webpage) - if service_url: - url = smuggle_url(url, {'service_url': service_url.group(1)}) + if service_mobj: + url = smuggle_url(url, {'service_url': service_mobj.group('id')}) return url def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): From 2765c47a8c4e7154fa0a9be0bb63f3bcba592b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 10 Oct 2019 03:40:01 +0700 Subject: [PATCH 0854/1201] [promptfile] Remove extractor (closes #6239) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/promptfile.py | 70 ------------------------------ 2 files changed, 71 deletions(-) delete mode 100644 youtube_dl/extractor/promptfile.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8d3e433c3..f393683da 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -893,7 +893,6 @@ from .puhutv import ( PuhuTVSerieIE, ) from .presstv import PressTVIE -from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE from .pyvideo import PyvideoIE diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py deleted file mode 100644 index 23ac93d7e..000000000 --- a/youtube_dl/extractor/promptfile.py +++ /dev/null @@ -1,70 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - urlencode_postdata, -) - - -class PromptFileIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)' - _TEST = { - 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416', - 'md5': '5a7e285a26e0d66d9a263fae91bc92ce', - 'info_dict': { - 'id': '86D1CE8462-576CAAE416', - 'ext': 'mp4', - 'title': 'oceans.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, - expected=True) - - chash = self._search_regex( - r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash') - fields = self._hidden_inputs(webpage) - keys = list(fields.keys()) - chash_key = keys[0] if len(keys) == 1 else next( - key for key in keys if key.startswith('cha')) - fields[chash_key] = chash + fields[chash_key] - - webpage = self._download_webpage( - url, video_id, 'Downloading video page', - data=urlencode_postdata(fields), - headers={'Content-type': 'application/x-www-form-urlencoded'}) - - video_url = self._search_regex( - (r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*Download File', - r'<a[^>]+href=(["\'])(?P<url>https?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'), - webpage, 'video url', group='url') - title = self._html_search_regex( - r'<span.+title="([^"]+)">', webpage, 'title') - thumbnail = self._html_search_regex( - r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"', - webpage, 'thumbnail', fatal=False, flags=re.DOTALL) - - formats = [{ - 'format_id': 'sd', - 'url': video_url, - 'ext': determine_ext(title), - }] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } From c317b6163b294f4cdc2d1dff96e1a63da1bae910 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 10 Oct 2019 00:01:37 +0100 Subject: [PATCH 0855/1201] [vessel] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/generic.py | 6 -- youtube_dl/extractor/vessel.py | 157 ----------------------------- 3 files changed, 164 deletions(-) delete mode 100644 youtube_dl/extractor/vessel.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f393683da..7a1e0dad6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1282,7 +1282,6 @@ from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veoh import VeohIE -from .vessel import VesselIE from .vesti import VestiIE from .vevo import ( VevoIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d1725d98b..ec43c5ae4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -77,7 +77,6 @@ from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE -from .vessel import VesselIE from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE @@ -2491,11 +2490,6 @@ class GenericIE(InfoExtractor): if tp_urls: return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform') - # Look for Vessel embeds - vessel_urls = VesselIE._extract_urls(webpage) - if vessel_urls: - return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key()) - # Look for embedded rtl.nl player matches = re.findall( r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"', diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py deleted file mode 100644 index 31eee0ba7..000000000 --- a/youtube_dl/extractor/vessel.py +++ /dev/null @@ -1,157 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_iso8601, - sanitized_Request, -) - - -class VesselIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)' - _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' - _LOGIN_URL = 'https://www.vessel.com/api/account/login' - _NETRC_MACHINE = 'vessel' - _TESTS = [{ - 'url': 'https://www.vessel.com/videos/HDN7G5UMs', - 'md5': '455cdf8beb71c6dd797fd2f3818d05c4', - 'info_dict': { - 'id': 'HDN7G5UMs', - 'ext': 'mp4', - 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20150317', - 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', - 'timestamp': int, - }, - }, { - 'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346', - 'only_matching': True, - }, { - 'url': 'https://www.vessel.com/videos/F01_dsLj1', - 'only_matching': True, - }, { - 'url': 'https://www.vessel.com/videos/RRX-sir-J', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return [url for _, url in re.findall( - r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1', - webpage)] - - @staticmethod - def make_json_request(url, data): - payload = json.dumps(data).encode('utf-8') - req = sanitized_Request(url, payload) - req.add_header('Content-Type', 'application/json; charset=utf-8') - return req - - @staticmethod - def find_assets(data, asset_type, asset_id=None): - for asset in data.get('assets', []): - if not asset.get('type') == asset_type: - continue - elif asset_id is not None and not asset.get('id') == asset_id: - continue - else: - yield asset - - def _check_access_rights(self, data): - access_info = data.get('__view', {}) - if not access_info.get('allow_access', True): - err_code = access_info.get('error_code') or '' - if err_code == 'ITEM_PAID_ONLY': - raise ExtractorError( - 'This video requires subscription.', expected=True) - else: - raise ExtractorError( - 'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True) - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - self.report_login() - data = { - 'client_id': 'web', - 'type': 'password', - 'user_key': username, - 'password': password, - } - login_request = VesselIE.make_json_request(self._LOGIN_URL, data) - self._download_webpage(login_request, None, False, 'Wrong login info') - - def _real_initialize(self): - self._login() - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - data = self._parse_json(self._search_regex( - r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id) - asset_id = data['model']['data']['id'] - - req = VesselIE.make_json_request( - self._API_URL_TEMPLATE % asset_id, {'client': 'web'}) - data = self._download_json(req, video_id) - video_asset_id = data.get('main_video_asset') - - self._check_access_rights(data) - - try: - video_asset = next( - VesselIE.find_assets(data, 'video', asset_id=video_asset_id)) - except StopIteration: - raise ExtractorError('No video assets found') - - formats = [] - for f in video_asset.get('sources', []): - location = f.get('location') - if not location: - continue - name = f.get('name') - if name == 'hls-index': - formats.extend(self._extract_m3u8_formats( - location, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False)) - elif name == 'dash-index': - formats.extend(self._extract_mpd_formats( - location, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'format_id': name, - 'tbr': f.get('bitrate'), - 'height': f.get('height'), - 'width': f.get('width'), - 'url': location, - }) - self._sort_formats(formats) - - thumbnails = [] - for im_asset in VesselIE.find_assets(data, 'image'): - thumbnails.append({ - 'url': im_asset['location'], - 'width': im_asset.get('width', 0), - 'height': im_asset.get('height', 0), - }) - - return { - 'id': video_id, - 'title': data['title'], - 'formats': formats, - 'thumbnails': thumbnails, - 'description': data.get('short_description'), - 'duration': data.get('duration'), - 'comment_count': data.get('comment_count'), - 'like_count': data.get('like_count'), - 'view_count': data.get('view_count'), - 'timestamp': parse_iso8601(data.get('released_at')), - } From 311ee457314359662c975cd29f2ee58ad068db49 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 14 Oct 2019 18:36:25 +0100 Subject: [PATCH 0856/1201] [nbc] switch to graphql api(closes #18581)(closes #22693)(closes #22701) --- youtube_dl/extractor/nbc.py | 39 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3282f84ee..10680b202 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -10,7 +10,6 @@ from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..utils import ( smuggle_url, - try_get, update_url_query, int_or_none, ) @@ -85,27 +84,41 @@ class NBCIE(AdobePassIE): permalink, video_id = re.match(self._VALID_URL, url).groups() permalink = 'http' + compat_urllib_parse_unquote(permalink) response = self._download_json( - 'https://api.nbc.com/v3/videos', video_id, query={ - 'filter[permalink]': permalink, - 'fields[videos]': 'description,entitlement,episodeNumber,guid,keywords,seasonNumber,title,vChipRating', - 'fields[shows]': 'shortTitle', - 'include': 'show.shortTitle', + 'https://friendship.nbc.co/v2/graphql', video_id, query={ + 'query': '''{ + page(name: "%s", platform: web, type: VIDEO, userId: "0") { + data { + ... on VideoPageData { + description + episodeNumber + keywords + locked + mpxAccountId + mpxGuid + rating + seasonNumber + secondaryTitle + seriesShortTitle + } + } + } +}''' % permalink, }) - video_data = response['data'][0]['attributes'] + video_data = response['data']['page']['data'] query = { 'mbr': 'true', 'manifest': 'm3u', } - video_id = video_data['guid'] - title = video_data['title'] - if video_data.get('entitlement') == 'auth': + video_id = video_data['mpxGuid'] + title = video_data['secondaryTitle'] + if video_data.get('locked'): resource = self._get_mvpd_resource( 'nbcentertainment', title, video_id, - video_data.get('vChipRating')) + video_data.get('rating')) query['auth'] = self._extract_mvpd_auth( url, video_id, 'nbcentertainment', resource) theplatform_url = smuggle_url(update_url_query( - 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id, + 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id), query), {'force_smil_url': True}) return { '_type': 'url_transparent', @@ -117,7 +130,7 @@ class NBCIE(AdobePassIE): 'season_number': int_or_none(video_data.get('seasonNumber')), 'episode_number': int_or_none(video_data.get('episodeNumber')), 'episode': title, - 'series': try_get(response, lambda x: x['included'][0]['attributes']['shortTitle']), + 'series': video_data.get('seriesShortTitle'), 'ie_key': 'ThePlatform', } From a1ee23e98fe2ec80b8726829927fcae1267e76b1 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 14 Oct 2019 18:37:35 +0100 Subject: [PATCH 0857/1201] [vimeo] fix VHX embed extraction --- youtube_dl/extractor/vimeo.py | 97 ++++------------------------------- 1 file changed, 9 insertions(+), 88 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ddf375c6c..5dc38e243 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -23,7 +23,6 @@ from ..utils import ( NO_DEFAULT, OnDemandPagedList, parse_filesize, - qualities, RegexNotFoundError, sanitized_Request, smuggle_url, @@ -211,6 +210,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): video_uploader_url = owner.get('url') return { + 'id': video_id, 'title': self._live_title(video_title) if is_live else video_title, 'uploader': owner.get('name'), 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, @@ -730,7 +730,6 @@ class VimeoIE(VimeoBaseInfoExtractor): channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None info_dict = { - 'id': video_id, 'formats': formats, 'timestamp': unified_timestamp(timestamp), 'description': video_description, @@ -1061,7 +1060,6 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): if source_format: info_dict['formats'].append(source_format) self._vimeo_sort_formats(info_dict['formats']) - info_dict['id'] = video_id return info_dict @@ -1115,94 +1113,17 @@ class VimeoLikesIE(VimeoChannelIE): return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id) -class VHXEmbedIE(InfoExtractor): +class VHXEmbedIE(VimeoBaseInfoExtractor): IE_NAME = 'vhx:embed' _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)' - def _call_api(self, video_id, access_token, path='', query=None): - return self._download_json( - 'https://api.vhx.tv/videos/' + video_id + path, video_id, headers={ - 'Authorization': 'Bearer ' + access_token, - }, query=query) - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - credentials = self._parse_json(self._search_regex( - r'(?s)credentials\s*:\s*({.+?}),', webpage, - 'config'), video_id, js_to_json) - access_token = credentials['access_token'] - - query = {} - for k, v in credentials.items(): - if k in ('authorization', 'authUserToken', 'ticket') and v and v != 'undefined': - if k == 'authUserToken': - query['auth_user_token'] = v - else: - query[k] = v - files = self._call_api(video_id, access_token, '/files', query) - - formats = [] - for f in files: - href = try_get(f, lambda x: x['_links']['source']['href']) - if not href: - continue - method = f.get('method') - if method == 'hls': - formats.extend(self._extract_m3u8_formats( - href, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif method == 'dash': - formats.extend(self._extract_mpd_formats( - href, video_id, mpd_id='dash', fatal=False)) - else: - fmt = { - 'filesize': int_or_none(try_get(f, lambda x: x['size']['bytes'])), - 'format_id': 'http', - 'preference': 1, - 'url': href, - 'vcodec': f.get('codec'), - } - quality = f.get('quality') - if quality: - fmt.update({ - 'format_id': 'http-' + quality, - 'height': int_or_none(self._search_regex(r'(\d+)p', quality, 'height', default=None)), - }) - formats.append(fmt) - self._sort_formats(formats) - - video_data = self._call_api(video_id, access_token) - title = video_data.get('title') or video_data['name'] - - subtitles = {} - for subtitle in try_get(video_data, lambda x: x['tracks']['subtitles'], list) or []: - lang = subtitle.get('srclang') or subtitle.get('label') - for _link in subtitle.get('_links', {}).values(): - href = _link.get('href') - if not href: - continue - subtitles.setdefault(lang, []).append({ - 'url': href, - }) - - q = qualities(['small', 'medium', 'large', 'source']) - thumbnails = [] - for thumbnail_id, thumbnail_url in video_data.get('thumbnail', {}).items(): - thumbnails.append({ - 'id': thumbnail_id, - 'url': thumbnail_url, - 'preference': q(thumbnail_id), - }) - - return { - 'id': video_id, - 'title': title, - 'description': video_data.get('description'), - 'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])), - 'formats': formats, - 'subtitles': subtitles, - 'thumbnails': thumbnails, - 'timestamp': unified_timestamp(video_data.get('created_at')), - 'view_count': int_or_none(video_data.get('plays_count')), - } + config_url = self._parse_json(self._search_regex( + r'window\.OTTData\s*=\s*({.+})', webpage, + 'ott data'), video_id, js_to_json)['config_url'] + config = self._download_json(config_url, video_id) + info = self._parse_config(config, video_id) + self._vimeo_sort_formats(info['formats']) + return info From 7e05df71b7d8c0e1ea9beafff48275ef3c9e27d2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Oct 2019 00:10:22 +0100 Subject: [PATCH 0858/1201] [nexx] handle result list(closes #22666) --- youtube_dl/extractor/nexx.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 82d526c22..f9aad83c4 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -295,13 +295,23 @@ class NexxIE(InfoExtractor): video = None + def find_video(result): + if isinstance(result, dict): + return result + elif isinstance(result, list): + vid = int(video_id) + for v in result: + if try_get(v, lambda x: x['general']['ID'], int) == vid: + return v + return None + response = self._download_json( 'https://arc.nexx.cloud/api/video/%s.json' % video_id, video_id, fatal=False) if response and isinstance(response, dict): result = response.get('result') - if result and isinstance(result, dict): - video = result + if result: + video = find_video(result) # not all videos work via arc, e.g. nexx:741:1269984 if not video: @@ -348,7 +358,7 @@ class NexxIE(InfoExtractor): request_token = hashlib.md5( ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest() - video = self._call_api( + result = self._call_api( domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={ 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description', 'addInteractionOptions': '1', @@ -363,6 +373,7 @@ class NexxIE(InfoExtractor): 'X-Request-CID': cid, 'X-Request-Token': request_token, }) + video = find_video(result) general = video['general'] title = general['title'] From 2af01c0293db53dc80c552df3986d0e088b65b76 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Oct 2019 15:18:51 +0100 Subject: [PATCH 0859/1201] [bokecc] improve player params extraction(closes #22638) --- youtube_dl/extractor/bokecc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/bokecc.py b/youtube_dl/extractor/bokecc.py index 86a7f4d7d..6017e8344 100644 --- a/youtube_dl/extractor/bokecc.py +++ b/youtube_dl/extractor/bokecc.py @@ -11,8 +11,8 @@ from ..utils import ExtractorError class BokeCCBaseIE(InfoExtractor): def _extract_bokecc_formats(self, webpage, video_id, format_id=None): player_params_str = self._html_search_regex( - r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)', - webpage, 'player params') + r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)', + webpage, 'player params', group='query') player_params = compat_parse_qs(player_params_str) @@ -36,9 +36,9 @@ class BokeCCIE(BokeCCBaseIE): _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _TESTS = [{ - 'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B', + 'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A', 'info_dict': { - 'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30', + 'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461', 'ext': 'flv', 'title': 'BokeCC Video', }, From 30eb05cb41d95a73f7baff8da9ec1d6a50b08f50 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Oct 2019 19:54:53 +0100 Subject: [PATCH 0860/1201] [globo] extract subtitles(closes #22713) --- youtube_dl/extractor/globo.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index b9c400a57..9ad1d95fb 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -102,10 +102,18 @@ class GloboIE(InfoExtractor): title = video['title'] formats = [] + subtitles = {} for resource in video['resources']: resource_id = resource.get('_id') resource_url = resource.get('url') - if not resource_id or not resource_url: + resource_type = resource.get('type') + if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'): + continue + + if resource_type == 'subtitle': + subtitles.setdefault(resource.get('language') or 'por', []).append({ + 'url': resource_url, + }) continue security = self._download_json( @@ -165,7 +173,8 @@ class GloboIE(InfoExtractor): 'duration': duration, 'uploader': uploader, 'uploader_id': uploader_id, - 'formats': formats + 'formats': formats, + 'subtitles': subtitles, } From 974311b5aa1a53564a00915b9228af30e2a5b40d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 15 Oct 2019 21:01:59 +0100 Subject: [PATCH 0861/1201] [vimeo] improve album videos id extraction(closes #22599) --- youtube_dl/extractor/vimeo.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 5dc38e243..9abd59d98 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -938,7 +938,7 @@ class VimeoAlbumIE(VimeoChannelIE): def _fetch_page(self, album_id, authorizaion, hashed_pass, page): api_page = page + 1 query = { - 'fields': 'link', + 'fields': 'link,uri', 'page': api_page, 'per_page': self._PAGE_SIZE, } @@ -953,7 +953,9 @@ class VimeoAlbumIE(VimeoChannelIE): link = video.get('link') if not link: continue - yield self.url_result(link, VimeoIE.ie_key(), VimeoIE._match_id(link)) + uri = video.get('uri') + video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None + yield self.url_result(link, VimeoIE.ie_key(), video_id) def _real_extract(self, url): album_id = self._match_id(url) From 173190f5e3946173daea0539cf0e749cb14acd12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Oct 2019 03:25:13 +0700 Subject: [PATCH 0862/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/ChangeLog b/ChangeLog index 80681a9ae..8a59398d9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,39 @@ +version <unreleased> + +Core +* [extractor/common] Make _is_valid_url more relaxed + +Extractors +* [vimeo] Improve album videos id extraction (#22599) ++ [globo] Extract subtitles (#22713) +* [bokecc] Improve player params extraction (#22638) +* [nexx] Handle result list (#22666) +* [vimeo] Fix VHX embed extraction +* [nbc] Switch to graphql API (#18581, #22693, #22701) +- [vessel] Remove extractor +- [promptfile] Remove extractor (#6239) +* [kaltura] Fix service URL extraction (#22658) +* [kaltura] Fix embed info strip (#22658) +* [globo] Fix format extraction (#20319) +* [redtube] Improve metadata extraction (#22492, #22615) +* [pornhub:uservideos:upload] Fix extraction (#22619) ++ [telequebec:squat] Add support for squat.telequebec.tv (#18503) +- [wimp] Remove extractor (#22088, #22091) ++ [gfycat] Extend URL regular expression (#22225) ++ [chaturbate] Extend URL regular expression (#22309) +* [peertube] Update instances (#22414) ++ [telequebec] Add support for coucou.telequebec.tv (#22482) ++ [xvideos] Extend URL regular expression (#22471) +- [youtube] Remove support for invidious.enkirton.net (#22543) ++ [openload] Add support for oload.monster (#22592) +* [nrktv:seriebase] Fix extraction (#22596) ++ [youtube] Add support for yt.lelux.fi (#22597) +* [orf:tvthek] Make manifest requests non fatal (#22578) +* [teachable] Skip login when already logged in (#22572) +* [viewlift] Improve extraction (#22545) +* [nonktube] Fix extraction (#22544) + + version 2019.09.28 Core From 7815d6b74373feb90d969b5fcde7df11702fa5d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 16 Oct 2019 03:26:47 +0700 Subject: [PATCH 0863/1201] release 2019.10.16 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 4 +--- youtube_dl/version.py | 2 +- 8 files changed, 15 insertions(+), 17 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 2fea0120e..5cd9f0dc0 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.09.28 + [debug] youtube-dl version 2019.10.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 6116acc79..6cc34796a 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 79d1a7f3c..0b7911e79 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 9bda3d440..a6f417d38 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.09.28 + [debug] youtube-dl version 2019.10.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 581344917..3fe753b62 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.09.28** +- [ ] I've verified that I'm running youtube-dl version **2019.10.16** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 8a59398d9..dc5c32a1f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.10.16 Core * [extractor/common] Make _is_valid_url more relaxed diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 35275278b..0cbad28ea 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -694,7 +694,6 @@ - **PornoXO** - **PornTube** - **PressTV** - - **PromptFile** - **prosiebensat1**: ProSiebenSat.1 Digital - **puhutv** - **puhutv:serie** @@ -884,6 +883,7 @@ - **TeleQuebec** - **TeleQuebecEmission** - **TeleQuebecLive** + - **TeleQuebecSquat** - **TeleTask** - **Telewebion** - **TennisTV** @@ -991,7 +991,6 @@ - **VeeHD** - **Veoh** - **verystream** - - **Vessel** - **Vesti**: Вести.Ru - **Vevo** - **VevoPlaylist** @@ -1090,7 +1089,6 @@ - **Weibo** - **WeiboMobile** - **WeiqiTV**: WQTV - - **Wimp** - **Wistia** - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **WorldStarHipHop** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c3eafb068..53889b7cb 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.09.28' +__version__ = '2019.10.16' From 6d394a66f54216cc2b0b68fadd958eaf455c2778 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 12:03:46 +0100 Subject: [PATCH 0864/1201] [atresplayer] fix extraction(closes #16277)(closes #16716) --- youtube_dl/extractor/atresplayer.py | 213 +++++++++------------------- 1 file changed, 64 insertions(+), 149 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index ae1c09427..b96218f6c 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -1,202 +1,117 @@ from __future__ import unicode_literals -import time -import hmac -import hashlib import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, - float_or_none, int_or_none, - sanitized_Request, urlencode_postdata, - xpath_text, ) class AtresPlayerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html' + _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' _NETRC_MACHINE = 'atresplayer' _TESTS = [ { - 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', - 'md5': 'efd56753cda1bb64df52a3074f62e38a', + 'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/', 'info_dict': { - 'id': 'capitulo-10-especial-solidario-nochebuena', + 'id': '5d4aa2c57ed1a88fc715a615', 'ext': 'mp4', - 'title': 'Especial Solidario de Nochebuena', - 'description': 'md5:e2d52ff12214fa937107d21064075bf1', - 'duration': 5527.6, - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Capítulo 7: Asuntos pendientes', + 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', + 'duration': 3413, + }, + 'params': { + 'format': 'bestvideo', }, 'skip': 'This video is only available for registered users' }, { - 'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html', - 'md5': '6e52cbb513c405e403dbacb7aacf8747', - 'info_dict': { - 'id': 'capitulo-112-david-bustamante', - 'ext': 'flv', - 'title': 'David Bustamante', - 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6', - 'duration': 1439.0, - 'thumbnail': r're:^https?://.*\.jpg$', - }, + 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', + 'only_matching': True, }, { - 'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html', + 'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/', 'only_matching': True, }, ] - - _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J' - _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)' - _TIMESTAMP_SHIFT = 30000 - - _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json' - _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json' - _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s' - _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s' - - _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check' - - _ERRORS = { - 'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.', - 'DELETED': 'This video has expired and is no longer available for online streaming.', - 'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.', - # 'PREMIUM': 'PREMIUM', - } + _API_BASE = 'https://api.atresplayer.com/' def _real_initialize(self): self._login() + def _handle_error(self, e, code): + if isinstance(e.cause, compat_HTTPError) and e.cause.code == code: + error = self._parse_json(e.cause.read(), None) + if error.get('error') == 'required_registered': + self.raise_login_required() + raise ExtractorError(error['error_description'], expected=True) + raise + def _login(self): username, password = self._get_login_info() if username is None: return - login_form = { - 'j_username': username, - 'j_password': password, - } + self._request_webpage( + self._API_BASE + 'login', None, 'Downloading login page') - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - response = self._download_webpage( - request, None, 'Logging in') + try: + target_url = self._download_json( + 'https://account.atresmedia.com/api/login', None, + 'Logging in', headers={ + 'Content-Type': 'application/x-www-form-urlencoded' + }, data=urlencode_postdata({ + 'username': username, + 'password': password, + }))['targetUrl'] + except ExtractorError as e: + self._handle_error(e, 400) - error = self._html_search_regex( - r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>', - response, 'error', default=None) - if error: - raise ExtractorError( - 'Unable to login: %s' % error, expected=True) + self._request_webpage(target_url, None, 'Following Target URL') def _real_extract(self, url): - video_id = self._match_id(url) + display_id, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, video_id) + try: + episode = self._download_json( + self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) + except ExtractorError as e: + self._handle_error(e, 403) - episode_id = self._search_regex( - r'episode="([^"]+)"', webpage, 'episode id') - - request = sanitized_Request( - self._PLAYER_URL_TEMPLATE % episode_id, - headers={'User-Agent': self._USER_AGENT}) - player = self._download_json(request, episode_id, 'Downloading player JSON') - - episode_type = player.get('typeOfEpisode') - error_message = self._ERRORS.get(episode_type) - if error_message: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) + title = episode['titulo'] formats = [] - video_url = player.get('urlVideo') - if video_url: - format_info = { - 'url': video_url, - 'format_id': 'http', - } - mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url) - if mobj: - format_info.update({ - 'width': int_or_none(mobj.group('width')), - 'height': int_or_none(mobj.group('height')), - 'tbr': int_or_none(mobj.group('bitrate')), - }) - formats.append(format_info) - - timestamp = int_or_none(self._download_webpage( - self._TIME_API_URL, - video_id, 'Downloading timestamp', fatal=False), 1000, time.time()) - timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT) - token = hmac.new( - self._MAGIC.encode('ascii'), - (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5 - ).hexdigest() - - request = sanitized_Request( - self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token), - headers={'User-Agent': self._USER_AGENT}) - - fmt_json = self._download_json( - request, video_id, 'Downloading windows video JSON') - - result = fmt_json.get('resultDes') - if result.lower() != 'ok': - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, result), expected=True) - - for format_id, video_url in fmt_json['resultObject'].items(): - if format_id == 'token' or not video_url.startswith('http'): + for source in episode.get('sources', []): + src = source.get('src') + if not src: continue - if 'geodeswowsmpra3player' in video_url: - # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] - # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) - # this videos are protected by DRM, the f4m downloader doesn't support them - continue - video_url_hd = video_url.replace('free_es', 'es') - formats.extend(self._extract_f4m_formats( - video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds', - fatal=False)) - formats.extend(self._extract_mpd_formats( - video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash', - fatal=False)) + src_type = source.get('type') + if src_type == 'application/vnd.apple.mpegurl': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif src_type == 'application/dash+xml': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id='dash', fatal=False)) self._sort_formats(formats) - path_data = player.get('pathData') - - episode = self._download_xml( - self._EPISODE_URL_TEMPLATE % path_data, video_id, - 'Downloading episode XML') - - duration = float_or_none(xpath_text( - episode, './media/asset/info/technical/contentDuration', 'duration')) - - art = episode.find('./media/asset/info/art') - title = xpath_text(art, './name', 'title') - description = xpath_text(art, './description', 'description') - thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') - - subtitles = {} - subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') - if subtitle_url: - subtitles['es'] = [{ - 'ext': 'srt', - 'url': subtitle_url, - }] + heartbeat = episode.get('heartbeat') or {} + omniture = episode.get('omniture') or {} + get_meta = lambda x: heartbeat.get(x) or omniture.get(x) return { + 'display_id': display_id, 'id': video_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, + 'description': episode.get('descripcion'), + 'thumbnail': episode.get('imgPoster'), + 'duration': int_or_none(episode.get('duration')), 'formats': formats, - 'subtitles': subtitles, + 'channel': get_meta('channel'), + 'season': get_meta('season'), + 'episode_number': int_or_none(get_meta('episodeNumber')), } From e29e96a9f5bc390789d176d509f592e208aa30d8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 15:06:48 +0100 Subject: [PATCH 0865/1201] [dumpert] fix extraction(closes #22428)(closes #22564) --- youtube_dl/extractor/dumpert.py | 83 +++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index be2e3d378..d9d9afdec 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -1,20 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..compat import compat_b64decode from ..utils import ( + int_or_none, qualities, - sanitized_Request, ) class DumpertIE(InfoExtractor): - _VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)' + _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)' _TESTS = [{ - 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/', + 'url': 'https://www.dumpert.nl/item/6646981_951bc60f', 'md5': '1b9318d7d5054e7dcb9dc7654f21d643', 'info_dict': { 'id': '6646981/951bc60f', @@ -24,46 +21,60 @@ class DumpertIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', } }, { - 'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/', + 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7', + 'only_matching': True, + }, { + 'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f', + 'only_matching': True, + }, { + 'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - protocol = mobj.group('protocol') - - url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id) - req = sanitized_Request(url) - req.add_header('Cookie', 'nsfw=1; cpc=10') - webpage = self._download_webpage(req, video_id) - - files_base64 = self._search_regex( - r'data-files="([^"]+)"', webpage, 'data files') - - files = self._parse_json( - compat_b64decode(files_base64).decode('utf-8'), - video_id) + video_id = self._match_id(url).replace('_', '/') + item = self._download_json( + 'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'), + video_id)['items'][0] + title = item['title'] + media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO') quality = qualities(['flv', 'mobile', 'tablet', '720p']) - - formats = [{ - 'url': video_url, - 'format_id': format_id, - 'quality': quality(format_id), - } for format_id, video_url in files.items() if format_id != 'still'] + formats = [] + for variant in media.get('variants', []): + uri = variant.get('uri') + if not uri: + continue + version = variant.get('version') + formats.append({ + 'url': uri, + 'format_id': version, + 'quality': quality(version), + }) self._sort_formats(formats) - title = self._html_search_meta( - 'title', webpage) or self._og_search_title(webpage) - description = self._html_search_meta( - 'description', webpage) or self._og_search_description(webpage) - thumbnail = files.get('still') or self._og_search_thumbnail(webpage) + thumbnails = [] + stills = item.get('stills') or {} + for t in ('thumb', 'still'): + for s in ('', '-medium', '-large'): + still_id = t + s + still_url = stills.get(still_id) + if not still_url: + continue + thumbnails.append({ + 'id': still_id, + 'url': still_url, + }) + + stats = item.get('stats') or {} return { 'id': video_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'formats': formats + 'description': item.get('description'), + 'thumbnails': thumbnails, + 'formats': formats, + 'duration': int_or_none(media.get('duration')), + 'like_count': int_or_none(stats.get('kudos_total')), + 'view_count': int_or_none(stats.get('views_total')), } From 2b115b9460502944d6088cf42810c440495128a3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 15:41:58 +0100 Subject: [PATCH 0866/1201] [servingsys] Remove extractor(closes #22639) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/servingsys.py | 72 ------------------------------ 2 files changed, 73 deletions(-) delete mode 100644 youtube_dl/extractor/servingsys.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7a1e0dad6..53d527440 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -995,7 +995,6 @@ from .scrippsnetworks import ScrippsNetworksWatchIE from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE -from .servingsys import ServingSysIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE diff --git a/youtube_dl/extractor/servingsys.py b/youtube_dl/extractor/servingsys.py deleted file mode 100644 index c013d678f..000000000 --- a/youtube_dl/extractor/servingsys.py +++ /dev/null @@ -1,72 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, -) - - -class ServingSysIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)' - - _TEST = { - 'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?', - 'info_dict': { - 'id': '5349193', - 'title': 'AdAPPter_Hyundai_demo', - }, - 'playlist': [{ - 'md5': 'baed851342df6846eb8677a60a011a0f', - 'info_dict': { - 'id': '29955898', - 'ext': 'flv', - 'title': 'AdAPPter_Hyundai_demo (1)', - 'duration': 74, - 'tbr': 1378, - 'width': 640, - 'height': 400, - }, - }, { - 'md5': '979b4da2655c4bc2d81aeb915a8c5014', - 'info_dict': { - 'id': '29907998', - 'ext': 'flv', - 'title': 'AdAPPter_Hyundai_demo (2)', - 'duration': 34, - 'width': 854, - 'height': 480, - 'tbr': 516, - }, - }], - 'params': { - 'playlistend': 2, - }, - '_skip': 'Blocked in the US [sic]', - } - - def _real_extract(self, url): - pl_id = self._match_id(url) - vast_doc = self._download_xml(url, pl_id) - - title = vast_doc.find('.//AdTitle').text - media = vast_doc.find('.//MediaFile').text - info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL') - - doc = self._download_xml(info_url, pl_id, 'Downloading video info') - entries = [{ - '_type': 'video', - 'id': a.attrib['id'], - 'title': '%s (%s)' % (title, a.attrib['assetID']), - 'url': a.attrib['URL'], - 'duration': int_or_none(a.attrib.get('length')), - 'tbr': int_or_none(a.attrib.get('bitrate')), - 'height': int_or_none(a.attrib.get('height')), - 'width': int_or_none(a.attrib.get('width')), - } for a in doc.findall('.//AdditionalAssets/asset')] - - return { - '_type': 'playlist', - 'id': pl_id, - 'title': title, - 'entries': entries, - } From d07866f13efac39bf3f0b331870a15e0f5e98057 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 15:45:45 +0100 Subject: [PATCH 0867/1201] [mit] Remove support for video.mit.edu(closes #22403) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/mit.py | 24 ------------------------ 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 53d527440..ea47b99f6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -644,7 +644,7 @@ from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .minoto import MinotoIE from .miomio import MioMioIE -from .mit import TechTVMITIE, MITIE, OCWMITIE +from .mit import TechTVMITIE, OCWMITIE from .mitele import MiTeleIE from .mixcloud import ( MixcloudIE, diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 1aea78d11..e1506a745 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -65,30 +65,6 @@ class TechTVMITIE(InfoExtractor): } -class MITIE(TechTVMITIE): - IE_NAME = 'video.mit.edu' - _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)' - - _TEST = { - 'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', - 'md5': '7db01d5ccc1895fc5010e9c9e13648da', - 'info_dict': { - 'id': '21783', - 'ext': 'mp4', - 'title': 'The Government is Profiling You', - 'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - page_title = mobj.group('title') - webpage = self._download_webpage(url, page_title) - embed_url = self._search_regex( - r'<iframe .*?src="(.+?)"', webpage, 'embed url') - return self.url_result(embed_url) - - class OCWMITIE(InfoExtractor): IE_NAME = 'ocw.mit.edu' _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)' From bc48773ed4c068adfe67078714814035660e5ca4 Mon Sep 17 00:00:00 2001 From: MobiDotS <msaad615@gmail.com> Date: Wed, 16 Oct 2019 10:13:35 -0500 Subject: [PATCH 0868/1201] [twitch] update VOD URL matching (closes #22395) (#22727) --- youtube_dl/extractor/twitch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 0500e33a6..ca7676fe2 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -248,7 +248,7 @@ class TwitchVodIE(TwitchItemBaseIE): https?:// (?: (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/| - player\.twitch\.tv/\?.*?\bvideo=v + player\.twitch\.tv/\?.*?\bvideo=v? ) (?P<id>\d+) ''' @@ -306,6 +306,9 @@ class TwitchVodIE(TwitchItemBaseIE): }, { 'url': 'https://www.twitch.tv/northernlion/video/291940395', 'only_matching': True, + }, { + 'url': 'https://player.twitch.tv/?video=480452374', + 'only_matching': True, }] def _real_extract(self, url): From 000115759485797be719c71716c1ac35f003ba6c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 16 Oct 2019 23:57:40 +0100 Subject: [PATCH 0869/1201] [atresplayer] Add coding cookie --- youtube_dl/extractor/atresplayer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index b96218f6c..c2cec9845 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re From 86f63633c8e7c62ce245d1352d4d381efb614466 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Oct 2019 13:20:16 +0100 Subject: [PATCH 0870/1201] [audioboom] improve metadata extraction --- youtube_dl/extractor/audioboom.py | 34 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index 393f381c6..c51837b40 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -2,22 +2,25 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + clean_html, + float_or_none, +) class AudioBoomIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)' _TESTS = [{ - 'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0', - 'md5': '63a8d73a055c6ed0f1e51921a10a5a76', + 'url': 'https://audioboom.com/posts/7398103-asim-chaudhry', + 'md5': '7b00192e593ff227e6a315486979a42d', 'info_dict': { - 'id': '4279833', + 'id': '7398103', 'ext': 'mp3', - 'title': '3/09/2016 Czaban Hour 3', - 'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans', - 'duration': 2245.72, - 'uploader': 'SB Nation A.M.', - 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', + 'title': 'Asim Chaudhry', + 'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc', + 'duration': 4000.99, + 'uploader': 'Sue Perkins: An hour or so with...', + 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', } }, { 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', @@ -32,8 +35,8 @@ class AudioBoomIE(InfoExtractor): clip = None clip_store = self._parse_json( - self._search_regex( - r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id, + self._html_search_regex( + r'data-new-clip-store=(["\'])(?P<json>{.+?})\1', webpage, 'clip store', default='{}', group='json'), video_id, fatal=False) if clip_store: @@ -47,14 +50,15 @@ class AudioBoomIE(InfoExtractor): audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property( 'audio', webpage, 'audio url') - title = from_clip('title') or self._og_search_title(webpage) - description = from_clip('description') or self._og_search_description(webpage) + title = from_clip('title') or self._html_search_meta( + ['og:title', 'og:audio:title', 'audio_title'], webpage) + description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage) duration = float_or_none(from_clip('duration') or self._html_search_meta( 'weibo:audio:duration', webpage)) - uploader = from_clip('author') or self._og_search_property( - 'audio:artist', webpage, 'uploader', fatal=False) + uploader = from_clip('author') or self._html_search_meta( + ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader') uploader_url = from_clip('author_url') or self._html_search_meta( 'audioboo:channel', webpage, 'uploader url') From 755541a4c8ac3dd4e8b9abd0c7df95182a1f3fd4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Oct 2019 13:21:44 +0100 Subject: [PATCH 0871/1201] [mangomolo] fix video format extraction and add support for player URLs --- youtube_dl/extractor/generic.py | 8 ++++++-- youtube_dl/extractor/mangomolo.py | 17 +++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ec43c5ae4..5ed952b29 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2962,10 +2962,14 @@ class GenericIE(InfoExtractor): # Look for Mangomolo embeds mobj = re.search( - r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/ + r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?// + (?: + admin\.mangomolo\.com/analytics/index\.php/customers/embed| + player\.mangomolo\.com/v1 + )/ (?: video\?.*?\bid=(?P<video_id>\d+)| - index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) + (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) ).+?)\1''', webpage) if mobj is not None: info = { diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 482175a34..acee370e9 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -10,18 +10,21 @@ from ..utils import int_or_none class MangomoloBaseIE(InfoExtractor): + _BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)' + def _get_real_id(self, page_id): return page_id def _real_extract(self, url): page_id = self._get_real_id(self._match_id(url)) - webpage = self._download_webpage(url, page_id) + webpage = self._download_webpage( + 'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id) hidden_inputs = self._hidden_inputs(webpage) m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native' format_url = self._html_search_regex( [ - r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', + r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', r'<a[^>]+href="(rtsp://[^"]+)"' ], webpage, 'format url') formats = self._extract_wowza_formats( @@ -39,14 +42,16 @@ class MangomoloBaseIE(InfoExtractor): class MangomoloVideoIE(MangomoloBaseIE): - IE_NAME = 'mangomolo:video' - _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' + _TYPE = 'video' + IE_NAME = 'mangomolo:' + _TYPE + _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)' _IS_LIVE = False class MangomoloLiveIE(MangomoloBaseIE): - IE_NAME = 'mangomolo:live' - _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' + _TYPE = 'live' + IE_NAME = 'mangomolo:' + _TYPE + _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' _IS_LIVE = True def _get_real_id(self, page_id): From 59296bae7ec6d15b0df37dce34bdd96381c0e743 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Oct 2019 13:26:45 +0100 Subject: [PATCH 0872/1201] [xfileshare] clean extractor - update the list of domains - add support for aa-encoded video data - improve jwplayer format extraction - add support for Clappr sources closes #17032 closes #17906 closes #18237 closes #18239 --- youtube_dl/extractor/xfileshare.py | 192 +++++++++++++---------------- 1 file changed, 86 insertions(+), 106 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index b38c7a7b3..48ef07ed1 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -4,37 +4,64 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_chr from ..utils import ( decode_packed_codes, determine_ext, ExtractorError, int_or_none, - NO_DEFAULT, + js_to_json, urlencode_postdata, ) +# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58 +def aa_decode(aa_code): + symbol_table = [ + ('7', '((゚ー゚) + (o^_^o))'), + ('6', '((o^_^o) +(o^_^o))'), + ('5', '((゚ー゚) + (゚Θ゚))'), + ('2', '((o^_^o) - (゚Θ゚))'), + ('4', '(゚ー゚)'), + ('3', '(o^_^o)'), + ('1', '(゚Θ゚)'), + ('0', '(c^_^o)'), + ] + delim = '(゚Д゚)[゚ε゚]+' + ret = '' + for aa_char in aa_code.split(delim): + for val, pat in symbol_table: + aa_char = aa_char.replace(pat, val) + aa_char = aa_char.replace('+ ', '') + m = re.match(r'^\d+', aa_char) + if m: + ret += compat_chr(int(m.group(0), 8)) + else: + m = re.match(r'^u([\da-f]+)', aa_char) + if m: + ret += compat_chr(int(m.group(1), 16)) + return ret + + class XFileShareIE(InfoExtractor): _SITES = ( - (r'daclips\.(?:in|com)', 'DaClips'), - (r'filehoot\.com', 'FileHoot'), - (r'gorillavid\.(?:in|com)', 'GorillaVid'), - (r'movpod\.in', 'MovPod'), - (r'powerwatch\.pw', 'PowerWatch'), - (r'rapidvideo\.ws', 'Rapidvideo.ws'), + (r'clipwatching\.com', 'ClipWatching'), + (r'gounlimited\.to', 'GoUnlimited'), + (r'govid\.me', 'GoVid'), + (r'holavid\.com', 'HolaVid'), + (r'streamty\.com', 'Streamty'), (r'thevideobee\.to', 'TheVideoBee'), - (r'vidto\.(?:me|se)', 'Vidto'), - (r'streamin\.to', 'Streamin.To'), - (r'xvidstage\.com', 'XVIDSTAGE'), - (r'vidabc\.com', 'Vid ABC'), + (r'uqload\.com', 'Uqload'), (r'vidbom\.com', 'VidBom'), (r'vidlo\.us', 'vidlo'), - (r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'), - (r'fastvideo\.me', 'FastVideo.me'), + (r'vidlocker\.xyz', 'VidLocker'), + (r'vidshare\.tv', 'VidShare'), + (r'vup\.to', 'VUp'), + (r'xvideosharing\.com', 'XVideoSharing'), ) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) - _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' % '|'.join(site for site in list(zip(*_SITES))[0])) _FILE_NOT_FOUND_REGEXES = ( @@ -43,82 +70,14 @@ class XFileShareIE(InfoExtractor): ) _TESTS = [{ - 'url': 'http://gorillavid.in/06y9juieqpmi', - 'md5': '5ae4a3580620380619678ee4875893ba', + 'url': 'http://xvideosharing.com/fq65f94nd2ve', + 'md5': '4181f63957e8fe90ac836fa58dc3c8a6', 'info_dict': { - 'id': '06y9juieqpmi', + 'id': 'fq65f94nd2ve', 'ext': 'mp4', - 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ', + 'title': 'sample', 'thumbnail': r're:http://.*\.jpg', }, - }, { - 'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html', - 'only_matching': True, - }, { - 'url': 'http://daclips.in/3rso4kdn6f9m', - 'md5': '1ad8fd39bb976eeb66004d3a4895f106', - 'info_dict': { - 'id': '3rso4kdn6f9m', - 'ext': 'mp4', - 'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc', - 'thumbnail': r're:http://.*\.jpg', - } - }, { - 'url': 'http://movpod.in/0wguyyxi1yca', - 'only_matching': True, - }, { - 'url': 'http://filehoot.com/3ivfabn7573c.html', - 'info_dict': { - 'id': '3ivfabn7573c', - 'ext': 'mp4', - 'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4', - 'thumbnail': r're:http://.*\.jpg', - }, - 'skip': 'Video removed', - }, { - 'url': 'http://vidto.me/ku5glz52nqe1.html', - 'info_dict': { - 'id': 'ku5glz52nqe1', - 'ext': 'mp4', - 'title': 'test' - } - }, { - 'url': 'http://powerwatch.pw/duecjibvicbu', - 'info_dict': { - 'id': 'duecjibvicbu', - 'ext': 'mp4', - 'title': 'Big Buck Bunny trailer', - }, - }, { - 'url': 'http://xvidstage.com/e0qcnl03co6z', - 'info_dict': { - 'id': 'e0qcnl03co6z', - 'ext': 'mp4', - 'title': 'Chucky Prank 2015.mp4', - }, - }, { - # removed by administrator - 'url': 'http://xvidstage.com/amfy7atlkx25', - 'only_matching': True, - }, { - 'url': 'http://vidabc.com/i8ybqscrphfv', - 'info_dict': { - 'id': 'i8ybqscrphfv', - 'ext': 'mp4', - 'title': 're:Beauty and the Beast 2017', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.rapidvideo.cool/b667kprndr8w', - 'only_matching': True, - }, { - 'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html', - 'only_matching': True, - }, { - 'url': 'http://vidto.se/1tx1pf6t12cg.html', - 'only_matching': True, }] @staticmethod @@ -131,10 +90,9 @@ class XFileShareIE(InfoExtractor): webpage)] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + host, video_id = re.match(self._VALID_URL, url).groups() - url = 'http://%s/%s' % (mobj.group('host'), video_id) + url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id) webpage = self._download_webpage(url, video_id) if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): @@ -142,7 +100,7 @@ class XFileShareIE(InfoExtractor): fields = self._hidden_inputs(webpage) - if fields['op'] == 'download1': + if fields.get('op') == 'download1': countdown = int_or_none(self._search_regex( r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>', webpage, 'countdown', default=None)) @@ -160,13 +118,37 @@ class XFileShareIE(InfoExtractor): (r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'h4-fine[^>]*>([^<]+)<', - r'>Watch (.+) ', + r'>Watch (.+)[ <]', r'<h2 class="video-page-head">([^<]+)</h2>', - r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to + r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to + r'title\s*:\s*"([^"]+)"'), # govid.me webpage, 'title', default=None) or self._og_search_title( webpage, default=None) or video_id).strip() - def extract_formats(default=NO_DEFAULT): + for regex, func in ( + (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes), + (r'(゚.+)', aa_decode)): + obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None) + if obf_code: + webpage = webpage.replace(obf_code, func(obf_code)) + + formats = [] + + jwplayer_data = self._search_regex( + [ + r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);', + r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);', + ], webpage, + 'jwplayer data', default=None) + if jwplayer_data: + jwplayer_data = self._parse_json( + jwplayer_data.replace(r"\'", "'"), video_id, js_to_json) + if jwplayer_data: + formats = self._parse_jwplayer_data( + jwplayer_data, video_id, False, + m3u8_id='hls', mpd_id='dash')['formats'] + + if not formats: urls = [] for regex in ( r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1', @@ -177,6 +159,12 @@ class XFileShareIE(InfoExtractor): video_url = mobj.group('url') if video_url not in urls: urls.append(video_url) + + sources = self._search_regex( + r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None) + if sources: + urls.extend(self._parse_json(sources, video_id)) + formats = [] for video_url in urls: if determine_ext(video_url) == 'm3u8': @@ -189,21 +177,13 @@ class XFileShareIE(InfoExtractor): 'url': video_url, 'format_id': 'sd', }) - if not formats and default is not NO_DEFAULT: - return default - self._sort_formats(formats) - return formats - - formats = extract_formats(default=None) - - if not formats: - webpage = decode_packed_codes(self._search_regex( - r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))", - webpage, 'packed code')) - formats = extract_formats() + self._sort_formats(formats) thumbnail = self._search_regex( - r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None) + [ + r'<video[^>]+poster="([^"]+)"', + r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],', + ], webpage, 'thumbnail', default=None) return { 'id': video_id, From 34e3885bc9e3aecab104b96eabce03854ac8f7a2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 17 Oct 2019 15:55:44 +0100 Subject: [PATCH 0873/1201] [viewster->contv] remove viewster extractor and add support for contv.com --- youtube_dl/extractor/contv.py | 118 ++++++++++++++++ youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/viewster.py | 217 ----------------------------- 3 files changed, 119 insertions(+), 218 deletions(-) create mode 100644 youtube_dl/extractor/contv.py delete mode 100644 youtube_dl/extractor/viewster.py diff --git a/youtube_dl/extractor/contv.py b/youtube_dl/extractor/contv.py new file mode 100644 index 000000000..84b462d40 --- /dev/null +++ b/youtube_dl/extractor/contv.py @@ -0,0 +1,118 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + int_or_none, +) + + +class CONtvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)' + _TESTS = [{ + 'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter', + 'info_dict': { + 'id': 'CEG10022949', + 'ext': 'mp4', + 'title': 'Days Of Thrills & Laughter', + 'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb', + 'upload_date': '20180703', + 'timestamp': 1530634789.61, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites', + 'info_dict': { + 'id': 'CLIP-show_fotld_bts', + 'title': 'Fight of the Living Dead: Behind the Scenes Bites', + }, + 'playlist_mincount': 7, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + details = self._download_json( + 'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id, + video_id, query={'device': 'web'}) + + if details.get('type') == 'episodic': + seasons = self._download_json( + 'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id, + video_id) + entries = [] + for season in seasons: + for episode in season.get('episodes', []): + episode_id = episode.get('id') + if not episode_id: + continue + entries.append(self.url_result( + 'https://www.contv.com/details-movie/' + episode_id, + CONtvIE.ie_key(), episode_id)) + return self.playlist_result(entries, video_id, details.get('title')) + + m_details = details['details'] + title = details['title'] + + formats = [] + + media_hls_url = m_details.get('media_hls_url') + if media_hls_url: + formats.extend(self._extract_m3u8_formats( + media_hls_url, video_id, 'mp4', + m3u8_id='hls', fatal=False)) + + media_mp4_url = m_details.get('media_mp4_url') + if media_mp4_url: + formats.append({ + 'format_id': 'http', + 'url': media_mp4_url, + }) + + self._sort_formats(formats) + + subtitles = {} + captions = m_details.get('captions') or {} + for caption_url in captions.values(): + subtitles.setdefault('en', []).append({ + 'url': caption_url + }) + + thumbnails = [] + for image in m_details.get('images', []): + image_url = image.get('url') + if not image_url: + continue + thumbnails.append({ + 'url': image_url, + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + }) + + description = None + for p in ('large_', 'medium_', 'small_', ''): + d = m_details.get(p + 'description') + if d: + description = d + break + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnails': thumbnails, + 'description': description, + 'timestamp': float_or_none(details.get('metax_added_on'), 1000), + 'subtitles': subtitles, + 'duration': float_or_none(m_details.get('duration'), 1000), + 'view_count': int_or_none(details.get('num_watched')), + 'like_count': int_or_none(details.get('num_fav')), + 'categories': details.get('category'), + 'tags': details.get('tags'), + 'season_number': int_or_none(details.get('season')), + 'episode_number': int_or_none(details.get('episode')), + 'release_year': int_or_none(details.get('pub_year')), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ea47b99f6..1db21529f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -231,6 +231,7 @@ from .commonprotocols import ( RtmpIE, ) from .condenast import CondeNastIE +from .contv import CONtvIE from .corus import CorusIE from .cracked import CrackedIE from .crackle import CrackleIE @@ -1322,7 +1323,6 @@ from .viewlift import ( ViewLiftIE, ViewLiftEmbedIE, ) -from .viewster import ViewsterIE from .viidea import ViideaIE from .vimeo import ( VimeoIE, diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py deleted file mode 100644 index 6e318479c..000000000 --- a/youtube_dl/extractor/viewster.py +++ /dev/null @@ -1,217 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_urllib_parse_unquote, -) -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - parse_iso8601, - sanitized_Request, - HEADRequest, - url_basename, -) - - -class ViewsterIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)' - _TESTS = [{ - # movie, Type=Movie - 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/', - 'md5': 'e642d1b27fcf3a4ffa79f194f5adde36', - 'info_dict': { - 'id': '1140-11855-000', - 'ext': 'mp4', - 'title': 'The listening Project', - 'description': 'md5:bac720244afd1a8ea279864e67baa071', - 'timestamp': 1214870400, - 'upload_date': '20080701', - 'duration': 4680, - }, - }, { - # series episode, Type=Episode - 'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/', - 'md5': '9243079a8531809efe1b089db102c069', - 'info_dict': { - 'id': '1284-19427-001', - 'ext': 'mp4', - 'title': 'The World and a Wall', - 'description': 'md5:24814cf74d3453fdf5bfef9716d073e3', - 'timestamp': 1428192000, - 'upload_date': '20150405', - 'duration': 1500, - }, - }, { - # serie, Type=Serie - 'url': 'http://www.viewster.com/serie/1303-19426-000/', - 'info_dict': { - 'id': '1303-19426-000', - 'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?', - 'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11', - }, - 'playlist_count': 13, - }, { - # unfinished serie, no Type - 'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/', - 'info_dict': { - 'id': '1284-19427-000', - 'title': 'Baby Steps—Season 2', - 'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1', - }, - 'playlist_mincount': 16, - }, { - # geo restricted series - 'url': 'https://www.viewster.com/serie/1280-18794-002/', - 'only_matching': True, - }, { - # geo restricted video - 'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/', - 'only_matching': True, - }] - - _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01' - - def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}): - request = sanitized_Request(url) - request.add_header('Accept', self._ACCEPT_HEADER) - request.add_header('Auth-token', self._AUTH_TOKEN) - return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query) - - def _real_extract(self, url): - video_id = self._match_id(url) - # Get 'api_token' cookie - self._request_webpage( - HEADRequest('http://www.viewster.com/'), - video_id, headers=self.geo_verification_headers()) - cookies = self._get_cookies('http://www.viewster.com/') - self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value) - - info = self._download_json( - 'https://public-api.viewster.com/search/%s' % video_id, - video_id, 'Downloading entry JSON') - - entry_id = info.get('Id') or info['id'] - - # unfinished serie has no Type - if info.get('Type') in ('Serie', None): - try: - episodes = self._download_json( - 'https://public-api.viewster.com/series/%s/episodes' % entry_id, - video_id, 'Downloading series JSON') - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - self.raise_geo_restricted() - else: - raise - entries = [ - self.url_result( - 'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster') - for episode in episodes] - title = (info.get('Title') or info['Synopsis']['Title']).strip() - description = info.get('Synopsis', {}).get('Detailed') - return self.playlist_result(entries, video_id, title, description) - - formats = [] - for language_set in info.get('LanguageSets', []): - manifest_url = None - m3u8_formats = [] - audio = language_set.get('Audio') or '' - subtitle = language_set.get('Subtitle') or '' - base_format_id = audio - if subtitle: - base_format_id += '-%s' % subtitle - - def concat(suffix, sep='-'): - return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix - - medias = self._download_json( - 'https://public-api.viewster.com/movies/%s/videos' % entry_id, - video_id, fatal=False, query={ - 'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'], - 'language': audio, - 'subtitle': subtitle, - }) - if not medias: - continue - for media in medias: - video_url = media.get('Uri') - if not video_url: - continue - ext = determine_ext(video_url) - if ext == 'f4m': - manifest_url = video_url - video_url += '&' if '?' in video_url else '?' - video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1' - formats.extend(self._extract_f4m_formats( - video_url, video_id, f4m_id=concat('hds'))) - elif ext == 'm3u8': - manifest_url = video_url - m3u8_formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id=concat('hls'), - fatal=False) # m3u8 sometimes fail - if m3u8_formats: - formats.extend(m3u8_formats) - else: - qualities_basename = self._search_regex( - r'/([^/]+)\.csmil/', - manifest_url, 'qualities basename', default=None) - if not qualities_basename: - continue - QUALITIES_RE = r'((,\d+k)+,?)' - qualities = self._search_regex( - QUALITIES_RE, qualities_basename, - 'qualities', default=None) - if not qualities: - continue - qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(','))) - qualities.sort() - http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename) - http_url_basename = url_basename(video_url) - if m3u8_formats: - self._sort_formats(m3u8_formats) - m3u8_formats = list(filter( - lambda f: f.get('vcodec') != 'none', m3u8_formats)) - if len(qualities) == len(m3u8_formats): - for q, m3u8_format in zip(qualities, m3u8_formats): - f = m3u8_format.copy() - f.update({ - 'url': video_url.replace(http_url_basename, http_template % q), - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - formats.append(f) - else: - for q in qualities: - formats.append({ - 'url': video_url.replace(http_url_basename, http_template % q), - 'ext': 'mp4', - 'format_id': 'http-%d' % q, - 'tbr': q, - }) - - if not formats and not info.get('VODSettings'): - self.raise_geo_restricted() - - self._sort_formats(formats) - - synopsis = info.get('Synopsis') or {} - # Prefer title outside synopsis since it's less messy - title = (info.get('Title') or synopsis['Title']).strip() - description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short') - duration = int_or_none(info.get('Duration')) - timestamp = parse_iso8601(info.get('ReleaseDate')) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - } From 824fa51165d92ceee01589bf995ebbf009df328c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 18 Oct 2019 04:03:53 +0700 Subject: [PATCH 0874/1201] [utils] Improve subtitles_filename (closes #22753) --- test/test_utils.py | 6 ++++++ youtube_dl/YoutubeDL.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 8 ++++---- youtube_dl/utils.py | 4 ++-- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 659c6ece5..3920542bb 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -74,6 +74,7 @@ from youtube_dl.utils import ( str_to_int, strip_jsonp, strip_or_none, + subtitles_filename, timeconvert, unescapeHTML, unified_strdate, @@ -261,6 +262,11 @@ class TestUtil(unittest.TestCase): self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + def test_subtitles_filename(self): + self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt') + self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt') + self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt') + def test_remove_start(self): self.assertEqual(remove_start(None, 'A - '), None) self.assertEqual(remove_start('A - B', 'A - '), 'B') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c3d1407f9..f5cb46308 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1814,7 +1814,7 @@ class YoutubeDL(object): ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] - sub_filename = subtitles_filename(filename, sub_lang, sub_format) + sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) else: diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 70416c25e..fd3f921a8 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -393,7 +393,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): sub_ext = sub_info['ext'] if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': sub_langs.append(lang) - sub_filenames.append(subtitles_filename(filename, lang, sub_ext)) + sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext)) else: if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': webm_vtt_warn = True @@ -606,9 +606,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): self._downloader.to_screen( '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) continue - old_file = subtitles_filename(filename, lang, ext) + old_file = subtitles_filename(filename, lang, ext, info.get('ext')) sub_filenames.append(old_file) - new_file = subtitles_filename(filename, lang, new_ext) + new_file = subtitles_filename(filename, lang, new_ext, info.get('ext')) if ext in ('dfxp', 'ttml', 'tt'): self._downloader.report_warning( @@ -616,7 +616,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): 'which results in style information loss') dfxp_file = old_file - srt_file = subtitles_filename(filename, lang, 'srt') + srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext')) with open(dfxp_file, 'rb') as f: srt_data = dfxp2srt(f.read()) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 798757241..53117ea90 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2906,8 +2906,8 @@ def determine_ext(url, default_ext='unknown_video'): return default_ext -def subtitles_filename(filename, sub_lang, sub_format): - return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format +def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None): + return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext) def date_from_str(date_str): From 2297c0d7d977921dca865e6c9cbc7ee5282ba8ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 19 Oct 2019 23:56:36 +0700 Subject: [PATCH 0875/1201] [facebook] Bypass download rate limits (closes #21018) --- youtube_dl/extractor/facebook.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index a3dcdca3e..a56f85c21 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -405,6 +405,11 @@ class FacebookIE(InfoExtractor): if not formats: raise ExtractorError('Cannot find video formats') + # Downloads with browser's User-Agent are rate limited. Working around + # with non-browser User-Agent. + for f in formats: + f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1' + self._sort_formats(formats) video_title = self._html_search_regex( From b4818e3c7a718428d3366c34da8e21e2f416f5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Oct 2019 00:02:22 +0700 Subject: [PATCH 0876/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index dc5c32a1f..045349b05 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version <unreleased> + +Core +* [utils] Improve subtitles_filename (#22753) + +Extractors +* [facebook] Bypass download rate limits (#21018) ++ [contv] Add support for contv.com +- [viewster] Remove extractor +* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239) + * Update the list of domains + + Add support for aa-encoded video data + * Improve jwplayer format extraction + + Add support for Clappr sources +* [mangomolo] Fix video format extraction and add support for player URLs +* [audioboom] Improve metadata extraction +* [twitch] Update VOD URL matching (#22395, #22727) +- [mit] Remove support for video.mit.edu (#22403) +- [servingsys] Remove extractor (#22639) +* [dumpert] Fix extraction (#22428, #22564) +* [atresplayer] Fix extraction (#16277, #16716) + + version 2019.10.16 Core From 820215f0e34813089d559fed24a398d9e91810e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Oct 2019 00:09:02 +0700 Subject: [PATCH 0877/1201] release 2019.10.22 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 6 ++---- youtube_dl/version.py | 2 +- 8 files changed, 16 insertions(+), 18 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 5cd9f0dc0..f1afe704c 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.16 + [debug] youtube-dl version 2019.10.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 6cc34796a..a4dc9b005 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 0b7911e79..5bf86adce 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index a6f417d38..7aa5534e5 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.16 + [debug] youtube-dl version 2019.10.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 3fe753b62..5d3645e3d 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.16** +- [ ] I've verified that I'm running youtube-dl version **2019.10.22** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 045349b05..64233b03b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.10.22 Core * [utils] Improve subtitles_filename (#22753) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0cbad28ea..a1b0edeeb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -183,6 +183,7 @@ - **ComedyCentralShortname** - **ComedyCentralTV** - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED + - **CONtv** - **Corus** - **Coub** - **Cracked** @@ -784,7 +785,6 @@ - **Seeker** - **SenateISVP** - **SendtoNews** - - **ServingSys** - **Servus** - **Sexu** - **SeznamZpravy** @@ -1005,7 +1005,6 @@ - **Viddler** - **Videa** - **video.google:search**: Google Video search - - **video.mit.edu** - **VideoDetective** - **videofy.me** - **videomore** @@ -1023,7 +1022,6 @@ - **vier:videos** - **ViewLift** - **ViewLiftEmbed** - - **Viewster** - **Viidea** - **viki** - **viki:channel** @@ -1097,7 +1095,7 @@ - **WWE** - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me + - **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing - **XHamster** - **XHamsterEmbed** - **XHamsterUser** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 53889b7cb..39b355b9e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.10.16' +__version__ = '2019.10.22' From 0c2d10d225f61ac1fb534d8ed1788250401465b2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 22 Oct 2019 17:49:50 +0100 Subject: [PATCH 0878/1201] [globo] handle alternative hash signing method --- youtube_dl/extractor/globo.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 9ad1d95fb..60d842d3a 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -132,18 +132,24 @@ class GloboIE(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, message), expected=True) continue - assert security_hash[:2] in ('04', '14') - received_time = security_hash[3:13] - received_md5 = security_hash[24:] - - sign_time = compat_str(int(received_time) + 86400) + hash_code = security_hash[:2] padding = '%010d' % random.randint(1, 10000000000) + if hash_code in ('04', '14'): + received_time = security_hash[3:13] + received_md5 = security_hash[24:] + hash_prefix = security_hash[:23] + elif hash_code in ('02', '12', '03', '13'): + received_time = security_hash[2:12] + received_md5 = security_hash[22:] + padding += '1' + hash_prefix = '05' + security_hash[:22] - md5_data = (received_md5 + sign_time + padding + '0xAC10FD').encode() + padded_sign_time = compat_str(int(received_time) + 86400) + padding + md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode() signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') - signed_hash = security_hash[:23] + sign_time + padding + signed_md5 - + signed_hash = hash_prefix + padded_sign_time + signed_md5 signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '') + if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): formats.extend(self._extract_m3u8_formats( signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', From 07154c793065bca816793186590d8d6461e07478 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 22 Oct 2019 17:53:47 +0100 Subject: [PATCH 0879/1201] [facebook] extract subtitles(closes #22777) --- youtube_dl/extractor/ceskatelevize.py | 2 ++ youtube_dl/extractor/facebook.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 1ec58f7d8..7cb4efb74 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -147,6 +147,8 @@ class CeskaTelevizeIE(InfoExtractor): is_live = item.get('type') == 'LIVE' formats = [] for format_id, stream_url in item.get('streamUrls', {}).items(): + if 'drmOnly=true' in stream_url: + continue if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', 'm3u8_native', diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index a56f85c21..c723726b7 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -379,6 +379,7 @@ class FacebookIE(InfoExtractor): if not video_data: raise ExtractorError('Cannot parse data') + subtitles = {} formats = [] for f in video_data: format_id = f['stream_type'] @@ -402,6 +403,9 @@ class FacebookIE(InfoExtractor): if dash_manifest: formats.extend(self._parse_mpd_formats( compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)))) + subtitles_src = f[0].get('subtitles_src') + if subtitles_src: + subtitles.setdefault('en', []).append({'url': subtitles_src}) if not formats: raise ExtractorError('Cannot find video formats') @@ -447,6 +451,7 @@ class FacebookIE(InfoExtractor): 'timestamp': timestamp, 'thumbnail': thumbnail, 'view_count': view_count, + 'subtitles': subtitles, } return webpage, info_dict From 162bcc68dc73706699b559fffdd8bed3db6643b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 24 Oct 2019 12:53:33 +0100 Subject: [PATCH 0880/1201] [puhutv] improve extraction - fix subtitles extraction - transform HLS URLs to http URLs - improve metadata extraction --- youtube_dl/extractor/puhutv.py | 90 ++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 5465e8ab7..fb704a3c4 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -25,21 +25,21 @@ class PuhuTVIE(InfoExtractor): _TESTS = [{ # film 'url': 'https://puhutv.com/sut-kardesler-izle', - 'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7', + 'md5': 'a347470371d56e1585d1b2c8dab01c96', 'info_dict': { 'id': '5085', 'display_id': 'sut-kardesler', 'ext': 'mp4', 'title': 'Süt Kardeşler', - 'description': 'md5:405fd024df916ca16731114eb18e511a', + 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 4832.44, 'creator': 'Arzu Film', - 'timestamp': 1469778212, - 'upload_date': '20160729', + 'timestamp': 1561062602, + 'upload_date': '20190620', 'release_year': 1976, 'view_count': int, - 'tags': ['Aile', 'Komedi', 'Klasikler'], + 'tags': list, }, }, { # episode, geo restricted, bypassable with --geo-verification-proxy @@ -64,9 +64,10 @@ class PuhuTVIE(InfoExtractor): display_id)['data'] video_id = compat_str(info['id']) - title = info.get('name') or info['title']['name'] + show = info.get('title') or {} + title = info.get('name') or show['name'] if info.get('display_name'): - title = '%s %s' % (title, info.get('display_name')) + title = '%s %s' % (title, info['display_name']) try: videos = self._download_json( @@ -78,17 +79,36 @@ class PuhuTVIE(InfoExtractor): self.raise_geo_restricted() raise + urls = [] formats = [] + + def add_http_from_hls(m3u8_f): + http_url = m3u8_f['url'].replace('/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4') + if http_url != m3u8_f['url']: + f = m3u8_f.copy() + f.update({ + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + 'url': http_url, + }) + formats.append(f) + for video in videos['data']['videos']: media_url = url_or_none(video.get('url')) - if not media_url: + if not media_url or media_url in urls: continue + urls.append(media_url) + playlist = video.get('is_playlist') - if video.get('stream_type') == 'hls' and playlist is True: - formats.extend(self._extract_m3u8_formats( + if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url: + m3u8_formats = self._extract_m3u8_formats( media_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) + m3u8_id='hls', fatal=False) + for m3u8_f in m3u8_formats: + formats.append(m3u8_f) + add_http_from_hls(m3u8_f) continue + quality = int_or_none(video.get('quality')) f = { 'url': media_url, @@ -96,34 +116,29 @@ class PuhuTVIE(InfoExtractor): 'height': quality } video_format = video.get('video_format') - if video_format == 'hls' and playlist is False: + is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False + if is_hls: format_id = 'hls' f['protocol'] = 'm3u8_native' elif video_format == 'mp4': format_id = 'http' - else: continue if quality: format_id += '-%sp' % quality f['format_id'] = format_id formats.append(f) + if is_hls: + add_http_from_hls(f) self._sort_formats(formats) - description = try_get( - info, lambda x: x['title']['description'], - compat_str) or info.get('description') - timestamp = unified_timestamp(info.get('created_at')) creator = try_get( - info, lambda x: x['title']['producer']['name'], compat_str) + show, lambda x: x['producer']['name'], compat_str) - duration = float_or_none( - try_get(info, lambda x: x['content']['duration_in_ms'], int), - scale=1000) - view_count = try_get(info, lambda x: x['content']['watch_count'], int) + content = info.get('content') or {} images = try_get( - info, lambda x: x['content']['images']['wide'], dict) or {} + content, lambda x: x['images']['wide'], dict) or {} thumbnails = [] for image_id, image_url in images.items(): if not isinstance(image_url, compat_str): @@ -137,14 +152,8 @@ class PuhuTVIE(InfoExtractor): }) thumbnails.append(t) - release_year = try_get(info, lambda x: x['title']['released_at'], int) - - season_number = int_or_none(info.get('season_number')) - season_id = str_or_none(info.get('season_id')) - episode_number = int_or_none(info.get('episode_number')) - tags = [] - for genre in try_get(info, lambda x: x['title']['genres'], list) or []: + for genre in show.get('genres') or []: if not isinstance(genre, dict): continue genre_name = genre.get('name') @@ -152,12 +161,11 @@ class PuhuTVIE(InfoExtractor): tags.append(genre_name) subtitles = {} - for subtitle in try_get( - info, lambda x: x['content']['subtitles'], list) or []: + for subtitle in content.get('subtitles') or []: if not isinstance(subtitle, dict): continue lang = subtitle.get('language') - sub_url = url_or_none(subtitle.get('url')) + sub_url = url_or_none(subtitle.get('url') or subtitle.get('file')) if not lang or not isinstance(lang, compat_str) or not sub_url: continue subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ @@ -168,15 +176,15 @@ class PuhuTVIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'title': title, - 'description': description, - 'season_id': season_id, - 'season_number': season_number, - 'episode_number': episode_number, - 'release_year': release_year, - 'timestamp': timestamp, + 'description': info.get('description') or show.get('description'), + 'season_id': str_or_none(info.get('season_id')), + 'season_number': int_or_none(info.get('season_number')), + 'episode_number': int_or_none(info.get('episode_number')), + 'release_year': int_or_none(show.get('released_at')), + 'timestamp': unified_timestamp(info.get('created_at')), 'creator': creator, - 'view_count': view_count, - 'duration': duration, + 'view_count': int_or_none(content.get('watch_count')), + 'duration': float_or_none(content.get('duration_in_ms'), 1000), 'tags': tags, 'subtitles': subtitles, 'thumbnails': thumbnails, From 416c3ca7f53dab76b9e5ec46a0c0335698252c2d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 25 Oct 2019 19:27:28 +0100 Subject: [PATCH 0881/1201] [odnoklassniki] add support for Schemeless embed extraction --- youtube_dl/extractor/generic.py | 7 ++++--- youtube_dl/extractor/odnoklassniki.py | 9 +++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5ed952b29..f66cae0eb 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -118,6 +118,7 @@ from .foxnews import FoxNewsIE from .viqeo import ViqeoIE from .expressen import ExpressenIE from .zype import ZypeIE +from .odnoklassniki import OdnoklassnikiIE class GenericIE(InfoExtractor): @@ -2627,9 +2628,9 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'VK') # Look for embedded Odnoklassniki player - mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'Odnoklassniki') + odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage) + if odnoklassniki_url: + return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) # Look for embedded ivi player mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 114b93c07..7ed9fac55 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, @@ -121,6 +123,13 @@ class OdnoklassnikiIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): start_time = int_or_none(compat_parse_qs( compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) From 3c989818e7dc7706da069312bbdd040165a97517 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 25 Oct 2019 19:35:07 +0100 Subject: [PATCH 0882/1201] [vk] improve extraction - add support for Odnoklassniki embeds - update tests - extract more video from user lists(closes #4470) - fix wall post audio extraction(closes #18332) - improve error detection(closes #22568) --- youtube_dl/extractor/vk.py | 329 +++++++++++++++++++------------------ 1 file changed, 173 insertions(+), 156 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 8b6dc0e24..c289fcad3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -12,7 +12,6 @@ from ..utils import ( get_element_by_class, int_or_none, orderedSet, - remove_start, str_or_none, str_to_int, unescapeHTML, @@ -21,6 +20,7 @@ from ..utils import ( urlencode_postdata, ) from .dailymotion import DailymotionIE +from .odnoklassniki import OdnoklassnikiIE from .pladform import PladformIE from .vimeo import VimeoIE from .youtube import YoutubeIE @@ -60,6 +60,18 @@ class VKBaseIE(InfoExtractor): def _real_initialize(self): self._login() + def _download_payload(self, path, video_id, data, fatal=True): + data['al'] = 1 + code, payload = self._download_json( + 'https://vk.com/%s.php' % path, video_id, + data=urlencode_postdata(data), fatal=fatal, + headers={'X-Requested-With': 'XMLHttpRequest'})['payload'] + if code == '3': + self.raise_login_required() + elif code == '8': + raise ExtractorError(clean_html(payload[0][1:-1]), expected=True) + return payload + class VKIE(VKBaseIE): IE_NAME = 'vk' @@ -96,7 +108,6 @@ class VKIE(VKBaseIE): }, { 'url': 'http://vk.com/video205387401_165548505', - 'md5': '6c0aeb2e90396ba97035b9cbde548700', 'info_dict': { 'id': '205387401_165548505', 'ext': 'mp4', @@ -110,18 +121,18 @@ class VKIE(VKBaseIE): }, { 'note': 'Embedded video', - 'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', - 'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', + 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa', + 'md5': '7babad3b85ea2e91948005b1b8b0cb84', 'info_dict': { - 'id': '32194266_162925554', + 'id': '-77521_162222515', 'ext': 'mp4', - 'uploader': 'Vladimir Gavrin', - 'title': 'Lin Dan', - 'duration': 101, - 'upload_date': '20120730', - 'view_count': int, + 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', + 'title': 'ProtivoGunz - Хуёвая песня', + 'duration': 195, + 'upload_date': '20120212', + 'timestamp': 1329049880, + 'uploader_id': '-77521', }, - 'skip': 'This video has been removed from public access.', }, { # VIDEO NOW REMOVED @@ -138,18 +149,19 @@ class VKIE(VKBaseIE): 'upload_date': '20121218', 'view_count': int, }, - 'skip': 'Requires vk account credentials', + 'skip': 'Removed', }, { 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', - 'md5': '4d7a5ef8cf114dfa09577e57b2993202', 'info_dict': { 'id': '-43215063_168067957', 'ext': 'mp4', - 'uploader': 'Киномания - лучшее из мира кино', + 'uploader': 'Bro Mazter', 'title': ' ', 'duration': 7291, 'upload_date': '20140328', + 'uploader_id': '223413403', + 'timestamp': 1396018030, }, 'skip': 'Requires vk account credentials', }, @@ -165,7 +177,7 @@ class VKIE(VKBaseIE): 'upload_date': '20140626', 'view_count': int, }, - 'skip': 'Only works from Russia', + 'skip': 'Removed', }, { # video (removed?) only available with list id @@ -247,6 +259,9 @@ class VKIE(VKBaseIE): 'uploader_id': '-387766', 'timestamp': 1475137527, }, + 'params': { + 'skip_download': True, + }, }, { # live stream, hls and rtmp links, most likely already finished live @@ -288,80 +303,94 @@ class VKIE(VKBaseIE): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') + mv_data = {} if video_id: - info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id + data = { + 'act': 'show_inline', + 'video': video_id, + } # Some videos (removed?) can only be downloaded with list id specified list_id = mobj.group('list_id') if list_id: - info_url += '&list=%s' % list_id + data['list'] = list_id + + payload = self._download_payload('al_video', video_id, data) + info_page = payload[1] + opts = payload[-1] + mv_data = opts.get('mvData') or {} + player = opts.get('player') or {} else: - info_url = 'http://vk.com/video_ext.php?' + mobj.group('embed_query') video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) - info_page = self._download_webpage(info_url, video_id) + info_page = self._download_webpage( + 'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id) - error_message = self._html_search_regex( - [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>', - r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'], - info_page, 'error message', default=None) - if error_message: - raise ExtractorError(error_message, expected=True) + error_message = self._html_search_regex( + [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>', + r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'], + info_page, 'error message', default=None) + if error_message: + raise ExtractorError(error_message, expected=True) - if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page): - raise ExtractorError( - 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', - expected=True) + if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page): + raise ExtractorError( + 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', + expected=True) - ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.' + ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.' - ERRORS = { - r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': - ERROR_COPYRIGHT, + ERRORS = { + r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': + ERROR_COPYRIGHT, - r'>The video .*? was removed from public access by request of the copyright holder.<': - ERROR_COPYRIGHT, + r'>The video .*? was removed from public access by request of the copyright holder.<': + ERROR_COPYRIGHT, - r'<!>Please log in or <': - 'Video %s is only available for registered users, ' - 'use --username and --password options to provide account credentials.', + r'<!>Please log in or <': + 'Video %s is only available for registered users, ' + 'use --username and --password options to provide account credentials.', - r'<!>Unknown error': - 'Video %s does not exist.', + r'<!>Unknown error': + 'Video %s does not exist.', - r'<!>Видео временно недоступно': - 'Video %s is temporarily unavailable.', + r'<!>Видео временно недоступно': + 'Video %s is temporarily unavailable.', - r'<!>Access denied': - 'Access denied to video %s.', + r'<!>Access denied': + 'Access denied to video %s.', - r'<!>Видеозапись недоступна, так как её автор был заблокирован.': - 'Video %s is no longer available, because its author has been blocked.', + r'<!>Видеозапись недоступна, так как её автор был заблокирован.': + 'Video %s is no longer available, because its author has been blocked.', - r'<!>This video is no longer available, because its author has been blocked.': - 'Video %s is no longer available, because its author has been blocked.', + r'<!>This video is no longer available, because its author has been blocked.': + 'Video %s is no longer available, because its author has been blocked.', - r'<!>This video is no longer available, because it has been deleted.': - 'Video %s is no longer available, because it has been deleted.', + r'<!>This video is no longer available, because it has been deleted.': + 'Video %s is no longer available, because it has been deleted.', - r'<!>The video .+? is not available in your region.': - 'Video %s is not available in your region.', - } + r'<!>The video .+? is not available in your region.': + 'Video %s is not available in your region.', + } - for error_re, error_msg in ERRORS.items(): - if re.search(error_re, info_page): - raise ExtractorError(error_msg % video_id, expected=True) + for error_re, error_msg in ERRORS.items(): + if re.search(error_re, info_page): + raise ExtractorError(error_msg % video_id, expected=True) + + player = self._parse_json(self._search_regex( + r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', + info_page, 'player params'), video_id) youtube_url = YoutubeIE._extract_url(info_page) if youtube_url: - return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) + return self.url_result(youtube_url, YoutubeIE.ie_key()) vimeo_url = VimeoIE._extract_url(url, info_page) if vimeo_url is not None: - return self.url_result(vimeo_url) + return self.url_result(vimeo_url, VimeoIE.ie_key()) pladform_url = PladformIE._extract_url(info_page) if pladform_url: - return self.url_result(pladform_url) + return self.url_result(pladform_url, PladformIE.ie_key()) m_rutube = re.search( r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page) @@ -374,6 +403,10 @@ class VKIE(VKBaseIE): if dailymotion_urls: return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key()) + odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page) + if odnoklassniki_url: + return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) @@ -383,38 +416,7 @@ class VKIE(VKBaseIE): opts_url = 'http:' + opts_url return self.url_result(opts_url) - # vars does not look to be served anymore since 24.10.2016 - data = self._parse_json( - self._search_regex( - r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'), - video_id, fatal=False) - - # <!json> is served instead - if not data: - data = self._parse_json( - self._search_regex( - [r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'], - info_page, 'json', default='{}'), - video_id) - if data: - data = data['player']['params'][0] - - if not data: - data = self._parse_json( - self._search_regex( - r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page, - 'player params', default='{}'), - video_id) - if data: - data = data['params'][0] - - # <!--{...} - if not data: - data = self._parse_json( - self._search_regex( - r'<!--\s*({.+})', info_page, 'payload'), - video_id)['payload'][-1][-1]['player']['params'][0] - + data = player['params'][0] title = unescapeHTML(data['md_title']) # 2 = live @@ -463,12 +465,12 @@ class VKIE(VKBaseIE): 'title': title, 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), - 'uploader_id': str_or_none(data.get('author_id')), - 'duration': data.get('duration'), + 'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')), + 'duration': int_or_none(data.get('duration') or mv_data.get('duration')), 'timestamp': timestamp, 'view_count': view_count, - 'like_count': int_or_none(data.get('liked')), - 'dislike_count': int_or_none(data.get('nolikes')), + 'like_count': int_or_none(mv_data.get('likes')), + 'comment_count': int_or_none(mv_data.get('commcount')), 'is_live': is_live, } @@ -482,7 +484,6 @@ class VKUserVideosIE(VKBaseIE): 'url': 'http://vk.com/videos205387401', 'info_dict': { 'id': '205387401', - 'title': "Tom Cruise's Videos", }, 'playlist_mincount': 4, }, { @@ -498,22 +499,25 @@ class VKUserVideosIE(VKBaseIE): 'url': 'http://new.vk.com/videos205387401', 'only_matching': True, }] + _VIDEO = collections.namedtuple( + 'Video', ['owner_id', 'id', 'thumb', 'title', 'flags', 'duration', 'hash', 'moder_acts', 'owner', 'date', 'views', 'platform', 'blocked', 'music_video_meta']) def _real_extract(self, url): page_id = self._match_id(url) - webpage = self._download_webpage(url, page_id) + l = self._download_payload('al_video', page_id, { + 'act': 'load_videos_silent', + 'oid': page_id, + })[0]['']['list'] - entries = [ - self.url_result( - 'http://vk.com/video' + video_id, 'VK', video_id=video_id) - for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))] + entries = [] + for video in l: + v = self._VIDEO._make(video) + video_id = '%d_%d' % (v.owner_id, v.id) + entries.append(self.url_result( + 'http://vk.com/video' + video_id, 'VK', video_id=video_id)) - title = unescapeHTML(self._search_regex( - r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos', - webpage, 'title', default=page_id)) - - return self.playlist_result(entries, page_id, title) + return self.playlist_result(entries, page_id) class VKWallPostIE(VKBaseIE): @@ -523,15 +527,15 @@ class VKWallPostIE(VKBaseIE): # public page URL, audio playlist 'url': 'https://vk.com/bs.official?w=wall-23538238_35', 'info_dict': { - 'id': '23538238_35', - 'title': 'Black Shadow - Wall post 23538238_35', + 'id': '-23538238_35', + 'title': 'Black Shadow - Wall post -23538238_35', 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c', }, 'playlist': [{ 'md5': '5ba93864ec5b85f7ce19a9af4af080f6', 'info_dict': { 'id': '135220665_111806521', - 'ext': 'mp3', + 'ext': 'mp4', 'title': 'Black Shadow - Слепое Верование', 'duration': 370, 'uploader': 'Black Shadow', @@ -542,18 +546,16 @@ class VKWallPostIE(VKBaseIE): 'md5': '4cc7e804579122b17ea95af7834c9233', 'info_dict': { 'id': '135220665_111802303', - 'ext': 'mp3', + 'ext': 'mp4', 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!', 'duration': 423, 'uploader': 'Black Shadow', 'artist': 'Black Shadow', 'track': 'Война - Негасимое Бездны Пламя!', }, - 'params': { - 'skip_download': True, - }, }], 'params': { + 'skip_download': True, 'usenetrc': True, }, 'skip': 'Requires vk account credentials', @@ -562,7 +564,7 @@ class VKWallPostIE(VKBaseIE): 'url': 'https://vk.com/wall85155021_6319', 'info_dict': { 'id': '85155021_6319', - 'title': 'Sergey Gorbunov - Wall post 85155021_6319', + 'title': 'Сергей Горбунов - Wall post 85155021_6319', }, 'playlist_count': 1, 'params': { @@ -578,58 +580,73 @@ class VKWallPostIE(VKBaseIE): 'url': 'https://m.vk.com/wall-23538238_35', 'only_matching': True, }] + _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/=' + _AUDIO = collections.namedtuple( + 'Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads', 'subtitle', 'main_artists', 'feat_artists', 'album', 'track_code', 'restriction', 'album_part', 'new_stats', 'access_key']) + + def _decode(self, enc): + dec = '' + e = n = 0 + for c in enc: + r = self._BASE64_CHARS.index(c) + cond = n % 4 + e = 64 * e + r if cond else r + n += 1 + if cond: + dec += chr(255 & e >> (-2 * n & 6)) + return dec + + def _unmask_url(self, mask_url, vk_id): + if 'audio_api_unavailable' in mask_url: + extra = mask_url.split('?extra=')[1].split('#') + func, base = self._decode(extra[1]).split(chr(11)) + assert (func == 'i') + mask_url = list(self._decode(extra[0])) + url_len = len(mask_url) + indexes = [None] * url_len + index = int(base) ^ vk_id + for n in range(url_len - 1, -1, -1): + index = (url_len * (n + 1) ^ index + n) % url_len + indexes[n] = index + for n in range(1, url_len): + c = mask_url[n] + index = indexes[url_len - 1 - n] + mask_url[n] = mask_url[index] + mask_url[index] = c + mask_url = ''.join(mask_url) + return mask_url def _real_extract(self, url): post_id = self._match_id(url) - wall_url = 'https://vk.com/wall%s' % post_id - - post_id = remove_start(post_id, '-') - - webpage = self._download_webpage(wall_url, post_id) - - error = self._html_search_regex( - r'>Error</div>\s*<div[^>]+class=["\']body["\'][^>]*>([^<]+)', - webpage, 'error', default=None) - if error: - raise ExtractorError('VK said: %s' % error, expected=True) + webpage = self._download_payload('wkview', post_id, { + 'act': 'show', + 'w': 'wall' + post_id, + })[1] description = clean_html(get_element_by_class('wall_post_text', webpage)) uploader = clean_html(get_element_by_class('author', webpage)) - thumbnail = self._og_search_thumbnail(webpage) entries = [] - audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage) - if audio_ids: - al_audio = self._download_webpage( - 'https://vk.com/al_audio.php', post_id, - note='Downloading audio info', fatal=False, - data=urlencode_postdata({ - 'act': 'reload_audio', - 'al': '1', - 'ids': ','.join(audio_ids) - })) - if al_audio: - Audio = collections.namedtuple( - 'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration']) - audios = self._parse_json( - self._search_regex( - r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'), - post_id, fatal=False, transform_source=unescapeHTML) - if isinstance(audios, list): - for audio in audios: - a = Audio._make(audio[:6]) - entries.append({ - 'id': '%s_%s' % (a.user_id, a.id), - 'url': a.url, - 'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id, - 'thumbnail': thumbnail, - 'duration': a.duration, - 'uploader': uploader, - 'artist': a.artist, - 'track': a.track, - }) + for audio in re.findall(r'data-audio="([^"]+)', webpage): + audio = self._parse_json(unescapeHTML(audio), post_id) + a = self._AUDIO._make(audio) + if not a.url: + continue + title = unescapeHTML(a.title) + entries.append({ + 'id': '%s_%s' % (a.owner_id, a.id), + 'url': self._unmask_url(a.url, a.ads['vk_id']), + 'title': '%s - %s' % (a.performer, title) if a.performer else title, + 'thumbnail': a.cover_url.split(',') if a.cover_url else None, + 'duration': a.duration, + 'uploader': uploader, + 'artist': a.performer, + 'track': title, + 'ext': 'mp4', + 'protocol': 'm3u8', + }) for video in re.finditer( r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage): From 42cd0824b3975e6ce500d8cecd60e1fc077a758b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 26 Oct 2019 00:06:05 +0100 Subject: [PATCH 0883/1201] [vk] remove assert statement --- youtube_dl/extractor/vk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index c289fcad3..4c8ca4f41 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -600,7 +600,6 @@ class VKWallPostIE(VKBaseIE): if 'audio_api_unavailable' in mask_url: extra = mask_url.split('?extra=')[1].split('#') func, base = self._decode(extra[1]).split(chr(11)) - assert (func == 'i') mask_url = list(self._decode(extra[0])) url_len = len(mask_url) indexes = [None] * url_len From 235dbb434bfa724718c37d8af0a61baf93b775be Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 26 Oct 2019 14:57:42 +0100 Subject: [PATCH 0884/1201] [discoverynetworks] add support for dplay.co.uk --- youtube_dl/extractor/discoverynetworks.py | 63 +++++++---------------- 1 file changed, 19 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/discoverynetworks.py b/youtube_dl/extractor/discoverynetworks.py index fba1ef221..607a54948 100644 --- a/youtube_dl/extractor/discoverynetworks.py +++ b/youtube_dl/extractor/discoverynetworks.py @@ -3,63 +3,38 @@ from __future__ import unicode_literals import re -from .brightcove import BrightcoveLegacyIE from .dplay import DPlayIE -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) -from ..utils import smuggle_url class DiscoveryNetworksDeIE(DPlayIE): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/ - (?: - .*\#(?P<id>\d+)| - (?:[^/]+/)*videos/(?P<display_id>[^/?#]+)| - programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+) - )''' + _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)' _TESTS = [{ - 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', + 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', 'info_dict': { - 'id': '3235167922001', + 'id': '78867', 'ext': 'mp4', - 'title': 'Breaking Amish: Die Welt da draußen', - 'description': ( - 'Vier Amische und eine Mennonitin wagen in New York' - ' den Sprung in ein komplett anderes Leben. Begleitet sie auf' - ' ihrem spannenden Weg.'), - 'timestamp': 1396598084, - 'upload_date': '20140404', - 'uploader_id': '1659832546', + 'title': 'Die Welt da draußen', + 'description': 'md5:61033c12b73286e409d99a41742ef608', + 'timestamp': 1554069600, + 'upload_date': '20190331', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, }, }, { - 'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/', + 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', 'only_matching': True, }, { - 'url': 'http://www.discovery.de/#5332316765001', + 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', 'only_matching': True, }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - alternate_id = mobj.group('alternate_id') - if alternate_id: - self._initialize_geo_bypass({ - 'countries': ['DE'], - }) - return self._get_disco_api_info( - url, '%s/%s' % (mobj.group('programme'), alternate_id), - 'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de') - brightcove_id = mobj.group('id') - if not brightcove_id: - title = mobj.group('title') - webpage = self._download_webpage(url, title) - brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - brightcove_id = compat_parse_qs(compat_urlparse.urlparse( - brightcove_legacy_url).query)['@videoPlayer'][0] - return self.url_result(smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}), - 'BrightcoveNew', brightcove_id) + domain, programme, alternate_id = re.match(self._VALID_URL, url).groups() + country = 'GB' if domain == 'dplay.co.uk' else 'DE' + realm = 'questuk' if country == 'GB' else domain.replace('.', '') + return self._get_disco_api_info( + url, '%s/%s' % (programme, alternate_id), + 'sonic-eu1-prod.disco-api.com', realm, country) From 0b98f3a7517601b7d2aabc789997016b9c3c24f2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 26 Oct 2019 14:58:29 +0100 Subject: [PATCH 0885/1201] [dplay] improve extraction - add support for dplay.fi, dplay.jp and es.dplay.com(closes #16969) - fix it.dplay.com extraction(closes #22826) - update tests - extract creator, tags and thumbnails - handle playback API call errors --- youtube_dl/extractor/dplay.py | 397 ++++++++++------------------- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 133 insertions(+), 269 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index ebf59512c..d9c3d59cd 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -1,74 +1,68 @@ # coding: utf-8 from __future__ import unicode_literals -import json import re -import time from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urlparse, -) +from ..compat import compat_HTTPError from ..utils import ( determine_ext, ExtractorError, float_or_none, int_or_none, - remove_end, - try_get, - unified_strdate, unified_timestamp, - update_url_query, - urljoin, - USER_AGENTS, ) class DPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)' + _VALID_URL = r'''(?x)https?:// + (?P<domain> + (?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))| + (?P<subdomain_country>es|it)\.dplay\.com + )/[^/]+/(?P<id>[^/]+/[^/?#]+)''' _TESTS = [{ # non geo restricted, via secure api, unsigned download hls URL - 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', + 'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101', 'info_dict': { - 'id': '3172', - 'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet', + 'id': '13628', + 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101', 'ext': 'mp4', 'title': 'Svensken lär sig njuta av livet', 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', - 'duration': 2650, - 'timestamp': 1365454320, + 'duration': 2649.856, + 'timestamp': 1365453720, 'upload_date': '20130408', - 'creator': 'Kanal 5 (Home)', + 'creator': 'Kanal 5', 'series': 'Nugammalt - 77 händelser som format Sverige', 'season_number': 1, 'episode_number': 1, - 'age_limit': 0, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, }, }, { # geo restricted, via secure api, unsigned download hls URL - 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', + 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster', 'info_dict': { - 'id': '70816', - 'display_id': 'mig-og-min-mor/season-6-episode-12', + 'id': '104465', + 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster', 'ext': 'mp4', - 'title': 'Episode 12', - 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', - 'duration': 2563, - 'timestamp': 1429696800, - 'upload_date': '20150422', - 'creator': 'Kanal 4 (Home)', - 'series': 'Mig og min mor', - 'season_number': 6, - 'episode_number': 12, - 'age_limit': 0, + 'title': 'Ted Bundy: Mind Of A Monster', + 'description': 'md5:8b780f6f18de4dae631668b8a9637995', + 'duration': 5290.027, + 'timestamp': 1570694400, + 'upload_date': '20191010', + 'creator': 'ID - Investigation Discovery', + 'series': 'Ted Bundy: Mind Of A Monster', + 'season_number': 1, + 'episode_number': 1, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, }, - }, { - # geo restricted, via direct unsigned hls URL - 'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/', - 'only_matching': True, }, { # disco-api 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', @@ -89,19 +83,59 @@ class DPlayIE(InfoExtractor): 'format': 'bestvideo', 'skip_download': True, }, + 'skip': 'Available for Premium users', }, { - - 'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3', + 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', + 'md5': '2b808ffb00fc47b884a172ca5d13053c', + 'info_dict': { + 'id': '6918', + 'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij', + 'ext': 'mp4', + 'title': 'Luigi Di Maio: la psicosi di Stanislawskij', + 'description': 'md5:3c7a4303aef85868f867a26f5cc14813', + 'thumbnail': r're:^https?://.*\.jpe?g', + 'upload_date': '20160524', + 'timestamp': 1464076800, + 'series': 'Biografie imbarazzanti', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + }, + }, { + 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/', + 'info_dict': { + 'id': '21652', + 'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1', + 'ext': 'mp4', + 'title': 'Episodio 1', + 'description': 'md5:b9dcff2071086e003737485210675f69', + 'thumbnail': r're:^https?://.*\.png', + 'upload_date': '20180709', + 'timestamp': 1531173540, + 'series': 'La fiebre del oro', + 'season_number': 8, + 'episode': 'Episode 1', + 'episode_number': 1, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16', 'only_matching': True, }, { - 'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001', + 'url': 'https://www.dplay.jp/video/gold-rush/24086', 'only_matching': True, }] - def _get_disco_api_info(self, url, display_id, disco_host, realm): - disco_base = 'https://' + disco_host + def _get_disco_api_info(self, url, display_id, disco_host, realm, country): + geo_countries = [country.upper()] + self._initialize_geo_bypass({ + 'countries': geo_countries, + }) + disco_base = 'https://%s/' % disco_host token = self._download_json( - '%s/token' % disco_base, display_id, 'Downloading token', + disco_base + 'token', display_id, 'Downloading token', query={ 'realm': realm, })['data']['attributes']['token'] @@ -110,17 +144,30 @@ class DPlayIE(InfoExtractor): 'Authorization': 'Bearer ' + token, } video = self._download_json( - '%s/content/videos/%s' % (disco_base, display_id), display_id, + disco_base + 'content/videos/' + display_id, display_id, headers=headers, query={ - 'include': 'show' + 'include': 'images,primaryChannel,show,tags' }) video_id = video['data']['id'] info = video['data']['attributes'] - title = info['name'] + title = info['name'].strip() formats = [] - for format_id, format_dict in self._download_json( - '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id), - display_id, headers=headers)['data']['attributes']['streaming'].items(): + try: + streaming = self._download_json( + disco_base + 'playback/videoPlaybackInfo/' + video_id, + display_id, headers=headers)['data']['attributes']['streaming'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + info = self._parse_json(e.cause.read().decode('utf-8'), display_id) + error = info['errors'][0] + error_code = error.get('code') + if error_code == 'access.denied.geoblocked': + self.raise_geo_restricted(countries=geo_countries) + elif error_code == 'access.denied.missingpackage': + self.raise_login_required() + raise ExtractorError(info['errors'][0]['detail'], expected=True) + raise + for format_id, format_dict in streaming.items(): if not isinstance(format_dict, dict): continue format_url = format_dict.get('url') @@ -142,235 +189,55 @@ class DPlayIE(InfoExtractor): }) self._sort_formats(formats) - series = None - try: - included = video.get('included') - if isinstance(included, list): - show = next(e for e in included if e.get('type') == 'show') - series = try_get( - show, lambda x: x['attributes']['name'], compat_str) - except StopIteration: - pass + creator = series = None + tags = [] + thumbnails = [] + included = video.get('included') or [] + if isinstance(included, list): + for e in included: + attributes = e.get('attributes') + if not attributes: + continue + e_type = e.get('type') + if e_type == 'channel': + creator = attributes.get('name') + elif e_type == 'image': + src = attributes.get('src') + if src: + thumbnails.append({ + 'url': src, + 'width': int_or_none(attributes.get('width')), + 'height': int_or_none(attributes.get('height')), + }) + if e_type == 'show': + series = attributes.get('name') + elif e_type == 'tag': + name = attributes.get('name') + if name: + tags.append(name) return { 'id': video_id, 'display_id': display_id, 'title': title, 'description': info.get('description'), - 'duration': float_or_none( - info.get('videoDuration'), scale=1000), + 'duration': float_or_none(info.get('videoDuration'), 1000), 'timestamp': unified_timestamp(info.get('publishStart')), 'series': series, 'season_number': int_or_none(info.get('seasonNumber')), 'episode_number': int_or_none(info.get('episodeNumber')), 'age_limit': int_or_none(info.get('minimum_age')), + 'creator': creator, + 'tags': tags, + 'thumbnails': thumbnails, 'formats': formats, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id = mobj.group('id') - domain = mobj.group('domain') - - self._initialize_geo_bypass({ - 'countries': [mobj.group('country').upper()], - }) - - webpage = self._download_webpage(url, display_id) - - video_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'video id', default=None) - - if not video_id: - host = mobj.group('host') - return self._get_disco_api_info( - url, display_id, 'disco-api.' + host, host.replace('.', '')) - - info = self._download_json( - 'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id), - video_id)['data'][0] - - title = info['title'] - - PROTOCOLS = ('hls', 'hds') - formats = [] - - def extract_formats(protocol, manifest_url): - if protocol == 'hls': - m3u8_formats = self._extract_m3u8_formats( - manifest_url, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False) - # Sometimes final URLs inside m3u8 are unsigned, let's fix this - # ourselves. Also fragments' URLs are only served signed for - # Safari user agent. - query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query) - for m3u8_format in m3u8_formats: - m3u8_format.update({ - 'url': update_url_query(m3u8_format['url'], query), - 'http_headers': { - 'User-Agent': USER_AGENTS['Safari'], - }, - }) - formats.extend(m3u8_formats) - elif protocol == 'hds': - formats.extend(self._extract_f4m_formats( - manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0', - video_id, f4m_id=protocol, fatal=False)) - - domain_tld = domain.split('.')[-1] - if domain_tld in ('se', 'dk', 'no'): - for protocol in PROTOCOLS: - # Providing dsc-geo allows to bypass geo restriction in some cases - self._set_cookie( - 'secure.dplay.%s' % domain_tld, 'dsc-geo', - json.dumps({ - 'countryCode': domain_tld.upper(), - 'expiry': (time.time() + 20 * 60) * 1000, - })) - stream = self._download_json( - 'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s' - % (domain_tld, video_id, protocol), video_id, - 'Downloading %s stream JSON' % protocol, fatal=False) - if stream and stream.get(protocol): - extract_formats(protocol, stream[protocol]) - - # The last resort is to try direct unsigned hls/hds URLs from info dictionary. - # Sometimes this does work even when secure API with dsc-geo has failed (e.g. - # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/). - if not formats: - for protocol in PROTOCOLS: - if info.get(protocol): - extract_formats(protocol, info[protocol]) - - self._sort_formats(formats) - - subtitles = {} - for lang in ('se', 'sv', 'da', 'nl', 'no'): - for format_id in ('web_vtt', 'vtt', 'srt'): - subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id)) - if subtitle_url: - subtitles.setdefault(lang, []).append({'url': subtitle_url}) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': info.get('video_metadata_longDescription'), - 'duration': int_or_none(info.get('video_metadata_length'), scale=1000), - 'timestamp': int_or_none(info.get('video_publish_date')), - 'creator': info.get('video_metadata_homeChannel'), - 'series': info.get('video_metadata_show'), - 'season_number': int_or_none(info.get('season')), - 'episode_number': int_or_none(info.get('episode')), - 'age_limit': int_or_none(info.get('minimum_age')), - 'formats': formats, - 'subtitles': subtitles, - } - - -class DPlayItIE(InfoExtractor): - _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)' - _GEO_COUNTRIES = ['IT'] - _TEST = { - 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', - 'md5': '2b808ffb00fc47b884a172ca5d13053c', - 'info_dict': { - 'id': '6918', - 'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij', - 'ext': 'mp4', - 'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij', - 'description': 'md5:3c7a4303aef85868f867a26f5cc14813', - 'thumbnail': r're:^https?://.*\.jpe?g', - 'upload_date': '20160524', - 'series': 'Biografie imbarazzanti', - 'season_number': 1, - 'episode': 'Luigi Di Maio: la psicosi di Stanislawskij', - 'episode_number': 1, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - title = remove_end(self._og_search_title(webpage), ' | Dplay') - - video_id = None - - info = self._search_regex( - r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")', - webpage, 'playback JSON', default=None) - if info: - for _ in range(2): - info = self._parse_json(info, display_id, fatal=False) - if not info: - break - else: - video_id = try_get(info, lambda x: x['data']['id']) - - if not info: - info_url = self._search_regex( - (r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', - r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'), - webpage, 'info url', group='url') - - info_url = urljoin(url, info_url) - video_id = info_url.rpartition('/')[-1] - - try: - info = self._download_json( - info_url, display_id, headers={ - 'Authorization': 'Bearer %s' % self._get_cookies(url).get( - 'dplayit_token').value, - 'Referer': url, - }) - if isinstance(info, compat_str): - info = self._parse_json(info, display_id) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): - info = self._parse_json(e.cause.read().decode('utf-8'), display_id) - error = info['errors'][0] - if error.get('code') == 'access.denied.geoblocked': - self.raise_geo_restricted( - msg=error.get('detail'), countries=self._GEO_COUNTRIES) - raise ExtractorError(info['errors'][0]['detail'], expected=True) - raise - - hls_url = info['data']['attributes']['streaming']['hls']['url'] - - formats = self._extract_m3u8_formats( - hls_url, display_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - self._sort_formats(formats) - - series = self._html_search_regex( - r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>', - webpage, 'series', fatal=False) - episode = self._search_regex( - r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)', - webpage, 'episode', fatal=False) - - mobj = re.search( - r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})', - webpage) - if mobj: - season_number = int(mobj.group('season_number')) - episode_number = int(mobj.group('episode_number')) - upload_date = unified_strdate(mobj.group('upload_date')) - else: - season_number = episode_number = upload_date = None - - return { - 'id': compat_str(video_id or display_id), - 'display_id': display_id, - 'title': title, - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'series': series, - 'season_number': season_number, - 'episode': episode, - 'episode_number': episode_number, - 'upload_date': upload_date, - 'formats': formats, - } + domain = mobj.group('domain').lstrip('www.') + country = mobj.group('country') or mobj.group('subdomain_country') + host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com' + return self._get_disco_api_info( + url, display_id, host, 'dplay' + country, country) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1db21529f..a8fe0de1a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -277,10 +277,7 @@ from .douyutv import ( DouyuShowIE, DouyuTVIE, ) -from .dplay import ( - DPlayIE, - DPlayItIE, -) +from .dplay import DPlayIE from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE from .drtuber import DrTuberIE From 548c395716b1d5aa215e526fcb052a03926c1573 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 27 Oct 2019 17:52:46 +0100 Subject: [PATCH 0886/1201] [soundcloud] improve extraction - improve format extraction(closes #22123) - extract uploader_id and uploader_url(closes #21916) - extract all known thumbnails(closes #19071)(closes #20659) - fix extration for private playlists(closes #20976) - add support for playlist embeds(#20976) - skip preview formats(closes #22806) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 6 +- youtube_dl/extractor/soundcloud.py | 497 ++++++++++++++--------------- 3 files changed, 248 insertions(+), 256 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a8fe0de1a..388c1ebe6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1033,6 +1033,7 @@ from .snotr import SnotrIE from .sohu import SohuIE from .sonyliv import SonyLIVIE from .soundcloud import ( + SoundcloudEmbedIE, SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f66cae0eb..1c0780e98 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -80,7 +80,7 @@ from .theplatform import ThePlatformIE from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE -from .soundcloud import SoundcloudIE +from .soundcloud import SoundcloudEmbedIE from .tunein import TuneInBaseIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE @@ -2749,9 +2749,9 @@ class GenericIE(InfoExtractor): return self.url_result(myvi_url) # Look for embedded soundcloud player - soundcloud_urls = SoundcloudIE._extract_urls(webpage) + soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage) if soundcloud_urls: - return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) + return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML) # Look for tunein player tunein_urls = TuneInBaseIE._extract_urls(webpage) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 05538f3d6..875b9d887 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -11,14 +11,13 @@ from .common import ( from ..compat import ( compat_str, compat_urlparse, - compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, float_or_none, + HEADRequest, int_or_none, KNOWN_EXTENSIONS, - merge_dicts, mimetype2ext, str_or_none, try_get, @@ -28,6 +27,20 @@ from ..utils import ( ) +class SoundcloudEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?url=(?P<id>.*)' + + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', + webpage)] + + def _real_extract(self, url): + return self.url_result(compat_urlparse.parse_qs( + compat_urlparse.urlparse(url).query)['url'][0]) + + class SoundcloudIE(InfoExtractor): """Information extractor for soundcloud.com To access the media, the uid of the song and a stream token @@ -44,9 +57,8 @@ class SoundcloudIE(InfoExtractor): (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P<title>[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) - |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) + |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+) (?:/?\?secret_token=(?P<secret_token>[^&]+))?) - |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) ) ''' IE_NAME = 'soundcloud' @@ -60,6 +72,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'uploader': 'E.T. ExTerrestrial Music', + 'uploader_id': '1571244', 'timestamp': 1349920598, 'upload_date': '20121011', 'duration': 143.216, @@ -79,6 +92,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Goldrushed', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', + 'uploader_id': '9615865', 'timestamp': 1337635207, 'upload_date': '20120521', 'duration': 30, @@ -92,6 +106,7 @@ class SoundcloudIE(InfoExtractor): # rtmp 'skip_download': True, }, + 'skip': 'Preview', }, # private link { @@ -103,6 +118,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Youtube - Dl Test Video \'\' Ä↭', 'description': 'test chars: \"\'/\\ä↭', 'uploader': 'jaimeMF', + 'uploader_id': '69767071', 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9.927, @@ -123,6 +139,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Youtube - Dl Test Video \'\' Ä↭', 'description': 'test chars: \"\'/\\ä↭', 'uploader': 'jaimeMF', + 'uploader_id': '69767071', 'timestamp': 1386604920, 'upload_date': '20131209', 'duration': 9.927, @@ -143,6 +160,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Bus Brakes', 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 'uploader': 'oddsamples', + 'uploader_id': '73680509', 'timestamp': 1389232924, 'upload_date': '20140109', 'duration': 17.346, @@ -163,6 +181,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'uploader': 'Ori Uplift Music', + 'uploader_id': '12563093', 'timestamp': 1504206263, 'upload_date': '20170831', 'duration': 7449.096, @@ -183,6 +202,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Sideways (Prod. Mad Real)', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'uploader': 'garyvee', + 'uploader_id': '2366352', 'timestamp': 1488152409, 'upload_date': '20170226', 'duration': 207.012, @@ -207,6 +227,7 @@ class SoundcloudIE(InfoExtractor): 'title': 'Mezzo Valzer', 'description': 'md5:4138d582f81866a530317bae316e8b61', 'uploader': 'Giovanni Sarani', + 'uploader_id': '3352531', 'timestamp': 1551394171, 'upload_date': '20190228', 'duration': 180.157, @@ -221,114 +242,81 @@ class SoundcloudIE(InfoExtractor): } ] + _API_BASE = 'https://api.soundcloud.com/' + _API_V2_BASE = 'https://api-v2.soundcloud.com/' + _BASE_URL = 'https://soundcloud.com/' _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' + _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' - @staticmethod - def _extract_urls(webpage): - return [m.group('url') for m in re.finditer( - r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', - webpage)] + _ARTWORK_MAP = { + 'mini': 16, + 'tiny': 20, + 'small': 32, + 'badge': 47, + 't67x67': 67, + 'large': 100, + 't300x300': 300, + 'crop': 400, + 't500x500': 500, + 'original': 0, + } @classmethod def _resolv_url(cls, url): - return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID + return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID - def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): + def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2): track_id = compat_str(info['id']) title = info['title'] - name = full_title or track_id - if quiet: - self.report_extraction(name) - thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url') - if isinstance(thumbnail, compat_str): - thumbnail = thumbnail.replace('-large', '-t500x500') - username = try_get(info, lambda x: x['user']['username'], compat_str) - - def extract_count(key): - return int_or_none(info.get('%s_count' % key)) - - like_count = extract_count('favoritings') - if like_count is None: - like_count = extract_count('likes') - - result = { - 'id': track_id, - 'uploader': username, - 'timestamp': unified_timestamp(info.get('created_at')), - 'title': title, - 'description': info.get('description'), - 'thumbnail': thumbnail, - 'duration': float_or_none(info.get('duration'), 1000), - 'webpage_url': info.get('permalink_url'), - 'license': info.get('license'), - 'view_count': extract_count('playback'), - 'like_count': like_count, - 'comment_count': extract_count('comment'), - 'repost_count': extract_count('reposts'), - 'genre': info.get('genre'), - } + track_base_url = self._API_BASE + 'tracks/%s' % track_id format_urls = set() formats = [] query = {'client_id': self._CLIENT_ID} - if secret_token is not None: + if secret_token: query['secret_token'] = secret_token - if info.get('downloadable', False): - # We can build a direct link to the song + + if info.get('downloadable'): format_url = update_url_query( - 'https://api.soundcloud.com/tracks/%s/download' % track_id, query) + info.get('download_url') or track_base_url + '/download', query) format_urls.add(format_url) + if version == 2: + v1_info = self._download_json( + track_base_url, track_id, query=query, fatal=False) or {} + else: + v1_info = info formats.append({ 'format_id': 'download', - 'ext': info.get('original_format', 'mp3'), + 'ext': v1_info.get('original_format') or 'mp3', + 'filesize': int_or_none(v1_info.get('original_content_size')), 'url': format_url, - 'vcodec': 'none', 'preference': 10, }) - # Old API, does not work for some tracks (e.g. - # https://soundcloud.com/giovannisarani/mezzo-valzer) - format_dict = self._download_json( - 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id, - track_id, 'Downloading track url', query=query, fatal=False) + def invalid_url(url): + return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url) - if format_dict: - for key, stream_url in format_dict.items(): - if stream_url in format_urls: - continue - format_urls.add(stream_url) - ext, abr = 'mp3', None - mobj = re.search(r'_([^_]+)_(\d+)_url', key) - if mobj: - ext, abr = mobj.groups() - abr = int(abr) - if key.startswith('http'): - stream_formats = [{ - 'format_id': key, - 'ext': ext, - 'url': stream_url, - }] - elif key.startswith('rtmp'): - # The url doesn't have an rtmp app, we have to extract the playpath - url, path = stream_url.split('mp3:', 1) - stream_formats = [{ - 'format_id': key, - 'url': url, - 'play_path': 'mp3:' + path, - 'ext': 'flv', - }] - elif key.startswith('hls'): - stream_formats = self._extract_m3u8_formats( - stream_url, track_id, ext, entry_protocol='m3u8_native', - m3u8_id=key, fatal=False) - else: - continue - - if abr: - for f in stream_formats: - f['abr'] = abr - - formats.extend(stream_formats) + def add_format(f, protocol): + mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url) + if mobj: + for k, v in mobj.groupdict().items(): + if not f.get(k): + f[k] = v + format_id_list = [] + if protocol: + format_id_list.append(protocol) + for k in ('ext', 'abr'): + v = f.get(k) + if v: + format_id_list.append(v) + abr = f.get('abr') + if abr: + f['abr'] = int(abr) + f.update({ + 'format_id': '_'.join(format_id_list), + 'protocol': 'm3u8_native' if protocol == 'hls' else 'http', + }) + formats.append(f) # New API transcodings = try_get( @@ -337,129 +325,165 @@ class SoundcloudIE(InfoExtractor): if not isinstance(t, dict): continue format_url = url_or_none(t.get('url')) - if not format_url: + if not format_url or t.get('snipped') or '/preview/' in format_url: continue stream = self._download_json( - update_url_query(format_url, query), track_id, fatal=False) + format_url, track_id, query=query, fatal=False) if not isinstance(stream, dict): continue stream_url = url_or_none(stream.get('url')) - if not stream_url: - continue - if stream_url in format_urls: + if invalid_url(stream_url): continue format_urls.add(stream_url) - protocol = try_get(t, lambda x: x['format']['protocol'], compat_str) + stream_format = t.get('format') or {} + protocol = stream_format.get('protocol') if protocol != 'hls' and '/hls' in format_url: protocol = 'hls' ext = None preset = str_or_none(t.get('preset')) if preset: ext = preset.split('_')[0] - if ext not in KNOWN_EXTENSIONS: - mimetype = try_get( - t, lambda x: x['format']['mime_type'], compat_str) - ext = mimetype2ext(mimetype) or 'mp3' - format_id_list = [] - if protocol: - format_id_list.append(protocol) - format_id_list.append(ext) - format_id = '_'.join(format_id_list) - formats.append({ + if ext not in KNOWN_EXTENSIONS: + ext = mimetype2ext(stream_format.get('mime_type')) + add_format({ 'url': stream_url, - 'format_id': format_id, 'ext': ext, - 'protocol': 'm3u8_native' if protocol == 'hls' else 'http', - }) + }, 'http' if protocol == 'progressive' else protocol) + + if not formats: + # Old API, does not work for some tracks (e.g. + # https://soundcloud.com/giovannisarani/mezzo-valzer) + # and might serve preview URLs (e.g. + # http://www.soundcloud.com/snbrn/ele) + format_dict = self._download_json( + track_base_url + '/streams', track_id, + 'Downloading track url', query=query, fatal=False) or {} + + for key, stream_url in format_dict.items(): + if invalid_url(stream_url): + continue + format_urls.add(stream_url) + mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key) + if mobj: + protocol, ext, abr = mobj.groups() + add_format({ + 'abr': abr, + 'ext': ext, + 'url': stream_url, + }, protocol) if not formats: # We fallback to the stream_url in the original info, this # cannot be always used, sometimes it can give an HTTP 404 error - formats.append({ - 'format_id': 'fallback', - 'url': update_url_query(info['stream_url'], query), - 'ext': 'mp3', - }) - self._check_formats(formats, track_id) + urlh = self._request_webpage( + HEADRequest(info.get('stream_url') or track_base_url + '/stream'), + track_id, query=query, fatal=False) + if urlh: + stream_url = urlh.geturl() + if not invalid_url(stream_url): + add_format({'url': stream_url}, 'http') for f in formats: f['vcodec'] = 'none' self._sort_formats(formats) - result['formats'] = formats - return result + user = info.get('user') or {} + + thumbnails = [] + artwork_url = info.get('artwork_url') + thumbnail = artwork_url or user.get('avatar_url') + if isinstance(thumbnail, compat_str): + if re.search(self._IMAGE_REPL_RE, thumbnail): + for image_id, size in self._ARTWORK_MAP.items(): + i = { + 'id': image_id, + 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail), + } + if image_id == 'tiny' and not artwork_url: + size = 18 + elif image_id == 'original': + i['preference'] = 10 + if size: + i.update({ + 'width': size, + 'height': size, + }) + thumbnails.append(i) + else: + thumbnails = [{'url': thumbnail}] + + def extract_count(key): + return int_or_none(info.get('%s_count' % key)) + + return { + 'id': track_id, + 'uploader': user.get('username'), + 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'), + 'uploader_url': user.get('permalink_url'), + 'timestamp': unified_timestamp(info.get('created_at')), + 'title': title, + 'description': info.get('description'), + 'thumbnails': thumbnails, + 'duration': float_or_none(info.get('duration'), 1000), + 'webpage_url': info.get('permalink_url'), + 'license': info.get('license'), + 'view_count': extract_count('playback'), + 'like_count': extract_count('favoritings') or extract_count('likes'), + 'comment_count': extract_count('comment'), + 'repost_count': extract_count('reposts'), + 'genre': info.get('genre'), + 'formats': formats + } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) - if mobj is None: - raise ExtractorError('Invalid URL: %s' % url) + mobj = re.match(self._VALID_URL, url) track_id = mobj.group('track_id') - new_info = {} - if track_id is not None: - info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID + query = { + 'client_id': self._CLIENT_ID, + } + if track_id: + info_json_url = self._API_V2_BASE + 'tracks/' + track_id full_title = track_id token = mobj.group('secret_token') if token: - info_json_url += '&secret_token=' + token - elif mobj.group('player'): - query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - real_url = query['url'][0] - # If the token is in the query of the original url we have to - # manually add it - if 'secret_token' in query: - real_url += '?secret_token=' + query['secret_token'][0] - return self.url_result(real_url) + query['secret_token'] = token else: - # extract uploader (which is in the url) - uploader = mobj.group('uploader') - # extract simple title (uploader + slug of song title) - slug_title = mobj.group('title') + full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title') token = mobj.group('token') - full_title = resolve_title = '%s/%s' % (uploader, slug_title) if token: resolve_title += '/%s' % token + info_json_url = self._resolv_url(self._BASE_URL + resolve_title) - webpage = self._download_webpage(url, full_title, fatal=False) - if webpage: - entries = self._parse_json( - self._search_regex( - r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage, - 'data', default='[]'), full_title, fatal=False) - if entries: - for e in entries: - if not isinstance(e, dict): - continue - if e.get('id') != 67: - continue - data = try_get(e, lambda x: x['data'][0], dict) - if data: - new_info = data - break - info_json_url = self._resolv_url( - 'https://soundcloud.com/%s' % resolve_title) - - # Contains some additional info missing from new_info + version = 2 info = self._download_json( - info_json_url, full_title, 'Downloading info JSON') + info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False) + if not info: + info = self._download_json( + info_json_url.replace(self._API_V2_BASE, self._API_BASE), + full_title, 'Downloading info JSON', query=query) + version = 1 - return self._extract_info_dict( - merge_dicts(info, new_info), full_title, secret_token=token) + return self._extract_info_dict(info, full_title, token, version) class SoundcloudPlaylistBaseIE(SoundcloudIE): - @staticmethod - def _extract_id(e): - return compat_str(e['id']) if e.get('id') else None - - def _extract_track_entries(self, tracks): - return [ - self.url_result( - track['permalink_url'], SoundcloudIE.ie_key(), - video_id=self._extract_id(track)) - for track in tracks if track.get('permalink_url')] + def _extract_track_entries(self, tracks, token=None): + entries = [] + for track in tracks: + track_id = str_or_none(track.get('id')) + url = track.get('permalink_url') + if not url: + if not track_id: + continue + url = self._API_V2_BASE + 'tracks/' + track_id + if token: + url += '?secret_token=' + token + entries.append(self.url_result( + url, SoundcloudIE.ie_key(), track_id)) + return entries class SoundcloudSetIE(SoundcloudPlaylistBaseIE): @@ -480,41 +504,28 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - # extract uploader (which is in the url) - uploader = mobj.group('uploader') - # extract simple title (uploader + slug of song title) - slug_title = mobj.group('slug_title') - full_title = '%s/sets/%s' % (uploader, slug_title) - url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title) - + full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title') token = mobj.group('token') if token: full_title += '/' + token - url += '/' + token - resolv_url = self._resolv_url(url) - info = self._download_json(resolv_url, full_title) + info = self._download_json(self._resolv_url( + self._BASE_URL + full_title), full_title) if 'errors' in info: msgs = (compat_str(err['error_message']) for err in info['errors']) raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) - entries = self._extract_track_entries(info['tracks']) + entries = self._extract_track_entries(info['tracks'], token) - return { - '_type': 'playlist', - 'entries': entries, - 'id': '%s' % info['id'], - 'title': info['title'], - } + return self.playlist_result( + entries, str_or_none(info.get('id')), info.get('title')) class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): - _API_V2_BASE = 'https://api-v2.soundcloud.com' - def _extract_playlist(self, base_url, playlist_id, playlist_title): COMMON_QUERY = { - 'limit': 50, + 'limit': 2000000000, 'client_id': self._CLIENT_ID, 'linked_partitioning': '1', } @@ -522,12 +533,13 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): query = COMMON_QUERY.copy() query['offset'] = 0 - next_href = base_url + '?' + compat_urllib_parse_urlencode(query) + next_href = base_url entries = [] for i in itertools.count(): response = self._download_json( - next_href, playlist_id, 'Downloading track page %s' % (i + 1)) + next_href, playlist_id, + 'Downloading track page %s' % (i + 1), query=query) collection = response['collection'] @@ -546,9 +558,8 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): continue return self.url_result( permalink_url, - ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, - video_id=self._extract_id(cand), - video_title=cand.get('title')) + SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + str_or_none(cand.get('id')), cand.get('title')) for e in collection: entry = resolve_entry((e, e.get('track'), e.get('playlist'))) @@ -559,11 +570,10 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): if not next_href: break - parsed_next_href = compat_urlparse.urlparse(response['next_href']) - qs = compat_urlparse.parse_qs(parsed_next_href.query) - qs.update(COMMON_QUERY) - next_href = compat_urlparse.urlunparse( - parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) + next_href = response['next_href'] + parsed_next_href = compat_urlparse.urlparse(next_href) + query = compat_urlparse.parse_qs(parsed_next_href.query) + query.update(COMMON_QUERY) return { '_type': 'playlist', @@ -609,7 +619,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): 'url': 'https://soundcloud.com/jcv246/sets', 'info_dict': { 'id': '12982173', - 'title': 'Jordi / cv (Playlists)', + 'title': 'Jordi / cv (Sets)', }, 'playlist_mincount': 2, }, { @@ -636,39 +646,29 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): }] _BASE_URL_MAP = { - 'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - } - - _TITLE_MAP = { - 'all': 'All', - 'tracks': 'Tracks', - 'albums': 'Albums', - 'sets': 'Playlists', - 'reposts': 'Reposts', - 'likes': 'Likes', - 'spotlight': 'Spotlight', + 'all': 'stream/users/%s', + 'tracks': 'users/%s/tracks', + 'albums': 'users/%s/albums', + 'sets': 'users/%s/playlists', + 'reposts': 'stream/users/%s/reposts', + 'likes': 'users/%s/likes', + 'spotlight': 'users/%s/spotlight', } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group('user') - url = 'https://soundcloud.com/%s/' % uploader - resolv_url = self._resolv_url(url) user = self._download_json( - resolv_url, uploader, 'Downloading user info') + self._resolv_url(self._BASE_URL + uploader), + uploader, 'Downloading user info') resource = mobj.group('rsrc') or 'all' return self._extract_playlist( - self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']), - '%s (%s)' % (user['username'], self._TITLE_MAP[resource])) + self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'], + str_or_none(user.get('id')), + '%s (%s)' % (user['username'], resource.capitalize())) class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): @@ -678,7 +678,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', 'info_dict': { 'id': '286017854', - 'title': 'Track station: your-text', + 'title': 'Track station: your text', }, 'playlist_mincount': 47, }] @@ -686,19 +686,17 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): track_name = self._match_id(url) - webpage = self._download_webpage(url, track_name) - + track = self._download_json(self._resolv_url(url), track_name) track_id = self._search_regex( - r'soundcloud:track-stations:(\d+)', webpage, 'track id') + r'soundcloud:track-stations:(\d+)', track['id'], 'track id') return self._extract_playlist( - '%s/stations/soundcloud:track-stations:%s/tracks' - % (self._API_V2_BASE, track_id), - track_id, 'Track station: %s' % track_name) + self._API_V2_BASE + 'stations/%s/tracks' % track['id'], + track_id, 'Track station: %s' % track['title']) class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): - _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' + _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' _TESTS = [{ 'url': 'https://api.soundcloud.com/playlists/4110309', @@ -713,29 +711,22 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) playlist_id = mobj.group('id') - base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id) - data_dict = { + query = { 'client_id': self._CLIENT_ID, } token = mobj.group('token') - if token: - data_dict['secret_token'] = token + query['secret_token'] = token - data = compat_urllib_parse_urlencode(data_dict) data = self._download_json( - base_url + data, playlist_id, 'Downloading playlist') + self._API_V2_BASE + 'playlists/' + playlist_id, + playlist_id, 'Downloading playlist', query=query) - entries = self._extract_track_entries(data['tracks']) + entries = self._extract_track_entries(data['tracks'], token) - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': data.get('title'), - 'description': data.get('description'), - 'entries': entries, - } + return self.playlist_result( + entries, playlist_id, data.get('title'), data.get('description')) class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): @@ -753,18 +744,18 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): _SEARCH_KEY = 'scsearch' _MAX_RESULTS_PER_PAGE = 200 _DEFAULT_RESULTS_PER_PAGE = 50 - _API_V2_BASE = 'https://api-v2.soundcloud.com' def _get_collection(self, endpoint, collection_id, **query): limit = min( query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), self._MAX_RESULTS_PER_PAGE) - query['limit'] = limit - query['client_id'] = self._CLIENT_ID - query['linked_partitioning'] = '1' - query['offset'] = 0 - data = compat_urllib_parse_urlencode(query) - next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) + query.update({ + 'limit': limit, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': 1, + 'offset': 0, + }) + next_url = update_url_query(self._API_V2_BASE + endpoint, query) collected_results = 0 @@ -791,5 +782,5 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): break def _get_n_results(self, query, n): - tracks = self._get_collection('/search/tracks', query, limit=n, q=query) + tracks = self._get_collection('search/tracks', query, limit=n, q=query) return self.playlist_result(tracks, playlist_title=query) From dd90451f0f4867480c5ed8cb3588b30312204e3f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 27 Oct 2019 22:02:46 +0100 Subject: [PATCH 0887/1201] [tenplay] Add new extractor(closes #21446) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tenplay.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/tenplay.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 388c1ebe6..339a141a5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1133,6 +1133,7 @@ from .telequebec import ( from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .tennistv import TennisTVIE +from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE from .tfo import TFOIE diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py new file mode 100644 index 000000000..dff44a4e2 --- /dev/null +++ b/youtube_dl/extractor/tenplay.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_age_limit, + parse_iso8601, + smuggle_url, +) + + +class TenPlayIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/[^/]+/episodes/[^/]+/[^/]+/(?P<id>tpv\d{6}[a-z]{5})' + _TEST = { + 'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga', + 'info_dict': { + 'id': '6060533435001', + 'ext': 'mp4', + 'title': 'MasterChef - S1 Ep. 1', + 'description': 'md5:4fe7b78e28af8f2d900cd20d900ef95c', + 'age_limit': 10, + 'timestamp': 1240828200, + 'upload_date': '20090427', + 'uploader_id': '2199827728001', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + } + } + BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s' + + def _real_extract(self, url): + content_id = self._match_id(url) + data = self._download_json( + 'https://10play.com.au/api/video/' + content_id, content_id) + video = data.get('video') or {} + metadata = data.get('metaData') or {} + brightcove_id = video.get('videoId') or metadata['showContentVideoId'] + brightcove_url = smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['AU']}) + + return { + '_type': 'url_transparent', + 'url': brightcove_url, + 'id': content_id, + 'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'), + 'description': video.get('description'), + 'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')), + 'series': metadata.get('showName'), + 'season': metadata.get('showContentSeason'), + 'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')), + 'ie_key': 'BrightcoveNew', + } From 71fa0b04f9099090f43f6747632a9bdc3a4b1015 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Oct 2019 13:30:30 +0100 Subject: [PATCH 0888/1201] [makertv] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/makertv.py | 32 ------------------------------ 2 files changed, 33 deletions(-) delete mode 100644 youtube_dl/extractor/makertv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 339a141a5..4229518fd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -604,7 +604,6 @@ from .mailru import ( MailRuMusicIE, MailRuMusicSearchIE, ) -from .makertv import MakerTVIE from .malltv import MallTVIE from .mangomolo import ( MangomoloVideoIE, diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py deleted file mode 100644 index 8eda69cfc..000000000 --- a/youtube_dl/extractor/makertv.py +++ /dev/null @@ -1,32 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class MakerTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' - _TEST = { - 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', - 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', - 'info_dict': { - 'id': 'Fh3QgymL9gsc', - 'ext': 'mp4', - 'title': 'Maze Runner: The Scorch Trials Official Movie Review', - 'description': 'md5:11ff3362d7ef1d679fdb649f6413975a', - 'upload_date': '20150918', - 'timestamp': 1442549540, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id') - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': 'jwplatform:%s' % jwplatform_id, - 'ie_key': 'JWPlatform', - } From 80c2126e80bc41f7b66d325c4c67c61887c58fb0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Oct 2019 13:32:35 +0100 Subject: [PATCH 0889/1201] [thesun] fix extraction(closes #16966) --- youtube_dl/extractor/thesun.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/thesun.py b/youtube_dl/extractor/thesun.py index 22d003776..15d4a6932 100644 --- a/youtube_dl/extractor/thesun.py +++ b/youtube_dl/extractor/thesun.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .ooyala import OoyalaIE +from ..utils import extract_attributes class TheSunIE(InfoExtractor): @@ -16,6 +16,7 @@ class TheSunIE(InfoExtractor): }, 'playlist_count': 2, } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' def _real_extract(self, url): article_id = self._match_id(url) @@ -23,10 +24,15 @@ class TheSunIE(InfoExtractor): webpage = self._download_webpage(url, article_id) entries = [] - for ooyala_id in re.findall( - r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)', + for video in re.findall( + r'<video[^>]+data-video-id-pending=[^>]+>', webpage): - entries.append(OoyalaIE._build_url_result(ooyala_id)) + attrs = extract_attributes(video) + video_id = attrs['data-video-id-pending'] + account_id = attrs.get('data-account', '5067014667001') + entries.append(self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), + 'BrightcoveNew', video_id)) return self.playlist_result( entries, article_id, self._og_search_title(webpage, fatal=False)) From 0f9d53566a5956854af77173c0e910ed7454aadf Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Oct 2019 15:17:06 +0100 Subject: [PATCH 0890/1201] [la7] update Kaltura service URL(closes #22358) --- youtube_dl/extractor/la7.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py index 6373268c4..c3b4ffa7e 100644 --- a/youtube_dl/extractor/la7.py +++ b/youtube_dl/extractor/la7.py @@ -20,7 +20,7 @@ class LA7IE(InfoExtractor): 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', 'md5': '8b613ffc0c4bf9b9e377169fc19c214c', 'info_dict': { - 'id': 'inccool8-02-10-2015-163722', + 'id': '0_42j6wd36', 'ext': 'mp4', 'title': 'Inc.Cool8', 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico', @@ -57,7 +57,7 @@ class LA7IE(InfoExtractor): return { '_type': 'url_transparent', 'url': smuggle_url('kaltura:103:%s' % player_data['vid'], { - 'service_url': 'http://kdam.iltrovatore.it', + 'service_url': 'http://nkdam.iltrovatore.it', }), 'id': video_id, 'title': player_data['title'], From 3e252cca0e81aef55b0288f86991bb566878a9fc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 28 Oct 2019 17:39:01 +0100 Subject: [PATCH 0891/1201] [macgamestore] remove extractor Covered by generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/macgamestore.py | 42 ---------------------------- 2 files changed, 43 deletions(-) delete mode 100644 youtube_dl/extractor/macgamestore.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4229518fd..1807744be 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -598,7 +598,6 @@ from .lynda import ( LyndaCourseIE ) from .m6 import M6IE -from .macgamestore import MacGameStoreIE from .mailru import ( MailRuIE, MailRuMusicIE, diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py deleted file mode 100644 index 43db9929c..000000000 --- a/youtube_dl/extractor/macgamestore.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ExtractorError - - -class MacGameStoreIE(InfoExtractor): - IE_NAME = 'macgamestore' - IE_DESC = 'MacGameStore trailers' - _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' - - _TEST = { - 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', - 'md5': '8649b8ea684b6666b4c5be736ecddc61', - 'info_dict': { - 'id': '2450', - 'ext': 'm4v', - 'title': 'Crow', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, 'Downloading trailer page') - - if '>Missing Media<' in webpage: - raise ExtractorError( - 'Trailer %s does not exist' % video_id, expected=True) - - video_title = self._html_search_regex( - r'<title>MacGameStore: (.*?) Trailer', webpage, 'title') - - video_url = self._html_search_regex( - r'(?s)', - webpage, 'video URL') - - return { - 'id': video_id, - 'url': video_url, - 'title': video_title - } From 831b732da1d0796a1927af8767d76af780cc90f0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 17:41:17 +0100 Subject: [PATCH 0892/1201] [learnr] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/learnr.py | 33 ------------------------------ 2 files changed, 34 deletions(-) delete mode 100644 youtube_dl/extractor/learnr.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1807744be..9f3a5f8a5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -546,7 +546,6 @@ from .lcp import ( LcpPlayIE, LcpIE, ) -from .learnr import LearnrIE from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioIE, diff --git a/youtube_dl/extractor/learnr.py b/youtube_dl/extractor/learnr.py deleted file mode 100644 index 1435e090e..000000000 --- a/youtube_dl/extractor/learnr.py +++ /dev/null @@ -1,33 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class LearnrIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P[0-9]+)' - _TEST = { - 'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript', - 'md5': '3719fdf0a68397f49899e82c308a89de', - 'info_dict': { - 'id': '51624', - 'ext': 'mp4', - 'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript', - 'description': 'md5:b36dbfa92350176cdf12b4d388485503', - 'uploader': 'LearnCode.academy', - 'uploader_id': 'learncodeacademy', - 'upload_date': '20131021', - }, - 'add_ie': ['Youtube'], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - return { - '_type': 'url_transparent', - 'url': self._search_regex( - r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'), - 'id': video_id, - } From b3c2fa6dad607da6455a13d232461d4380e4b53c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 17:42:33 +0100 Subject: [PATCH 0893/1201] [tutv] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/tutv.py | 36 ------------------------------ 2 files changed, 37 deletions(-) delete mode 100644 youtube_dl/extractor/tutv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9f3a5f8a5..39282b785 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1183,7 +1183,6 @@ from .tunein import ( ) from .tunepk import TunePkIE from .turbo import TurboIE -from .tutv import TutvIE from .tv2 import ( TV2IE, TV2ArticleIE, diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py deleted file mode 100644 index 362318b24..000000000 --- a/youtube_dl/extractor/tutv.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_parse_qs, -) - - -class TutvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P[^/?]+)' - _TEST = { - 'url': 'http://tu.tv/videos/robots-futbolistas', - 'md5': '0cd9e28ad270488911b0d2a72323395d', - 'info_dict': { - 'id': '2973058', - 'ext': 'mp4', - 'title': 'Robots futbolistas', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') - - data_content = self._download_webpage( - 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') - video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8') - - return { - 'id': internal_id, - 'url': video_url, - 'title': self._og_search_title(webpage), - } From 702984eca955f61811078c33337faf9eebeb48c8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 17:49:05 +0100 Subject: [PATCH 0894/1201] [hark] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/hark.py | 33 ------------------------------ 2 files changed, 34 deletions(-) delete mode 100644 youtube_dl/extractor/hark.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 39282b785..114ede8b9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -428,7 +428,6 @@ from .googlesearch import GoogleSearchIE from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE -from .hark import HarkIE from .hbo import HBOIE from .hearthisat import HearThisAtIE from .heise import HeiseIE diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py deleted file mode 100644 index 342a6130e..000000000 --- a/youtube_dl/extractor/hark.py +++ /dev/null @@ -1,33 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class HarkIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P.+?)-.+' - _TEST = { - 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', - 'md5': '6783a58491b47b92c7c1af5a77d4cbee', - 'info_dict': { - 'id': 'mmbzyhkgny', - 'ext': 'mp3', - 'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013', - 'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.', - 'duration': 11, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - data = self._download_json( - 'http://www.hark.com/clips/%s.json' % video_id, video_id) - - return { - 'id': video_id, - 'url': data['url'], - 'title': data['name'], - 'description': data.get('description'), - 'thumbnail': data.get('image_original'), - 'duration': data.get('duration'), - } From 895e5c03db310ee97d585360ef8e6ae117e4cbd6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 19:31:20 +0100 Subject: [PATCH 0895/1201] [nbcnews] fix extraction closes #12569 closes #12576 closes #21703 closes #21923 --- youtube_dl/extractor/nbc.py | 86 +++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 10680b202..5bc39d002 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -9,9 +9,13 @@ from .theplatform import ThePlatformIE from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..utils import ( - smuggle_url, - update_url_query, int_or_none, + js_to_json, + parse_duration, + smuggle_url, + try_get, + unified_timestamp, + update_url_query, ) @@ -285,13 +289,12 @@ class NBCNewsIE(ThePlatformIE): _TESTS = [ { 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', - 'md5': 'af1adfa51312291a017720403826bb64', + 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf', 'info_dict': { 'id': '269389891880', 'ext': 'mp4', 'title': 'How Twitter Reacted To The Snowden Interview', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', - 'uploader': 'NBCU-NEWS', 'timestamp': 1401363060, 'upload_date': '20140529', }, @@ -309,28 +312,26 @@ class NBCNewsIE(ThePlatformIE): }, { 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', - 'md5': '73135a2e0ef819107bbb55a5a9b2a802', + 'md5': '8eb831eca25bfa7d25ddd83e85946548', 'info_dict': { 'id': '394064451844', 'ext': 'mp4', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', 'timestamp': 1423104900, - 'uploader': 'NBCU-NEWS', 'upload_date': '20150205', }, }, { 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', - 'md5': 'a49e173825e5fcd15c13fc297fced39d', + 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0', 'info_dict': { - 'id': '529953347624', + 'id': 'n431456', 'ext': 'mp4', - 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up', - 'description': 'md5:c8be487b2d80ff0594c005add88d8351', + 'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'", + 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301', 'upload_date': '20150922', 'timestamp': 1442917800, - 'uploader': 'NBCU-NEWS', }, }, { @@ -343,7 +344,6 @@ class NBCNewsIE(ThePlatformIE): 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', 'upload_date': '20160420', 'timestamp': 1461152093, - 'uploader': 'NBCU-NEWS', }, }, { @@ -357,7 +357,6 @@ class NBCNewsIE(ThePlatformIE): 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1406937606, 'upload_date': '20140802', - 'uploader': 'NBCU-NEWS', }, }, { @@ -373,20 +372,61 @@ class NBCNewsIE(ThePlatformIE): def _real_extract(self, url): video_id = self._match_id(url) - if not video_id.isdigit(): - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, video_id) - data = self._parse_json(self._search_regex( - r'window\.__data\s*=\s*({.+});', webpage, - 'bootstrap json'), video_id) - video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id'] + data = self._parse_json(self._search_regex( + r'window\.__data\s*=\s*({.+});', webpage, + 'bootstrap json'), video_id, js_to_json) + video_data = try_get(data, lambda x: x['video']['current'], dict) + if not video_data: + video_data = data['article']['content'][0]['primaryMedia']['video'] + title = video_data['headline']['primary'] + + formats = [] + for va in video_data.get('videoAssets', []): + public_url = va.get('publicUrl') + if not public_url: + continue + if '://link.theplatform.com/' in public_url: + public_url = update_url_query(public_url, {'format': 'redirect'}) + format_id = va.get('format') + if format_id == 'M3U': + formats.extend(self._extract_m3u8_formats( + public_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=format_id, fatal=False)) + continue + tbr = int_or_none(va.get('bitrate'), 1000) + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': public_url, + 'width': int_or_none(va.get('width')), + 'height': int_or_none(va.get('height')), + 'tbr': tbr, + 'ext': 'mp4', + }) + self._sort_formats(formats) + + subtitles = {} + closed_captioning = video_data.get('closedCaptioning') + if closed_captioning: + for cc_url in closed_captioning.values(): + if not cc_url: + continue + subtitles.setdefault('en', []).append({ + 'url': cc_url, + }) return { - '_type': 'url_transparent', 'id': video_id, - # http://feed.theplatform.com/f/2E2eJC/nbcnews also works - 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}), - 'ie_key': 'ThePlatformFeed', + 'title': title, + 'description': try_get(video_data, lambda x: x['description']['primary']), + 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']), + 'duration': parse_duration(video_data.get('duration')), + 'timestamp': unified_timestamp(video_data.get('datePublished')), + 'formats': formats, + 'subtitles': subtitles, } From 83e49259bfd4e0b54a4b53c30742109555087e3a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 19:45:42 +0100 Subject: [PATCH 0896/1201] [internetvideoarchive] fix extraction --- youtube_dl/extractor/internetvideoarchive.py | 92 ++++++-------------- 1 file changed, 28 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 76cc5ec3e..59b0a90c3 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -1,15 +1,13 @@ from __future__ import unicode_literals +import json +import re + from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urlparse, ) -from ..utils import ( - determine_ext, - int_or_none, - xpath_text, -) class InternetVideoArchiveIE(InfoExtractor): @@ -20,7 +18,7 @@ class InternetVideoArchiveIE(InfoExtractor): 'info_dict': { 'id': '194487', 'ext': 'mp4', - 'title': 'KICK-ASS 2', + 'title': 'Kick-Ass 2', 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', }, 'params': { @@ -33,68 +31,34 @@ class InternetVideoArchiveIE(InfoExtractor): def _build_json_url(query): return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query - @staticmethod - def _build_xml_url(query): - return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query - def _real_extract(self, url): - query = compat_urlparse.urlparse(url).query - query_dic = compat_parse_qs(query) - video_id = query_dic['publishedid'][0] - - if '/player/' in url: - configuration = self._download_json(url, video_id) - - # There are multiple videos in the playlist whlie only the first one - # matches the video played in browsers - video_info = configuration['playlist'][0] - title = video_info['title'] - - formats = [] - for source in video_info['sources']: - file_url = source['file'] - if determine_ext(file_url) == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) - file_url = m3u8_formats[0]['url'] - formats.extend(self._extract_f4m_formats( - file_url.replace('.m3u8', '.f4m'), - video_id, f4m_id='hds', fatal=False)) - formats.extend(self._extract_mpd_formats( - file_url.replace('.m3u8', '.mpd'), - video_id, mpd_id='dash', fatal=False)) - else: - a_format = { - 'url': file_url, - } - - if source.get('label') and source['label'][-4:] == ' kbs': - tbr = int_or_none(source['label'][:-4]) - a_format.update({ - 'tbr': tbr, - 'format_id': 'http-%d' % tbr, - }) - formats.append(a_format) - - self._sort_formats(formats) - - description = video_info.get('description') - thumbnail = video_info.get('image') - else: - configuration = self._download_xml(url, video_id) - formats = [{ - 'url': xpath_text(configuration, './file', 'file URL', fatal=True), - }] - thumbnail = xpath_text(configuration, './image', 'thumbnail') - title = 'InternetVideoArchive video %s' % video_id - description = None + query = compat_parse_qs(compat_urlparse.urlparse(url).query) + video_id = query['publishedid'][0] + data = self._download_json( + 'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx', + video_id, data=json.dumps({ + 'customerid': query['customerid'][0], + 'publishedid': video_id, + }).encode()) + title = data['Title'] + formats = self._extract_m3u8_formats( + data['VideoUrl'], video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + file_url = formats[0]['url'] + if '.ism/' in file_url: + replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url) + formats.extend(self._extract_f4m_formats( + replace_url('.f4m'), video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_mpd_formats( + replace_url('.mpd'), video_id, mpd_id='dash', fatal=False)) + formats.extend(self._extract_ism_formats( + replace_url('Manifest'), video_id, ism_id='mss', fatal=False)) + self._sort_formats(formats) return { 'id': video_id, 'title': title, 'formats': formats, - 'thumbnail': thumbnail, - 'description': description, + 'thumbnail': data.get('PosterUrl'), + 'description': data.get('Description'), } From 0086726e8674e9edec0682e7a84275c3c25ce646 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 19:48:34 +0100 Subject: [PATCH 0897/1201] [videodetective] fix extraction --- youtube_dl/extractor/videodetective.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index a19411a05..fe70db713 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse from .internetvideoarchive import InternetVideoArchiveIE @@ -13,7 +12,7 @@ class VideoDetectiveIE(InfoExtractor): 'info_dict': { 'id': '194487', 'ext': 'mp4', - 'title': 'KICK-ASS 2', + 'title': 'Kick-Ass 2', 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', }, 'params': { @@ -24,7 +23,7 @@ class VideoDetectiveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - og_video = self._og_search_video_url(webpage) - query = compat_urlparse.urlparse(og_video).query - return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key()) + query = 'customerid=69249&publishedid=' + video_id + return self.url_result( + InternetVideoArchiveIE._build_json_url(query), + ie=InternetVideoArchiveIE.ie_key()) From cfabc505984acb3830aeac7759d913bb885d64b6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 22:55:01 +0100 Subject: [PATCH 0898/1201] [mtv] fix extraction for mtv.de (closes #22113) --- youtube_dl/extractor/mtv.py | 51 ++++++++++++++----------------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 7a3b57abd..7e95ca18e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -425,14 +425,14 @@ class MTVVideoIE(MTVServicesInfoExtractor): class MTVDEIE(MTVServicesInfoExtractor): IE_NAME = 'mtv.de' - _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P\d+)-[^/#?]+/*(?:[#?].*)?$' + _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P[0-9a-z]+)' _TESTS = [{ - 'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', + 'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum', 'info_dict': { - 'id': 'music_video-a50bc5f0b3aa4b3190aa', - 'ext': 'flv', - 'title': 'MusicVideo_cro-traum', - 'description': 'Cro - Traum', + 'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5', + 'ext': 'mp4', + 'title': 'Traum', + 'description': 'Traum', }, 'params': { # rtmp download @@ -441,11 +441,12 @@ class MTVDEIE(MTVServicesInfoExtractor): 'skip': 'Blocked at Travis CI', }, { # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97) - 'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen', + 'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1', 'info_dict': { - 'id': 'local_playlist-f5ae778b9832cc837189', - 'ext': 'flv', - 'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1', + 'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285', + 'ext': 'mp4', + 'title': 'Teen Mom 2', + 'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7', }, 'params': { # rtmp download @@ -453,7 +454,7 @@ class MTVDEIE(MTVServicesInfoExtractor): }, 'skip': 'Blocked at Travis CI', }, { - 'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3', + 'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3', 'info_dict': { 'id': 'local_playlist-4e760566473c4c8c5344', 'ext': 'mp4', @@ -466,25 +467,11 @@ class MTVDEIE(MTVServicesInfoExtractor): }, 'skip': 'Das Video kann zur Zeit nicht abgespielt werden.', }] + _GEO_COUNTRIES = ['DE'] + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - playlist = self._parse_json( - self._search_regex( - r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'), - video_id) - - def _mrss_url(item): - return item['mrss'] + item.get('mrssvars', '') - - # news pages contain single video in playlist with different id - if len(playlist) == 1: - return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id) - - for item in playlist: - item_id = item.get('id') - if item_id and compat_str(item_id) == video_id: - return self._get_videos_info_from_url(_mrss_url(item), video_id) + def _get_feed_query(self, uri): + return { + 'arcEp': 'mtv.de', + 'mgid': uri, + } From 3cdcebf5470a56df7d52e6f8acbcde5b4b9f0241 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 23:31:14 +0100 Subject: [PATCH 0899/1201] [mtv] add support for mtvjapan.com --- youtube_dl/extractor/mtv.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 7e95ca18e..fedd5f46b 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re @@ -349,33 +350,29 @@ class MTVIE(MTVServicesInfoExtractor): }] -class MTV81IE(InfoExtractor): - IE_NAME = 'mtv81' - _VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P[^/?#.]+)' +class MTVJapanIE(MTVServicesInfoExtractor): + IE_NAME = 'mtvjapan' + _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P[0-9a-z]+)' _TEST = { - 'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/', - 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b', + 'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade', 'info_dict': { - 'id': '5e14040d-18a4-47c4-a582-43ff602de88e', + 'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5', 'ext': 'mp4', - 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', - 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', - 'timestamp': 1468846800, - 'upload_date': '20160718', + 'title': '【Fresh Info】Cadillac ESCALADE Sport Edition', + }, + 'params': { + 'skip_download': True, }, } + _GEO_COUNTRIES = ['JP'] + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - def _extract_mgid(self, webpage): - return self._search_regex( - r'getTheVideo\((["\'])(?Pmgid:.+?)\1', webpage, - 'mgid', group='id') - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - mgid = self._extract_mgid(webpage) - return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) + def _get_feed_query(self, uri): + return { + 'arcEp': 'mtvjapan.com', + 'mgid': uri, + } class MTVVideoIE(MTVServicesInfoExtractor): From 01358b9fc198cafb619a03ed5ad7865a74805611 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 28 Oct 2019 23:34:31 +0100 Subject: [PATCH 0900/1201] [extractors] add import for MTVJapanIE --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 114ede8b9..c10bcbcc1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -666,7 +666,7 @@ from .mtv import ( MTVVideoIE, MTVServicesEmbeddedIE, MTVDEIE, - MTV81IE, + MTVJapanIE, ) from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE From dd90a21c28cb1ec592e5961a5f67556edfb3ce87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 05:49:36 +0700 Subject: [PATCH 0901/1201] [go] Add support for abc.com and freeform.com (closes #22823, closes #22864) --- youtube_dl/extractor/go.py | 44 ++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 03e48f4ea..107059023 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -40,8 +40,8 @@ class GoIE(AdobePassIE): 'resource_id': 'Disney', } } - _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|(?Pdisneynow))\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ - % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) + _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|(?Pabc|freeform|disneynow))\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ + % '|'.join(list(_SITE_INFO.keys())) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', 'info_dict': { @@ -54,6 +54,7 @@ class GoIE(AdobePassIE): # m3u8 download 'skip_download': True, }, + 'skip': 'This content is no longer available.', }, { 'url': 'http://watchdisneyxd.go.com/doraemon', 'info_dict': { @@ -61,6 +62,34 @@ class GoIE(AdobePassIE): 'id': 'SH55574025', }, 'playlist_mincount': 51, + }, { + 'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood', + 'info_dict': { + 'id': 'VDKA3609139', + 'ext': 'mp4', + 'title': 'This Guilty Blood', + 'description': 'md5:f18e79ad1c613798d95fdabfe96cd292', + 'age_limit': 14, + }, + 'params': { + 'geo_bypass_ip_block': '3.244.239.0/24', + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet', + 'info_dict': { + 'id': 'VDKA13435179', + 'ext': 'mp4', + 'title': 'The Bet', + 'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404', + 'age_limit': 14, + }, + 'params': { + 'geo_bypass_ip_block': '3.244.239.0/24', + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', 'only_matching': True, @@ -95,10 +124,13 @@ class GoIE(AdobePassIE): if not video_id or not site_info: webpage = self._download_webpage(url, display_id or video_id) video_id = self._search_regex( - # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" - # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood - r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', - default=video_id) + ( + # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" + # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood + r'data-video-id=["\']*(VDKA\w+)', + # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet + r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)' + ), webpage, 'video id', default=video_id) if not site_info: brand = self._search_regex( (r'data-brand=\s*["\']\s*(\d+)', From aef9f87ea4dcfe483c5b776f1c37310766ad818d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 05:52:15 +0700 Subject: [PATCH 0902/1201] [go] Improve and beautify _VALID_URL --- youtube_dl/extractor/go.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 107059023..03cfba91f 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -40,8 +40,17 @@ class GoIE(AdobePassIE): 'resource_id': 'Disney', } } - _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|(?Pabc|freeform|disneynow))\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ - % '|'.join(list(_SITE_INFO.keys())) + _VALID_URL = r'''(?x) + https?:// + (?: + (?:(?P%s)\.)?go| + (?Pabc|freeform|disneynow) + )\.com/ + (?: + (?:[^/]+/)*(?P[Vv][Dd][Kk][Aa]\w+)| + (?:[^/]+/)*(?P[^/?\#]+) + ) + ''' % '|'.join(list(_SITE_INFO.keys())) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', 'info_dict': { From 0d7392e68b7ebb7215651da0784e859d7bdff826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 05:54:32 +0700 Subject: [PATCH 0903/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/ChangeLog b/ChangeLog index 64233b03b..b664368a1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,48 @@ +version + +Extractors ++ [go] Add support for abc.com and freeform.com (#22823, #22864) ++ [mtv] Add support for mtvjapan.com +* [mtv] Fix extraction for mtv.de (#22113) +* [videodetective] Fix extraction +* [internetvideoarchive] Fix extraction +* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923) +- [hark] Remove extractor +- [tutv] Remove extractor +- [learnr] Remove extractor +- [macgamestore] Remove extractor +* [la7] Update Kaltura service URL (#22358) +* [thesun] Fix extraction (#16966) +- [makertv] Remove extractor ++ [tenplay] Add support for 10play.com.au (#21446) +* [soundcloud] Improve extraction + * Improve format extraction (#22123) + + Extract uploader_id and uploader_url (#21916) + + Extract all known thumbnails (#19071, #20659) + * Fix extration for private playlists (#20976) + + Add support for playlist embeds (#20976) + * Skip preview formats (#22806) +* [dplay] Improve extraction + + Add support for dplay.fi, dplay.jp and es.dplay.com (#16969) + * Fix it.dplay.com extraction (#22826) + + Extract creator, tags and thumbnails + * Handle playback API call errors ++ [discoverynetworks] Add support for dplay.co.uk +* [vk] Improve extraction + + Add support for Odnoklassniki embeds + + Extract more videos from user lists (#4470) + + Fix wall post audio extraction (#18332) + * Improve error detection (#22568) ++ [odnoklassniki] Add support for embeds +* [puhutv] Improve extraction + * Fix subtitles extraction + * Transform HLS URLs to HTTP URLs + * Improve metadata extraction +* [ceskatelevize] Skip DRM media ++ [facebook] Extract subtitles (#22777) +* [globo] Handle alternative hash signing method + + version 2019.10.22 Core From 53896ca5be9a629c2cbaceb3fe43c707bb217437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 06:10:20 +0700 Subject: [PATCH 0904/1201] [utils] Actualize major IPv4 address blocks per country --- youtube_dl/utils.py | 71 +++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 53117ea90..aed988b88 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4979,7 +4979,7 @@ class ISO3166Utils(object): class GeoUtils(object): # Major IPv4 address blocks per country _country_ip_map = { - 'AD': '85.94.160.0/19', + 'AD': '46.172.224.0/19', 'AE': '94.200.0.0/13', 'AF': '149.54.0.0/17', 'AG': '209.59.64.0/18', @@ -4987,28 +4987,30 @@ class GeoUtils(object): 'AL': '46.99.0.0/16', 'AM': '46.70.0.0/15', 'AO': '105.168.0.0/13', - 'AP': '159.117.192.0/21', + 'AP': '182.50.184.0/21', + 'AQ': '23.154.160.0/24', 'AR': '181.0.0.0/12', 'AS': '202.70.112.0/20', - 'AT': '84.112.0.0/13', + 'AT': '77.116.0.0/14', 'AU': '1.128.0.0/11', 'AW': '181.41.0.0/18', - 'AZ': '5.191.0.0/16', + 'AX': '185.217.4.0/22', + 'AZ': '5.197.0.0/16', 'BA': '31.176.128.0/17', 'BB': '65.48.128.0/17', 'BD': '114.130.0.0/16', 'BE': '57.0.0.0/8', - 'BF': '129.45.128.0/17', + 'BF': '102.178.0.0/15', 'BG': '95.42.0.0/15', 'BH': '37.131.0.0/17', 'BI': '154.117.192.0/18', 'BJ': '137.255.0.0/16', - 'BL': '192.131.134.0/24', + 'BL': '185.212.72.0/23', 'BM': '196.12.64.0/18', 'BN': '156.31.0.0/16', 'BO': '161.56.0.0/16', 'BQ': '161.0.80.0/20', - 'BR': '152.240.0.0/12', + 'BR': '191.128.0.0/12', 'BS': '24.51.64.0/18', 'BT': '119.2.96.0/19', 'BW': '168.167.0.0/16', @@ -5016,20 +5018,20 @@ class GeoUtils(object): 'BZ': '179.42.192.0/18', 'CA': '99.224.0.0/11', 'CD': '41.243.0.0/16', - 'CF': '196.32.200.0/21', - 'CG': '197.214.128.0/17', + 'CF': '197.242.176.0/21', + 'CG': '160.113.0.0/16', 'CH': '85.0.0.0/13', - 'CI': '154.232.0.0/14', + 'CI': '102.136.0.0/14', 'CK': '202.65.32.0/19', 'CL': '152.172.0.0/14', - 'CM': '165.210.0.0/15', + 'CM': '102.244.0.0/14', 'CN': '36.128.0.0/10', 'CO': '181.240.0.0/12', 'CR': '201.192.0.0/12', 'CU': '152.206.0.0/15', 'CV': '165.90.96.0/19', 'CW': '190.88.128.0/17', - 'CY': '46.198.0.0/15', + 'CY': '31.153.0.0/16', 'CZ': '88.100.0.0/14', 'DE': '53.0.0.0/8', 'DJ': '197.241.0.0/17', @@ -5046,6 +5048,7 @@ class GeoUtils(object): 'EU': '2.16.0.0/13', 'FI': '91.152.0.0/13', 'FJ': '144.120.0.0/16', + 'FK': '80.73.208.0/21', 'FM': '119.252.112.0/20', 'FO': '88.85.32.0/19', 'FR': '90.0.0.0/9', @@ -5055,8 +5058,8 @@ class GeoUtils(object): 'GE': '31.146.0.0/16', 'GF': '161.22.64.0/18', 'GG': '62.68.160.0/19', - 'GH': '45.208.0.0/14', - 'GI': '85.115.128.0/19', + 'GH': '154.160.0.0/12', + 'GI': '95.164.0.0/16', 'GL': '88.83.0.0/19', 'GM': '160.182.0.0/15', 'GN': '197.149.192.0/18', @@ -5085,13 +5088,13 @@ class GeoUtils(object): 'JE': '87.244.64.0/18', 'JM': '72.27.0.0/17', 'JO': '176.29.0.0/16', - 'JP': '126.0.0.0/8', + 'JP': '133.0.0.0/8', 'KE': '105.48.0.0/12', 'KG': '158.181.128.0/17', 'KH': '36.37.128.0/17', 'KI': '103.25.140.0/22', 'KM': '197.255.224.0/20', - 'KN': '198.32.32.0/19', + 'KN': '198.167.192.0/19', 'KP': '175.45.176.0/22', 'KR': '175.192.0.0/10', 'KW': '37.36.0.0/14', @@ -5099,10 +5102,10 @@ class GeoUtils(object): 'KZ': '2.72.0.0/13', 'LA': '115.84.64.0/18', 'LB': '178.135.0.0/16', - 'LC': '192.147.231.0/24', + 'LC': '24.92.144.0/20', 'LI': '82.117.0.0/19', 'LK': '112.134.0.0/15', - 'LR': '41.86.0.0/19', + 'LR': '102.183.0.0/16', 'LS': '129.232.0.0/17', 'LT': '78.56.0.0/13', 'LU': '188.42.0.0/16', @@ -5127,7 +5130,7 @@ class GeoUtils(object): 'MT': '46.11.0.0/16', 'MU': '105.16.0.0/12', 'MV': '27.114.128.0/18', - 'MW': '105.234.0.0/16', + 'MW': '102.70.0.0/15', 'MX': '187.192.0.0/11', 'MY': '175.136.0.0/13', 'MZ': '197.218.0.0/15', @@ -5158,23 +5161,23 @@ class GeoUtils(object): 'PW': '202.124.224.0/20', 'PY': '181.120.0.0/14', 'QA': '37.210.0.0/15', - 'RE': '139.26.0.0/16', + 'RE': '102.35.0.0/16', 'RO': '79.112.0.0/13', - 'RS': '178.220.0.0/14', + 'RS': '93.86.0.0/15', 'RU': '5.136.0.0/13', - 'RW': '105.178.0.0/15', + 'RW': '41.186.0.0/16', 'SA': '188.48.0.0/13', 'SB': '202.1.160.0/19', 'SC': '154.192.0.0/11', - 'SD': '154.96.0.0/13', + 'SD': '102.120.0.0/13', 'SE': '78.64.0.0/12', - 'SG': '152.56.0.0/14', + 'SG': '8.128.0.0/10', 'SI': '188.196.0.0/14', 'SK': '78.98.0.0/15', - 'SL': '197.215.0.0/17', + 'SL': '102.143.0.0/17', 'SM': '89.186.32.0/19', 'SN': '41.82.0.0/15', - 'SO': '197.220.64.0/19', + 'SO': '154.115.192.0/18', 'SR': '186.179.128.0/17', 'SS': '105.235.208.0/21', 'ST': '197.159.160.0/19', @@ -5197,15 +5200,15 @@ class GeoUtils(object): 'TV': '202.2.96.0/19', 'TW': '120.96.0.0/11', 'TZ': '156.156.0.0/14', - 'UA': '93.72.0.0/13', - 'UG': '154.224.0.0/13', - 'US': '3.0.0.0/8', + 'UA': '37.52.0.0/14', + 'UG': '102.80.0.0/13', + 'US': '6.0.0.0/8', 'UY': '167.56.0.0/13', - 'UZ': '82.215.64.0/18', + 'UZ': '84.54.64.0/18', 'VA': '212.77.0.0/19', - 'VC': '24.92.144.0/20', + 'VC': '207.191.240.0/21', 'VE': '186.88.0.0/13', - 'VG': '172.103.64.0/18', + 'VG': '66.81.192.0/20', 'VI': '146.226.0.0/16', 'VN': '14.160.0.0/11', 'VU': '202.80.32.0/20', @@ -5214,8 +5217,8 @@ class GeoUtils(object): 'YE': '134.35.0.0/16', 'YT': '41.242.116.0/22', 'ZA': '41.0.0.0/11', - 'ZM': '165.56.0.0/13', - 'ZW': '41.85.192.0/19', + 'ZM': '102.144.0.0/13', + 'ZW': '102.177.192.0/18', } @classmethod From cae0bbc53831eed38c4af3755de43e223c503270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 06:11:09 +0700 Subject: [PATCH 0905/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index b664368a1..2957b7ced 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ version +Core +* [utils] Actualize major IPv4 address blocks per country + Extractors + [go] Add support for abc.com and freeform.com (#22823, #22864) + [mtv] Add support for mtvjapan.com From c4bd9cb7bb57c6e4bbc04fb054dfea14d4ecb171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Oct 2019 06:12:33 +0700 Subject: [PATCH 0906/1201] release 2019.10.29 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 10 +++------- youtube_dl/version.py | 2 +- 8 files changed, 17 insertions(+), 21 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index f1afe704c..f82502bd1 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.22 + [debug] youtube-dl version 2019.10.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index a4dc9b005..5ef983d43 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 5bf86adce..8f05aa79f 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 7aa5534e5..e90900d8d 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.22 + [debug] youtube-dl version 2019.10.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 5d3645e3d..7021d7397 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.22** +- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 2957b7ced..fcab1102c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.10.29 Core * [utils] Actualize major IPv4 address blocks per country diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a1b0edeeb..af905db5a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -232,7 +232,6 @@ - **DouyuShow** - **DouyuTV**: 斗鱼 - **DPlay** - - **DPlayIt** - **DRBonanza** - **Dropbox** - **DrTuber** @@ -339,7 +338,6 @@ - **Goshgay** - **GPUTechConf** - **Groupon** - - **Hark** - **hbo** - **HearThisAt** - **Heise** @@ -432,7 +430,6 @@ - **Lcp** - **LcpPlay** - **Le**: 乐视网 - - **Learnr** - **Lecture2Go** - **Lecturio** - **LecturioCourse** @@ -466,11 +463,9 @@ - **lynda**: lynda.com videos - **lynda:course**: lynda.com online courses - **m6** - - **macgamestore**: MacGameStore trailers - **mailru**: Видео@Mail.Ru - **mailru:music**: Музыка@Mail.Ru - **mailru:music:search**: Музыка@Mail.Ru - - **MakerTV** - **MallTV** - **mangomolo:live** - **mangomolo:video** @@ -526,8 +521,8 @@ - **mtg**: MTG services - **mtv** - **mtv.de** - - **mtv81** - **mtv:video** + - **mtvjapan** - **mtvservices:embedded** - **MuenchenTV**: münchen.tv - **MusicPlayOn** @@ -815,6 +810,7 @@ - **soundcloud:set** - **soundcloud:trackstation** - **soundcloud:user** + - **SoundcloudEmbed** - **soundgasm** - **soundgasm:profile** - **southpark.cc.com** @@ -887,6 +883,7 @@ - **TeleTask** - **Telewebion** - **TennisTV** + - **TenPlay** - **TF1** - **TFO** - **TheIntercept** @@ -925,7 +922,6 @@ - **tunein:topic** - **TunePk** - **Turbo** - - **Tutv** - **tv.dfb.de** - **TV2** - **tv2.hu** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 39b355b9e..924f26ca8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.10.22' +__version__ = '2019.10.29' From 7455832f311843663b416968b9e5a0a0c6134d8d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 29 Oct 2019 09:43:17 +0100 Subject: [PATCH 0907/1201] [fox9] fix extraction --- youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/fox9.py | 43 +++++++++++++++--------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c10bcbcc1..15f96fb8f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -367,7 +367,10 @@ from .fourtube import ( FuxIE, ) from .fox import FOXIE -from .fox9 import FOX9IE +from .fox9 import ( + FOX9IE, + FOX9NewsIE, +) from .foxgay import FoxgayIE from .foxnews import ( FoxNewsIE, diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py index 17dfffa7b..91f8f7b8a 100644 --- a/youtube_dl/extractor/fox9.py +++ b/youtube_dl/extractor/fox9.py @@ -1,13 +1,23 @@ # coding: utf-8 from __future__ import unicode_literals -from .anvato import AnvatoIE +from .common import InfoExtractor -class FOX9IE(AnvatoIE): - _VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P\d+)-story' - _TESTS = [{ - 'url': 'http://www.fox9.com/news/215123287-story', +class FOX9IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P\d+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.url_result( + 'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id, + 'Anvato', video_id) + + +class FOX9NewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P[^/?&#]+)' + _TEST = { + 'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota', 'md5': 'd6e1b2572c3bab8a849c9103615dd243', 'info_dict': { 'id': '314473', @@ -21,22 +31,11 @@ class FOX9IE(AnvatoIE): 'categories': ['News', 'Sports'], 'tags': ['news', 'video'], }, - }, { - 'url': 'http://www.fox9.com/news/investigators/214070684-story', - 'only_matching': True, - }] + } def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - video_id = self._parse_json( - self._search_regex( - r"this\.videosJson\s*=\s*'(\[.+?\])';", - webpage, 'anvato playlist'), - video_id)[0]['video'] - - return self._get_anvato_videos( - 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b', - video_id) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + anvato_id = self._search_regex( + r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id') + return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9') From 8989349e6dcaa98204f77fb9f1e15a86eecb823d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 29 Oct 2019 09:44:07 +0100 Subject: [PATCH 0908/1201] [onet] improve extraction - add support for onet100.vod.pl domain - extract m3u8 formats - correct audio only format info --- youtube_dl/extractor/onet.py | 54 ++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 58da1bc27..e55b2ac89 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -20,6 +20,8 @@ from ..utils import ( class OnetBaseIE(InfoExtractor): + _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/' + def _search_mvp_id(self, webpage): return self._search_regex( r'id=(["\'])mvp:(?P.+?)\1', webpage, 'mvp id', group='id') @@ -45,7 +47,7 @@ class OnetBaseIE(InfoExtractor): video = response['result'].get('0') formats = [] - for _, formats_dict in video['formats'].items(): + for format_type, formats_dict in video['formats'].items(): if not isinstance(formats_dict, dict): continue for format_id, format_list in formats_dict.items(): @@ -56,21 +58,31 @@ class OnetBaseIE(InfoExtractor): if not video_url: continue ext = determine_ext(video_url) - if format_id == 'ism': + if format_id.startswith('ism'): formats.extend(self._extract_ism_formats( video_url, video_id, 'mss', fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) + elif format_id.startswith('hls'): + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) else: - formats.append({ + http_f = { 'url': video_url, 'format_id': format_id, - 'height': int_or_none(f.get('vertical_resolution')), - 'width': int_or_none(f.get('horizontal_resolution')), 'abr': float_or_none(f.get('audio_bitrate')), - 'vbr': float_or_none(f.get('video_bitrate')), - }) + } + if format_type == 'audio': + http_f['vcodec'] = 'none' + else: + http_f.update({ + 'height': int_or_none(f.get('vertical_resolution')), + 'width': int_or_none(f.get('horizontal_resolution')), + 'vbr': float_or_none(f.get('video_bitrate')), + }) + formats.append(http_f) self._sort_formats(formats) meta = video.get('meta', {}) @@ -105,12 +117,12 @@ class OnetMVPIE(OnetBaseIE): class OnetIE(OnetBaseIE): - _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)' + _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)' IE_NAME = 'onet.tv' - _TEST = { + _TESTS = [{ 'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc', - 'md5': 'e3ffbf47590032ac3f27249204173d50', + 'md5': '436102770fb095c75b8bb0392d3da9ff', 'info_dict': { 'id': 'qbpyqc', 'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd', @@ -120,7 +132,10 @@ class OnetIE(OnetBaseIE): 'upload_date': '20160705', 'timestamp': 1467721580, }, - } + }, { + 'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -140,18 +155,21 @@ class OnetIE(OnetBaseIE): class OnetChannelIE(OnetBaseIE): - _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P[a-z]+)(?:[?#]|$)' + _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P[a-z]+)(?:[?#]|$)' IE_NAME = 'onet.tv:channel' - _TEST = { + _TESTS = [{ 'url': 'http://onet.tv/k/openerfestival', 'info_dict': { 'id': 'openerfestival', - 'title': 'Open\'er Festival Live', - 'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.', + 'title': "Open'er Festival", + 'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.", }, - 'playlist_mincount': 46, - } + 'playlist_mincount': 35, + }, { + 'url': 'https://onet100.vod.pl/k/openerfestival', + 'only_matching': True, + }] def _real_extract(self, url): channel_id = self._match_id(url) @@ -173,7 +191,7 @@ class OnetChannelIE(OnetBaseIE): 'Downloading channel %s - add --no-playlist to just download video %s' % ( channel_id, video_name)) matches = re.findall( - r']+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)', + r']+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE, webpage) entries = [ self.url_result(video_link, OnetIE.ie_key()) From c56b2ac43ca27b32fb4f7b230d851a61b5fc7cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Oct 2019 02:21:03 +0700 Subject: [PATCH 0909/1201] [tv2dk] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tv2dk.py | 82 ++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 youtube_dl/extractor/tv2dk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 15f96fb8f..5d20ba863 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1189,6 +1189,7 @@ from .tv2 import ( TV2IE, TV2ArticleIE, ) +from .tv2dk import TV2DKIE from .tv2hu import TV2HuIE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py new file mode 100644 index 000000000..eb39424df --- /dev/null +++ b/youtube_dl/extractor/tv2dk.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import extract_attributes + + +class TV2DKIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + tvsyd| + tv2ostjylland| + tvmidtvest| + tv2fyn| + tv2east| + tv2lorry| + tv2nord + )\.dk/ + (:[^/]+/)* + (?P[^/?\#&]+) + ''' + _TESTS = [{ + 'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player', + 'info_dict': { + 'id': '0_52jmwa0p', + 'ext': 'mp4', + 'title': '19:30 - 28. okt. 2019', + 'timestamp': 1572290248, + 'upload_date': '20191028', + 'uploader_id': 'tvsyd', + 'duration': 1347, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Kaltura'], + }, { + 'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi', + 'only_matching': True, + }, { + 'url': 'https://www.tv2ostjylland.dk/nyheder/28-10-2019/22/2200-nyhederne-mandag-d-28-oktober-2019?autoplay=1#player', + 'only_matching': True, + }, { + 'url': 'https://www.tvmidtvest.dk/nyheder/27-10-2019/1930/1930-27-okt-2019', + 'only_matching': True, + }, { + 'url': 'https://www.tv2fyn.dk/artikel/fyn-kan-faa-landets-foerste-fabrik-til-groent-jetbraendstof', + 'only_matching': True, + }, { + 'url': 'https://www.tv2east.dk/artikel/gods-faar-indleveret-tonsvis-af-aebler-100-kilo-aebler-gaar-til-en-aeblebrandy', + 'only_matching': True, + }, { + 'url': 'https://www.tv2lorry.dk/koebenhavn/rasmus-paludan-evakueret-til-egen-demonstration#player', + 'only_matching': True, + }, { + 'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + entries = [] + for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage): + video = extract_attributes(video_el) + kaltura_id = video.get('data-entryid') + if not kaltura_id: + continue + partner_id = video.get('data-partnerid') + if not partner_id: + continue + entries.append(self.url_result( + 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', + video_id=kaltura_id)) + return self.playlist_result(entries) From 9a621ddc3a42769f107f8bd0d67b2c7073ea8256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Oct 2019 02:21:52 +0700 Subject: [PATCH 0910/1201] [tv2] Fix and improve extraction (closes #22787) --- youtube_dl/extractor/tv2.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index d5071e8a5..1b6590767 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -11,6 +11,7 @@ from ..utils import ( js_to_json, parse_iso8601, remove_end, + try_get, ) @@ -44,7 +45,14 @@ class TV2IE(InfoExtractor): data = self._download_json( 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol), video_id, 'Downloading play JSON')['playback'] - for item in data['items']['item']: + items = try_get(data, lambda x: x['items']['item']) + if not items: + continue + if not isinstance(items, list): + items = [items] + for item in items: + if not isinstance(item, dict): + continue video_url = item.get('url') if not video_url or video_url in format_urls: continue From 45f4a433894556301204b704caca7d6a14286287 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 30 Oct 2019 23:07:35 +0100 Subject: [PATCH 0911/1201] [yahoo] improve extraction - add support for live streams(closes #3597)(closes #3779)(closes #22178) - bypass cookie consent page for european domains(closes #16948)(closes #22576) - add generic support for embeds(closes #20332) --- youtube_dl/extractor/yahoo.py | 672 +++++++++++++--------------------- 1 file changed, 264 insertions(+), 408 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index e5ebdd180..ee68096d0 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -3,453 +3,309 @@ from __future__ import unicode_literals import hashlib import itertools -import json import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( compat_str, compat_urllib_parse, - compat_urlparse, ) from ..utils import ( clean_html, - determine_ext, - ExtractorError, - extract_attributes, int_or_none, mimetype2ext, + parse_iso8601, smuggle_url, try_get, - unescapeHTML, url_or_none, ) -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) -from .nbc import NBCSportsVPlayerIE +from .brightcove import BrightcoveNewIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2})\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P.+)?-)?(?P[0-9]+)(?:-[a-z]+)?(?:\.html)?' - _TESTS = [ - { - 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', - 'info_dict': { - 'id': '2d25e626-2378-391f-ada0-ddaf1417e588', - 'ext': 'mp4', - 'title': 'Julian Smith & Travis Legg Watch Julian Smith', - 'description': 'Julian and Travis watch Julian Smith', - 'duration': 6863, - }, + _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+)\.html)' + _TESTS = [{ + 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', + 'info_dict': { + 'id': '2d25e626-2378-391f-ada0-ddaf1417e588', + 'ext': 'mp4', + 'title': 'Julian Smith & Travis Legg Watch Julian Smith', + 'description': 'Julian and Travis watch Julian Smith', + 'duration': 6863, + 'timestamp': 1369812016, + 'upload_date': '20130529', }, - { - 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', - 'md5': '251af144a19ebc4a033e8ba91ac726bb', - 'info_dict': { - 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9', - 'ext': 'mp4', - 'title': 'Codefellas - The Cougar Lies with Spanish Moss', - 'description': 'md5:66b627ab0a282b26352136ca96ce73c1', - 'duration': 151, - }, - 'skip': 'HTTP Error 404', + }, { + 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', + 'md5': '7993e572fac98e044588d0b5260f4352', + 'info_dict': { + 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', + 'ext': 'mp4', + 'title': "Yahoo Saves 'Community'", + 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053', + 'duration': 170, + 'timestamp': 1406838636, + 'upload_date': '20140731', }, - { - 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', - 'md5': '7993e572fac98e044588d0b5260f4352', - 'info_dict': { - 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', - 'ext': 'mp4', - 'title': "Yahoo Saves 'Community'", - 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053', - 'duration': 170, - } - }, - { - 'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html', - 'md5': '45c024bad51e63e9b6f6fad7a43a8c23', - 'info_dict': { - 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f', - 'ext': 'mp4', - 'title': '敢問市長/黃秀霜批賴清德「非常高傲」', - 'description': '直言台南沒捷運 交通居五都之末', - 'duration': 396, - }, - }, - { - 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html', - 'md5': '71298482f7c64cbb7fa064e4553ff1c1', - 'info_dict': { - 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58', - 'ext': 'webm', - 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder', - 'description': 'md5:f66c890e1490f4910a9953c941dee944', - 'duration': 97, - } - }, - { - 'url': 'https://ca.sports.yahoo.com/video/program-makes-hockey-more-affordable-013127711.html', - 'md5': '57e06440778b1828a6079d2f744212c4', - 'info_dict': { - 'id': 'c9fa2a36-0d4d-3937-b8f6-cc0fb1881e73', - 'ext': 'mp4', - 'title': 'Program that makes hockey more affordable not offered in Manitoba', - 'description': 'md5:c54a609f4c078d92b74ffb9bf1f496f4', - 'duration': 121, - }, - 'skip': 'Video gone', - }, { - 'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html', - 'info_dict': { - 'id': '154609075', - }, - 'playlist': [{ - 'md5': '000887d0dc609bc3a47c974151a40fb8', - 'info_dict': { - 'id': 'e624c4bc-3389-34de-9dfc-025f74943409', - 'ext': 'mp4', - 'title': '\'The Interview\' TV Spot: War', - 'description': 'The Interview', - 'duration': 30, - }, - }, { - 'md5': '81bc74faf10750fe36e4542f9a184c66', - 'info_dict': { - 'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9', - 'ext': 'mp4', - 'title': '\'The Interview\' TV Spot: Guys', - 'description': 'The Interview', - 'duration': 30, - }, - }], - }, { - 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html', - 'md5': '88e209b417f173d86186bef6e4d1f160', - 'info_dict': { - 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521', - 'ext': 'mp4', - 'title': 'China Moses Is Crazy About the Blues', - 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0', - 'duration': 128, - } - }, { - 'url': 'https://in.lifestyle.yahoo.com/video/connect-dots-dark-side-virgo-090247395.html', - 'md5': 'd9a083ccf1379127bf25699d67e4791b', - 'info_dict': { - 'id': '52aeeaa3-b3d1-30d8-9ef8-5d0cf05efb7c', - 'ext': 'mp4', - 'title': 'Connect the Dots: Dark Side of Virgo', - 'description': 'md5:1428185051cfd1949807ad4ff6d3686a', - 'duration': 201, - }, - 'skip': 'Domain name in.lifestyle.yahoo.com gone', - }, { - 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', - 'md5': '989396ae73d20c6f057746fb226aa215', - 'info_dict': { - 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', - 'ext': 'mp4', - 'title': '\'True Story\' Trailer', - 'description': 'True Story', - 'duration': 150, - }, - }, { - 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html', - 'only_matching': True, - }, { - 'note': 'NBC Sports embeds', - 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', - 'info_dict': { - 'id': '9CsDKds0kvHI', - 'ext': 'flv', - 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', - 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', - 'upload_date': '20150313', - 'uploader': 'NBCU-SPORTS', - 'timestamp': 1426270238, - } - }, { - 'url': 'https://tw.news.yahoo.com/-100120367.html', - 'only_matching': True, - }, { - # Query result is embedded in webpage, but explicit request to video API fails with geo restriction - 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', - 'md5': '4fbafb9c9b6f07aa8f870629f6671b35', - 'info_dict': { - 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', - 'ext': 'mp4', - 'title': 'Communitary - Community Episode 1: Ladders', - 'description': 'md5:8fc39608213295748e1e289807838c97', - 'duration': 1646, - }, - }, { - # it uses an alias to get the video_id - 'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html', - 'info_dict': { - 'id': '40eda9c8-8e5f-3552-8745-830f67d0c737', - 'ext': 'mp4', - 'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking', - 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.', - }, - }, - { - # config['models']['applet_model']['data']['sapi'] has no query - 'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016', - 'md5': 'dac0c72d502bc5facda80c9e6d5c98db', - 'info_dict': { - 'id': 'a6015640-e9e5-3efb-bb60-05589a183919', - 'ext': 'mp4', - 'description': 'Galactic', - 'title': 'Dolla Diva (feat. Maggie Koerner)', - }, - 'skip': 'redirect to https://www.yahoo.com/music', - }, - { - # yahoo://article/ - 'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html', - 'info_dict': { - 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', - 'ext': 'mp4', - 'title': "'True Story' Trailer", - 'description': 'True Story', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # ytwnews://cavideo/ - 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html', - 'info_dict': { - 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff', - 'ext': 'mp4', - 'title': '單車天使 - 中文版預', - 'description': '中文版預', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # custom brightcove - 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37083565/clown-entertainers-say-it-is-hurting-their-business/', - 'info_dict': { - 'id': '5575377707001', - 'ext': 'mp4', - 'title': "Clown entertainers say 'It' is hurting their business", - 'description': 'Stephen King s horror film has much to answer for. Jelby and Mr Loopy the Clowns join us.', - 'timestamp': 1505341164, - 'upload_date': '20170913', - 'uploader_id': '2376984109001', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # custom brightcove, geo-restricted to Australia, bypassable - 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/', - 'only_matching': True, + }, { + 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html', + 'md5': '0b51660361f0e27c9789e7037ef76f4b', + 'info_dict': { + 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58', + 'ext': 'mp4', + 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder', + 'description': 'md5:f66c890e1490f4910a9953c941dee944', + 'duration': 97, + 'timestamp': 1414489862, + 'upload_date': '20141028', } - ] + }, { + 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html', + 'md5': '88e209b417f173d86186bef6e4d1f160', + 'info_dict': { + 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521', + 'ext': 'mp4', + 'title': 'China Moses Is Crazy About the Blues', + 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0', + 'duration': 128, + 'timestamp': 1385722202, + 'upload_date': '20131129', + } + }, { + 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', + 'md5': '2a9752f74cb898af5d1083ea9f661b58', + 'info_dict': { + 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', + 'ext': 'mp4', + 'title': '\'True Story\' Trailer', + 'description': 'True Story', + 'duration': 150, + 'timestamp': 1418919206, + 'upload_date': '20141218', + }, + }, { + 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html', + 'only_matching': True, + }, { + 'note': 'NBC Sports embeds', + 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', + 'info_dict': { + 'id': '9CsDKds0kvHI', + 'ext': 'flv', + 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', + 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', + 'upload_date': '20150313', + 'uploader': 'NBCU-SPORTS', + 'timestamp': 1426270238, + }, + }, { + 'url': 'https://tw.news.yahoo.com/-100120367.html', + 'only_matching': True, + }, { + # Query result is embedded in webpage, but explicit request to video API fails with geo restriction + 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', + 'md5': '4fbafb9c9b6f07aa8f870629f6671b35', + 'info_dict': { + 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', + 'ext': 'mp4', + 'title': 'Communitary - Community Episode 1: Ladders', + 'description': 'md5:8fc39608213295748e1e289807838c97', + 'duration': 1646, + 'timestamp': 1440436550, + 'upload_date': '20150824', + 'series': 'Communitary', + 'season_number': 6, + 'episode_number': 1, + }, + }, { + # ytwnews://cavideo/ + 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html', + 'info_dict': { + 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff', + 'ext': 'mp4', + 'title': '單車天使 - 中文版預', + 'description': '中文版預', + 'timestamp': 1476696196, + 'upload_date': '20161017', + }, + 'params': { + 'skip_download': True, + }, + }, { + # Contains both a Yahoo hosted video and multiple Youtube embeds + 'url': 'https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html', + 'info_dict': { + 'id': '46c5d95a-528f-3d03-b732-732fcadd51de', + 'title': 'Gwen Stefani reveals the pop hit she passed on, assigns it to her \'Voice\' contestant instead', + 'description': 'Gwen decided not to record this hit herself, but she decided it was the perfect fit for Kyndall Inskeep.', + }, + 'playlist': [{ + 'info_dict': { + 'id': '966d4262-4fd1-3aaa-b45b-049ca6e38ba6', + 'ext': 'mp4', + 'title': 'Gwen Stefani reveals she turned down one of Sia\'s best songs', + 'description': 'On "The Voice" Tuesday, Gwen Stefani told Taylor Swift which Sia hit was almost hers.', + 'timestamp': 1572406500, + 'upload_date': '20191030', + }, + }, { + 'info_dict': { + 'id': '352CFDOQrKg', + 'ext': 'mp4', + 'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019', + 'description': 'md5:35b61e94c2ae214bc965ff4245f80d11', + 'uploader': 'The Voice', + 'uploader_id': 'NBCTheVoice', + 'upload_date': '20191029', + }, + }], + 'params': { + 'playlistend': 2, + }, + }, { + 'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html', + 'only_matching': True, + }, { + 'url': 'https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - page_id = mobj.group('id') - display_id = mobj.group('display_id') or page_id - host = mobj.group('host') - webpage, urlh = self._download_webpage_handle(url, display_id) - if 'err=404' in urlh.geturl(): - raise ExtractorError('Video gone', expected=True) - - # Look for iframed media first - entries = [] - iframe_urls = re.findall(r']+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage) - for idx, iframe_url in enumerate(iframe_urls): - entries.append(self.url_result(host + iframe_url, 'Yahoo')) - if entries: - return self.playlist_result(entries, page_id) - - # Look for NBCSports iframes - nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) - if nbc_sports_url: - return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key()) - - # Look for Brightcove Legacy Studio embeds - bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - if bc_url: - return self.url_result(bc_url, BrightcoveLegacyIE.ie_key()) - - def brightcove_url_result(bc_url): - return self.url_result( - smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}), - BrightcoveNewIE.ie_key()) - - # Look for Brightcove New Studio embeds - bc_url = BrightcoveNewIE._extract_url(self, webpage) - if bc_url: - return brightcove_url_result(bc_url) - - brightcove_iframe = self._search_regex( - r'(]+data-video-id=["\']\d+[^>]+>)', webpage, - 'brightcove iframe', default=None) - if brightcove_iframe: - attr = extract_attributes(brightcove_iframe) - src = attr.get('src') - if src: - parsed_src = compat_urlparse.urlparse(src) - qs = compat_urlparse.parse_qs(parsed_src.query) - account_id = qs.get('accountId', ['2376984109001'])[0] - brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0] - if account_id and brightcove_id: - return brightcove_url_result( - 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' - % (account_id, brightcove_id)) - - # Query result is often embedded in webpage as JSON. Sometimes explicit requests - # to video API results in a failure with geo restriction reason therefore using - # embedded query result when present sounds reasonable. - config_json = self._search_regex( - r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:|$)', - webpage, 'videoplayer applet', default=None) - if config_json: - config = self._parse_json(config_json, display_id, fatal=False) - if config: - sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi') - if sapi and 'query' in sapi: - info = self._extract_info(display_id, sapi, webpage) - self._sort_formats(info['formats']) - return info - - items_json = self._search_regex( - r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, - default=None) - if items_json is None: - alias = self._search_regex( - r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None) - if alias is not None: - alias_info = self._download_json( - 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias, - display_id, 'Downloading alias info') - video_id = alias_info[0]['id'] - else: - CONTENT_ID_REGEXES = [ - r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', - r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', - r'"first_videoid"\s*:\s*"([^"]+)"', - r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), - r']data-uuid=["\']([^"\']+)', - r']+yahoo://article/view\?.*\buuid=([^&"\']+)', - r']+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']', - ] - video_id = self._search_regex( - CONTENT_ID_REGEXES, webpage, 'content ID') + url, country, display_id = re.match(self._VALID_URL, url).groups() + if not country: + country = 'us' else: - items = json.loads(items_json) - info = items['mediaItems']['query']['results']['mediaObj'][0] - # The 'meta' field is not always in the video webpage, we request it - # from another page - video_id = info['id'] - return self._get_info(video_id, display_id, webpage) + country = country.split('-')[0] + api_base = 'https://%s.yahoo.com/_td/api/resource/' % country - def _extract_info(self, display_id, query, webpage): - info = query['query']['results']['mediaObj'][0] - meta = info.get('meta') - video_id = info.get('id') + for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]): + content = self._download_json( + api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid, + display_id, 'Downloading content JSON metadata', fatal=i == 1) + if content: + item = content['items'][0] + break - if not meta: - msg = info['status'].get('msg') - if msg: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, msg), expected=True) - raise ExtractorError('Unable to extract media object meta') + if item.get('type') != 'video': + entries = [] + cover = item.get('cover') or {} + if cover.get('type') == 'yvideo': + cover_url = cover.get('url') + if cover_url: + entries.append(self.url_result( + cover_url, 'Yahoo', cover.get('uuid'))) + + for e in item.get('body', []): + if e.get('type') == 'videoIframe': + iframe_url = e.get('url') + if not iframe_url: + continue + entries.append(self.url_result(iframe_url)) + + return self.playlist_result( + entries, item.get('uuid'), + item.get('title'), item.get('summary')) + + video_id = item['uuid'] + video = self._download_json( + api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id, + video_id, 'Downloading video JSON metadata')[0] + title = video['title'] + + if country == 'malaysia': + country = 'my' + + is_live = video.get('live_state') == 'live' + fmts = ('m3u8',) if is_live else ('web', 'mp4') + + urls = [] formats = [] - for s in info['streams']: - tbr = int_or_none(s.get('bitrate')) - format_info = { - 'width': int_or_none(s.get('width')), - 'height': int_or_none(s.get('height')), - 'tbr': tbr, - } - - host = s['host'] - path = s['path'] - if host.startswith('rtmp'): - fmt = 'rtmp' - format_info.update({ - 'url': host, - 'play_path': path, - 'ext': 'flv', - }) - else: - if s.get('format') == 'm3u8_playlist': - fmt = 'hls' - format_info.update({ - 'protocol': 'm3u8_native', - 'ext': 'mp4', - }) - else: - fmt = format_info['ext'] = determine_ext(path) - format_url = compat_urlparse.urljoin(host, path) - format_info['url'] = format_url - format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '') - formats.append(format_info) - - closed_captions = self._html_search_regex( - r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions', - default='[]') - - cc_json = self._parse_json(closed_captions, video_id, fatal=False) subtitles = {} - if cc_json: - for closed_caption in cc_json: - lang = closed_caption['lang'] - if lang not in subtitles: - subtitles[lang] = [] - subtitles[lang].append({ - 'url': closed_caption['url'], - 'ext': mimetype2ext(closed_caption['content_type']), + for fmt in fmts: + media_obj = self._download_json( + 'https://video-api.yql.yahoo.com/v1/video/sapi/streams/' + video_id, + video_id, 'Downloading %s JSON metadata' % fmt, + headers=self.geo_verification_headers(), query={ + 'format': fmt, + 'region': country.upper(), + })['query']['results']['mediaObj'][0] + msg = media_obj.get('status', {}).get('msg') + + for s in media_obj.get('streams', []): + host = s.get('host') + path = s.get('path') + if not host or not path: + continue + s_url = host + path + if s.get('format') == 'm3u8': + formats.extend(self._extract_m3u8_formats( + s_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + continue + tbr = int_or_none(s.get('bitrate')) + formats.append({ + 'url': s_url, + 'format_id': fmt + ('-%d' % tbr if tbr else ''), + 'width': int_or_none(s.get('width')), + 'height': int_or_none(s.get('height')), + 'tbr': tbr, + 'fps': int_or_none(s.get('framerate')), }) + for cc in media_obj.get('closedcaptions', []): + cc_url = cc.get('url') + if not cc_url or cc_url in urls: + continue + urls.append(cc_url) + subtitles.setdefault(cc.get('lang') or 'en-US', []).append({ + 'url': cc_url, + 'ext': mimetype2ext(cc.get('content_type')), + }) + + streaming_url = video.get('streaming_url') + if streaming_url and not is_live: + formats.extend(self._extract_m3u8_formats( + streaming_url, video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + + if not formats and msg == 'geo restricted': + self.raise_geo_restricted() + + self._sort_formats(formats) + + thumbnails = [] + for thumb in video.get('thumbnails', []): + thumb_url = thumb.get('url') + if not thumb_url: + continue + thumbnails.append({ + 'id': thumb.get('tag'), + 'url': thumb.get('url'), + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + }) + + series_info = video.get('series_info') or {} + return { 'id': video_id, - 'display_id': display_id, - 'title': unescapeHTML(meta['title']), + 'title': self._live_title(title) if is_live else title, 'formats': formats, - 'description': clean_html(meta['description']), - 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage), - 'duration': int_or_none(meta.get('duration')), + 'display_id': display_id, + 'thumbnails': thumbnails, + 'description': clean_html(video.get('description')), + 'timestamp': parse_iso8601(video.get('publish_time')), 'subtitles': subtitles, + 'duration': int_or_none(video.get('duration')), + 'view_count': int_or_none(video.get('view_count')), + 'is_live': is_live, + 'series': video.get('show_name'), + 'season_number': int_or_none(series_info.get('season_number')), + 'episode_number': int_or_none(series_info.get('episode_number')), } - def _get_info(self, video_id, display_id, webpage): - region = self._search_regex( - r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', - webpage, 'region', fatal=False, default='US').upper() - formats = [] - info = {} - for fmt in ('webm', 'mp4'): - query_result = self._download_json( - 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id, - display_id, 'Downloading %s video info' % fmt, query={ - 'protocol': 'http', - 'region': region, - 'format': fmt, - }) - info = self._extract_info(display_id, query_result, webpage) - formats.extend(info['formats']) - formats.extend(self._extract_m3u8_formats( - 'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region), - video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats) - info['formats'] = formats - return info - class YahooSearchIE(SearchInfoExtractor): IE_DESC = 'Yahoo screen search' From 8040a0d35e11f7b2bf6d698175ab0b12424d696f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 30 Oct 2019 23:52:09 +0100 Subject: [PATCH 0912/1201] [yahoo] fix typo --- youtube_dl/extractor/yahoo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index ee68096d0..6c6bd76e8 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -51,10 +51,10 @@ class YahooIE(InfoExtractor): }, }, { 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html', - 'md5': '0b51660361f0e27c9789e7037ef76f4b', + 'md5': '71298482f7c64cbb7fa064e4553ff1c1', 'info_dict': { 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58', - 'ext': 'mp4', + 'ext': 'webm', 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder', 'description': 'md5:f66c890e1490f4910a9953c941dee944', 'duration': 97, @@ -164,6 +164,7 @@ class YahooIE(InfoExtractor): 'params': { 'playlistend': 2, }, + 'expected_warnings': ['HTTP Error 404'], }, { 'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html', 'only_matching': True, @@ -219,7 +220,7 @@ class YahooIE(InfoExtractor): country = 'my' is_live = video.get('live_state') == 'live' - fmts = ('m3u8',) if is_live else ('web', 'mp4') + fmts = ('m3u8',) if is_live else ('webm', 'mp4') urls = [] formats = [] From 237513e801671a51cc45d6a2fe5e7df69517958e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 31 Oct 2019 07:38:53 +0100 Subject: [PATCH 0913/1201] [yahoo] restore support for cbs suffixed URLs --- test/test_all_urls.py | 6 ------ youtube_dl/extractor/yahoo.py | 5 ++++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 465ce0050..81056a999 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -123,12 +123,6 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs']) self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs']) - def test_yahoo_https(self): - # https://github.com/ytdl-org/youtube-dl/issues/2701 - self.assertMatch( - 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', - ['Yahoo']) - def test_no_duplicated_ie_names(self): name_accu = collections.defaultdict(list) for ie in self.ies: diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 6c6bd76e8..f041cf5de 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -25,7 +25,7 @@ from .brightcove import BrightcoveNewIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+)\.html)' + _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+)(?:-[a-z]+)?\.html)' _TESTS = [{ 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 'info_dict': { @@ -171,6 +171,9 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html', 'only_matching': True, + }, { + 'url': 'https://www.yahoo.com/entertainment/v/longtime-cbs-news-60-minutes-032036500-cbs.html', + 'only_matching': True, }] def _real_extract(self, url): From 3cf70bf1590ce364dc223197ba804cb70e704760 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 31 Oct 2019 07:44:21 +0100 Subject: [PATCH 0914/1201] [yahoo] make cbs URL suffix part of the media alias --- youtube_dl/extractor/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index f041cf5de..b9a9e88a0 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -25,7 +25,7 @@ from .brightcove import BrightcoveNewIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+)(?:-[a-z]+)?\.html)' + _VALID_URL = r'(?Phttps?://(?:(?P[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)' _TESTS = [{ 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 'info_dict': { From e993f1a0959fc04507b1cb2efeb610ae628d6d98 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 31 Oct 2019 08:13:10 +0100 Subject: [PATCH 0915/1201] [mixcloud] fix cloudcast data extraction(closes #22821) --- youtube_dl/extractor/mixcloud.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index bf5353ef9..e5f631506 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -86,9 +86,10 @@ class MixcloudIE(InfoExtractor): r'', webpage, 'play info'), 'play info') for item in full_info_json: - item_data = try_get( - item, lambda x: x['cloudcast']['data']['cloudcastLookup'], - dict) + item_data = try_get(item, [ + lambda x: x['cloudcast']['data']['cloudcastLookup'], + lambda x: x['cloudcastLookup']['data']['cloudcastLookup'], + ], dict) if try_get(item_data, lambda x: x['streamInfo']['url']): info_json = item_data break From 274bf5e4c58bceed4ff8c283d77457bf1cb76d3e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 11:37:41 +0100 Subject: [PATCH 0916/1201] [kakao] improve extraction - support embed URLs - support Kakao Legacy vid based embed URLs - only extract fields used for extraction - strip description and extract tags --- youtube_dl/extractor/kakao.py | 45 +++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 7fa140b0c..96f918b75 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -6,14 +6,15 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, + strip_or_none, unified_timestamp, update_url_query, ) class KakaoIE(InfoExtractor): - _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P\d+)/cliplink/(?P\d+)' - _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks' + _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P\d+|[^?#&]+@my)' + _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/' _TESTS = [{ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', @@ -36,7 +37,7 @@ class KakaoIE(InfoExtractor): 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', 'uploader_id': 2653210, - 'uploader': '쇼 음악중심', + 'uploader': '쇼! 음악중심', 'timestamp': 1485684628, 'upload_date': '20170129', } @@ -44,6 +45,8 @@ class KakaoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + display_id = video_id.rstrip('@my') + api_base = self._API_BASE_TMPL % video_id player_header = { 'Referer': update_url_query( @@ -55,20 +58,22 @@ class KakaoIE(InfoExtractor): }) } - QUERY_COMMON = { + query = { 'player': 'monet_html5', 'referer': url, 'uuid': '', 'service': 'kakao_tv', 'section': '', 'dteType': 'PC', + 'fields': ','.join([ + '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title', + 'description', 'channelId', 'createTime', 'duration', 'playCount', + 'likeCount', 'commentCount', 'tagList', 'channel', 'name', + 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault']) } - query = QUERY_COMMON.copy() - query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' impress = self._download_json( - '%s/%s/impress' % (self._API_BASE, video_id), - video_id, 'Downloading video info', + api_base + 'impress', display_id, 'Downloading video info', query=query, headers=player_header) clip_link = impress['clipLink'] @@ -78,30 +83,27 @@ class KakaoIE(InfoExtractor): tid = impress.get('tid', '') - query = QUERY_COMMON.copy() query.update({ + 'fields': '-*,outputList,profile,width,height,label,filesize', 'tid': tid, 'profile': 'HIGH', }) raw = self._download_json( - '%s/%s/raw' % (self._API_BASE, video_id), - video_id, 'Downloading video formats info', + api_base + 'raw', display_id, 'Downloading video formats info', query=query, headers=player_header) formats = [] for fmt in raw.get('outputList', []): try: profile_name = fmt['profile'] + query.update({ + 'profile': profile_name, + 'fields': '-*,url', + }) fmt_url_json = self._download_json( - '%s/%s/raw/videolocation' % (self._API_BASE, video_id), - video_id, + api_base + 'raw/videolocation', display_id, 'Downloading video URL for profile %s' % profile_name, - query={ - 'service': 'kakao_tv', - 'section': '', - 'tid': tid, - 'profile': profile_name - }, headers=player_header, fatal=False) + query=query, headers=player_header, fatal=False) if fmt_url_json is None: continue @@ -134,9 +136,9 @@ class KakaoIE(InfoExtractor): }) return { - 'id': video_id, + 'id': display_id, 'title': title, - 'description': clip.get('description'), + 'description': strip_or_none(clip.get('description')), 'uploader': clip_link.get('channel', {}).get('name'), 'uploader_id': clip_link.get('channelId'), 'thumbnails': thumbs, @@ -146,4 +148,5 @@ class KakaoIE(InfoExtractor): 'like_count': int_or_none(clip.get('likeCount')), 'comment_count': int_or_none(clip.get('commentCount')), 'formats': formats, + 'tags': clip.get('tagList'), } From d439989215fcb1672bc2ac18d4fb6206e12c387a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 11:43:18 +0100 Subject: [PATCH 0917/1201] [daum] fix VOD and Clip extracton(closes #15015) --- youtube_dl/extractor/daum.py | 106 +++++++++++------------------------ 1 file changed, 32 insertions(+), 74 deletions(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 76f021892..137095577 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -2,25 +2,21 @@ from __future__ import unicode_literals -import re import itertools from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, compat_urlparse, ) -from ..utils import ( - int_or_none, - str_to_int, - xpath_text, - unescapeHTML, -) -class DaumIE(InfoExtractor): +class DaumBaseIE(InfoExtractor): + _KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/' + + +class DaumIE(DaumBaseIE): _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P[^?#&]+)' IE_NAME = 'daum.net' @@ -36,6 +32,9 @@ class DaumIE(InfoExtractor): 'duration': 2117, 'view_count': int, 'comment_count': int, + 'uploader_id': 186139, + 'uploader': '콘간지', + 'timestamp': 1387310323, }, }, { 'url': 'http://m.tvpot.daum.net/v/65139429', @@ -44,11 +43,14 @@ class DaumIE(InfoExtractor): 'ext': 'mp4', 'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118', 'description': 'md5:79794514261164ff27e36a21ad229fc5', - 'upload_date': '20150604', + 'upload_date': '20150118', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 154, 'view_count': int, 'comment_count': int, + 'uploader': 'MBC 예능', + 'uploader_id': 132251, + 'timestamp': 1421604228, }, }, { 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', @@ -59,12 +61,15 @@ class DaumIE(InfoExtractor): 'id': 'vwIpVpCQsT8$', 'ext': 'flv', 'title': '01-Korean War ( Trouble on the horizon )', - 'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름', + 'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름', 'upload_date': '20080223', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 249, 'view_count': int, 'comment_count': int, + 'uploader': '까칠한 墮落始祖 황비홍님의', + 'uploader_id': 560824, + 'timestamp': 1203770745, }, }, { # Requires dte_type=WEB (#9972) @@ -73,60 +78,24 @@ class DaumIE(InfoExtractor): 'info_dict': { 'id': 's3794Uf1NZeZ1qMpGpeqeRU', 'ext': 'mp4', - 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611', - 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회', - 'upload_date': '20160611', + 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', + 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', + 'upload_date': '20170129', + 'uploader': '쇼! 음악중심', + 'uploader_id': 2653210, + 'timestamp': 1485684628, }, }] def _real_extract(self, url): video_id = compat_urllib_parse_unquote(self._match_id(url)) - movie_data = self._download_json( - 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json', - video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'}) - - # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid - if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id): - return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id) - - info = self._download_xml( - 'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id, - 'Downloading video info', query={'vid': video_id}) - - formats = [] - for format_el in movie_data['output_list']['output_list']: - profile = format_el['profile'] - format_query = compat_urllib_parse_urlencode({ - 'vid': video_id, - 'profile': profile, - }) - url_doc = self._download_xml( - 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query, - video_id, note='Downloading video data for %s format' % profile) - format_url = url_doc.find('result/url').text - formats.append({ - 'url': format_url, - 'format_id': profile, - 'width': int_or_none(format_el.get('width')), - 'height': int_or_none(format_el.get('height')), - 'filesize': int_or_none(format_el.get('filesize')), - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': info.find('TITLE').text, - 'formats': formats, - 'thumbnail': xpath_text(info, 'THUMB_URL'), - 'description': xpath_text(info, 'CONTENTS'), - 'duration': int_or_none(xpath_text(info, 'DURATION')), - 'upload_date': info.find('REGDTTM').text[:8], - 'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')), - 'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')), - } + if not video_id.isdigit(): + video_id += '@my' + return self.url_result( + self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id) -class DaumClipIE(InfoExtractor): +class DaumClipIE(DaumBaseIE): _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P\d+)' IE_NAME = 'daum.net:clip' _URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s' @@ -142,6 +111,9 @@ class DaumClipIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'duration': 3868, 'view_count': int, + 'uploader': 'GOMeXP', + 'uploader_id': 6667, + 'timestamp': 1377911092, }, }, { 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425', @@ -154,22 +126,8 @@ class DaumClipIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - clip_info = self._download_json( - 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id, - video_id, 'Downloading clip info')['clip_bean'] - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'], - 'title': unescapeHTML(clip_info['title']), - 'thumbnail': clip_info.get('thumb_url'), - 'description': clip_info.get('contents'), - 'duration': int_or_none(clip_info.get('duration')), - 'upload_date': clip_info.get('up_date')[:8], - 'view_count': int_or_none(clip_info.get('play_count')), - 'ie_key': 'Daum', - } + return self.url_result( + self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id) class DaumListIE(InfoExtractor): From e987ce4bda476a387937e4af5b46f4a412a67830 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 12:40:41 +0100 Subject: [PATCH 0918/1201] [kakao] remove raw request and extract format total bitrate --- youtube_dl/extractor/kakao.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 96f918b75..32935bb28 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -69,7 +69,8 @@ class KakaoIE(InfoExtractor): '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title', 'description', 'channelId', 'createTime', 'duration', 'playCount', 'likeCount', 'commentCount', 'tagList', 'channel', 'name', - 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault']) + 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault', + 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label']) } impress = self._download_json( @@ -81,21 +82,14 @@ class KakaoIE(InfoExtractor): title = clip.get('title') or clip_link.get('displayTitle') - tid = impress.get('tid', '') - - query.update({ - 'fields': '-*,outputList,profile,width,height,label,filesize', - 'tid': tid, - 'profile': 'HIGH', - }) - raw = self._download_json( - api_base + 'raw', display_id, 'Downloading video formats info', - query=query, headers=player_header) + query['tid'] = impress.get('tid', '') formats = [] - for fmt in raw.get('outputList', []): + for fmt in clip.get('videoOutputList', []): try: profile_name = fmt['profile'] + if profile_name == 'AUDIO': + continue query.update({ 'profile': profile_name, 'fields': '-*,url', @@ -115,7 +109,8 @@ class KakaoIE(InfoExtractor): 'width': int_or_none(fmt.get('width')), 'height': int_or_none(fmt.get('height')), 'format_note': fmt.get('label'), - 'filesize': int_or_none(fmt.get('filesize')) + 'filesize': int_or_none(fmt.get('filesize')), + 'tbr': int_or_none(fmt.get('kbps')), }) except KeyError: pass From 20cc7c082b82e82050a4e1f1bb815fee51f6c1c2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 16:36:35 +0100 Subject: [PATCH 0919/1201] [go90] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/go90.py | 149 ----------------------------- 2 files changed, 150 deletions(-) delete mode 100644 youtube_dl/extractor/go90.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5d20ba863..e9b59ce52 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -422,7 +422,6 @@ from .globo import ( GloboArticleIE, ) from .go import GoIE -from .go90 import Go90IE from .godtube import GodTubeIE from .golem import GolemIE from .googledrive import GoogleDriveIE diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py deleted file mode 100644 index c3ea717bc..000000000 --- a/youtube_dl/extractor/go90.py +++ /dev/null @@ -1,149 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - parse_age_limit, - parse_iso8601, -) - - -class Go90IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P[0-9a-zA-Z]+)' - _TESTS = [{ - 'url': 'https://www.go90.com/videos/84BUqjLpf9D', - 'md5': 'efa7670dbbbf21a7b07b360652b24a32', - 'info_dict': { - 'id': '84BUqjLpf9D', - 'ext': 'mp4', - 'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention', - 'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.', - 'timestamp': 1491868800, - 'upload_date': '20170411', - 'age_limit': 14, - } - }, { - 'url': 'https://www.go90.com/embed/261MflWkD3N', - 'only_matching': True, - }] - _GEO_BYPASS = False - - def _real_extract(self, url): - video_id = self._match_id(url) - - try: - headers = self.geo_verification_headers() - headers.update({ - 'Content-Type': 'application/json; charset=utf-8', - }) - video_data = self._download_json( - 'https://www.go90.com/api/view/items/' + video_id, video_id, - headers=headers, data=b'{"client":"web","device_type":"pc"}') - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - message = self._parse_json(e.cause.read().decode(), None)['error']['message'] - if 'region unavailable' in message: - self.raise_geo_restricted(countries=['US']) - raise ExtractorError(message, expected=True) - raise - - if video_data.get('requires_drm'): - raise ExtractorError('This video is DRM protected.', expected=True) - main_video_asset = video_data['main_video_asset'] - - episode_number = int_or_none(video_data.get('episode_number')) - series = None - season = None - season_id = None - season_number = None - for metadata in video_data.get('__children', {}).get('Item', {}).values(): - if metadata.get('type') == 'show': - series = metadata.get('title') - elif metadata.get('type') == 'season': - season = metadata.get('title') - season_id = metadata.get('id') - season_number = int_or_none(metadata.get('season_number')) - - title = episode = video_data.get('title') or series - if series and series != title: - title = '%s - %s' % (series, title) - - thumbnails = [] - formats = [] - subtitles = {} - for asset in video_data.get('assets'): - if asset.get('id') == main_video_asset: - for source in asset.get('sources', []): - source_location = source.get('location') - if not source_location: - continue - source_type = source.get('type') - if source_type == 'hls': - m3u8_formats = self._extract_m3u8_formats( - source_location, video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - for f in m3u8_formats: - mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url']) - if mobj: - height, tbr = mobj.groups() - height = int_or_none(height) - f.update({ - 'height': f.get('height') or height, - 'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None), - 'tbr': f.get('tbr') or int_or_none(tbr), - }) - formats.extend(m3u8_formats) - elif source_type == 'dash': - formats.extend(self._extract_mpd_formats( - source_location, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'format_id': source.get('name'), - 'url': source_location, - 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), - 'tbr': int_or_none(source.get('bitrate')), - }) - - for caption in asset.get('caption_metadata', []): - caption_url = caption.get('source_url') - if not caption_url: - continue - subtitles.setdefault(caption.get('language', 'en'), []).append({ - 'url': caption_url, - 'ext': determine_ext(caption_url, 'vtt'), - }) - elif asset.get('type') == 'image': - asset_location = asset.get('location') - if not asset_location: - continue - thumbnails.append({ - 'url': asset_location, - 'width': int_or_none(asset.get('width')), - 'height': int_or_none(asset.get('height')), - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': video_data.get('short_description'), - 'like_count': int_or_none(video_data.get('like_count')), - 'timestamp': parse_iso8601(video_data.get('released_at')), - 'series': series, - 'episode': episode, - 'season': season, - 'season_id': season_id, - 'season_number': season_number, - 'episode_number': episode_number, - 'subtitles': subtitles, - 'age_limit': parse_age_limit(video_data.get('rating')), - } From 152f22920d73bb0dc24fa357d5904a8dd97a5bf6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 17:44:34 +0100 Subject: [PATCH 0920/1201] [wistia] reduce embed extraction false positives and support inline embeds(closes #22931) --- youtube_dl/extractor/wistia.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index fa142b974..0fbc888ec 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -12,7 +12,7 @@ from ..utils import ( class WistiaIE(InfoExtractor): - _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P[a-z0-9]+)' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P[a-z0-9]{10})' _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' @@ -43,25 +43,26 @@ class WistiaIE(InfoExtractor): 'only_matching': True, }] + # https://wistia.com/support/embed-and-share/video-on-your-website @staticmethod def _extract_url(webpage): match = re.search( - r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage) + r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage) if match: return unescapeHTML(match.group('url')) - match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) - if match: - return 'wistia:%s' % match.group('id') - match = re.search( r'''(?sx) ]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? - ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]+)\b.*?\2 + ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]{10})\b.*?\2 ''', webpage) if match: return 'wistia:%s' % match.group('id') + match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P[a-z0-9]{10})', webpage) + if match: + return 'wistia:%s' % match.group('id') + def _real_extract(self, url): video_id = self._match_id(url) From 4c95fcf9e8fa2ed113698d13df55df4aaecd8433 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 1 Nov 2019 21:16:47 +0100 Subject: [PATCH 0921/1201] [bambuser] remove extractor https://web.archive.org/web/20190808014227/https://go.bambuser.com/shutdown-announcement --- youtube_dl/extractor/bambuser.py | 142 ----------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 143 deletions(-) delete mode 100644 youtube_dl/extractor/bambuser.py diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py deleted file mode 100644 index 4400ff9c1..000000000 --- a/youtube_dl/extractor/bambuser.py +++ /dev/null @@ -1,142 +0,0 @@ -from __future__ import unicode_literals - -import re -import itertools - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - sanitized_Request, - urlencode_postdata, -) - - -class BambuserIE(InfoExtractor): - IE_NAME = 'bambuser' - _VALID_URL = r'https?://bambuser\.com/v/(?P\d+)' - _API_KEY = '005f64509e19a868399060af746a00aa' - _LOGIN_URL = 'https://bambuser.com/user' - _NETRC_MACHINE = 'bambuser' - - _TEST = { - 'url': 'http://bambuser.com/v/4050584', - # MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388 - # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641', - 'info_dict': { - 'id': '4050584', - 'ext': 'flv', - 'title': 'Education engineering days - lightning talks', - 'duration': 3741, - 'uploader': 'pixelversity', - 'uploader_id': '344706', - 'timestamp': 1382976692, - 'upload_date': '20131028', - 'view_count': int, - }, - 'params': { - # It doesn't respect the 'Range' header, it would download the whole video - # caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59 - 'skip_download': True, - }, - } - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_form = { - 'form_id': 'user_login', - 'op': 'Log in', - 'name': username, - 'pass': password, - } - - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) - request.add_header('Referer', self._LOGIN_URL) - response = self._download_webpage( - request, None, 'Logging in') - - login_error = self._html_search_regex( - r'(?s)
    (.+?)
    ', - response, 'login error', default=None) - if login_error: - raise ExtractorError( - 'Unable to login: %s' % login_error, expected=True) - - def _real_initialize(self): - self._login() - - def _real_extract(self, url): - video_id = self._match_id(url) - - info = self._download_json( - 'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s' - % (self._API_KEY, video_id), video_id) - - error = info.get('error') - if error: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error), expected=True) - - result = info['result'] - - return { - 'id': video_id, - 'title': result['title'], - 'url': result['url'], - 'thumbnail': result.get('preview'), - 'duration': int_or_none(result.get('length')), - 'uploader': result.get('username'), - 'uploader_id': compat_str(result.get('owner', {}).get('uid')), - 'timestamp': int_or_none(result.get('created')), - 'fps': float_or_none(result.get('framerate')), - 'view_count': int_or_none(result.get('views_total')), - 'comment_count': int_or_none(result.get('comment_count')), - } - - -class BambuserChannelIE(InfoExtractor): - IE_NAME = 'bambuser:channel' - _VALID_URL = r'https?://bambuser\.com/channel/(?P.*?)(?:/|#|\?|$)' - # The maximum number we can get with each request - _STEP = 50 - _TEST = { - 'url': 'http://bambuser.com/channel/pixelversity', - 'info_dict': { - 'title': 'pixelversity', - }, - 'playlist_mincount': 60, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - user = mobj.group('user') - urls = [] - last_id = '' - for i in itertools.count(1): - req_url = ( - 'http://bambuser.com/xhr-api/index.php?username={user}' - '&sort=created&access_mode=0%2C1%2C2&limit={count}' - '&method=broadcast&format=json&vid_older_than={last}' - ).format(user=user, count=self._STEP, last=last_id) - req = sanitized_Request(req_url) - # Without setting this header, we wouldn't get any result - req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) - data = self._download_json( - req, user, 'Downloading page %d' % i) - results = data['result'] - if not results: - break - last_id = results[-1]['vid'] - urls.extend(self.url_result(v['page'], 'Bambuser') for v in results) - - return { - '_type': 'playlist', - 'title': user, - 'entries': urls, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e9b59ce52..af3fff601 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -80,7 +80,6 @@ from .awaan import ( ) from .azmedien import AZMedienIE from .baidu import BaiduVideoIE -from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bbc import ( BBCCoUkIE, From 836bfcb54e4d1664815ebffb753a9dc7c9c7d72c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 2 Nov 2019 11:08:51 +0100 Subject: [PATCH 0922/1201] [flipagram] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/flipagram.py | 115 ----------------------------- 2 files changed, 116 deletions(-) delete mode 100644 youtube_dl/extractor/flipagram.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index af3fff601..33fb461a0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -355,7 +355,6 @@ from .firsttv import FirstTVIE from .fivemin import FiveMinIE from .fivetv import FiveTVIE from .flickr import FlickrIE -from .flipagram import FlipagramIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py deleted file mode 100644 index b7be40f1b..000000000 --- a/youtube_dl/extractor/flipagram.py +++ /dev/null @@ -1,115 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - float_or_none, - try_get, - unified_timestamp, -) - - -class FlipagramIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P[^/?#&]+)' - _TEST = { - 'url': 'https://flipagram.com/f/nyvTSJMKId', - 'md5': '888dcf08b7ea671381f00fab74692755', - 'info_dict': { - 'id': 'nyvTSJMKId', - 'ext': 'mp4', - 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction', - 'description': 'md5:d55e32edc55261cae96a41fa85ff630e', - 'duration': 35.571, - 'timestamp': 1461244995, - 'upload_date': '20160421', - 'uploader': 'kitty juria', - 'uploader_id': 'sjuria101', - 'creator': 'kitty juria', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'comment_count': int, - 'comments': list, - 'formats': 'mincount:2', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_data = self._parse_json( - self._search_regex( - r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'), - video_id) - - flipagram = video_data['flipagram'] - video = flipagram['video'] - - json_ld = self._search_json_ld(webpage, video_id, default={}) - title = json_ld.get('title') or flipagram['captionText'] - description = json_ld.get('description') or flipagram.get('captionText') - - formats = [{ - 'url': video['url'], - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'filesize': int_or_none(video_data.get('size')), - }] - - preview_url = try_get( - flipagram, lambda x: x['music']['track']['previewUrl'], compat_str) - if preview_url: - formats.append({ - 'url': preview_url, - 'ext': 'm4a', - 'vcodec': 'none', - }) - - self._sort_formats(formats) - - counts = flipagram.get('counts', {}) - user = flipagram.get('user', {}) - video_data = flipagram.get('video', {}) - - thumbnails = [{ - 'url': self._proto_relative_url(cover['url']), - 'width': int_or_none(cover.get('width')), - 'height': int_or_none(cover.get('height')), - 'filesize': int_or_none(cover.get('size')), - } for cover in flipagram.get('covers', []) if cover.get('url')] - - # Note that this only retrieves comments that are initially loaded. - # For videos with large amounts of comments, most won't be retrieved. - comments = [] - for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []): - text = comment.get('comment') - if not text or not isinstance(text, list): - continue - comments.append({ - 'author': comment.get('user', {}).get('name'), - 'author_id': comment.get('user', {}).get('username'), - 'id': comment.get('id'), - 'text': text[0], - 'timestamp': unified_timestamp(comment.get('created')), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': float_or_none(flipagram.get('duration'), 1000), - 'thumbnails': thumbnails, - 'timestamp': unified_timestamp(flipagram.get('iso8601Created')), - 'uploader': user.get('name'), - 'uploader_id': user.get('username'), - 'creator': user.get('name'), - 'view_count': int_or_none(counts.get('plays')), - 'like_count': int_or_none(counts.get('likes')), - 'repost_count': int_or_none(counts.get('reflips')), - 'comment_count': int_or_none(counts.get('comments')), - 'comments': comments, - 'formats': formats, - } From 79b35e7c15f4a285525b5ec52035ff0f8fc6150d Mon Sep 17 00:00:00 2001 From: geditorit <52565706+geditorit@users.noreply.github.com> Date: Sat, 2 Nov 2019 18:32:49 +0700 Subject: [PATCH 0923/1201] [gameone] Remove extractor (#21778) --- youtube_dl/extractor/extractors.py | 4 - youtube_dl/extractor/gameone.py | 134 ----------------------------- 2 files changed, 138 deletions(-) delete mode 100644 youtube_dl/extractor/gameone.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 33fb461a0..dce08e077 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -401,10 +401,6 @@ from .fusion import FusionIE from .fxnetworks import FXNetworksIE from .gaia import GaiaIE from .gameinformer import GameInformerIE -from .gameone import ( - GameOneIE, - GameOnePlaylistIE, -) from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gaskrank import GaskrankIE diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py deleted file mode 100644 index a07d69841..000000000 --- a/youtube_dl/extractor/gameone.py +++ /dev/null @@ -1,134 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - xpath_with_ns, - parse_iso8601, - float_or_none, - int_or_none, -) - -NAMESPACE_MAP = { - 'media': 'http://search.yahoo.com/mrss/', -} - -# URL prefix to download the mp4 files directly instead of streaming via rtmp -# Credits go to XBox-Maniac -# http://board.jdownloader.org/showpost.php?p=185835&postcount=31 -RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' - - -class GameOneIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' - _TESTS = [ - { - 'url': 'http://www.gameone.de/tv/288', - 'md5': '136656b7fb4c9cb4a8e2d500651c499b', - 'info_dict': { - 'id': '288', - 'ext': 'mp4', - 'title': 'Game One - Folge 288', - 'duration': 1238, - 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', - 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', - 'age_limit': 16, - 'upload_date': '20140513', - 'timestamp': 1399980122, - } - }, - { - 'url': 'http://gameone.de/tv/220', - 'md5': '5227ca74c4ae6b5f74c0510a7c48839e', - 'info_dict': { - 'id': '220', - 'ext': 'mp4', - 'upload_date': '20120918', - 'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker', - 'timestamp': 1347971451, - 'title': 'Game One - Folge 220', - 'duration': 896.62, - 'age_limit': 16, - } - } - - ] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - og_video = self._og_search_video_url(webpage, secure=False) - description = self._html_search_meta('description', webpage) - age_limit = int( - self._search_regex( - r'age=(\d+)', - self._html_search_meta( - 'age-de-meta-label', - webpage), - 'age_limit', - '0')) - mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') - - mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') - title = mrss.find('.//item/title').text - thumbnail = mrss.find('.//item/image').get('url') - timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ') - content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) - content_url = content.get('url') - - content = self._download_xml( - content_url, - video_id, - 'Downloading media:content') - rendition_items = content.findall('.//rendition') - duration = float_or_none(rendition_items[0].get('duration')) - formats = [ - { - 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), - 'width': int_or_none(r.get('width')), - 'height': int_or_none(r.get('height')), - 'tbr': int_or_none(r.get('bitrate')), - } - for r in rendition_items - ] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'description': description, - 'age_limit': age_limit, - 'timestamp': timestamp, - } - - -class GameOnePlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$' - IE_NAME = 'gameone:playlist' - _TEST = { - 'url': 'http://www.gameone.de/tv', - 'info_dict': { - 'title': 'GameOne', - }, - 'playlist_mincount': 294, - } - - def _real_extract(self, url): - webpage = self._download_webpage('http://www.gameone.de/tv', 'TV') - max_id = max(map(int, re.findall(r' Date: Sat, 2 Nov 2019 13:09:44 +0100 Subject: [PATCH 0924/1201] [keek] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/keek.py | 39 ------------------------------ 2 files changed, 40 deletions(-) delete mode 100644 youtube_dl/extractor/keek.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dce08e077..08facf8d3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -515,7 +515,6 @@ from .ketnet import KetnetIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .kinopoisk import KinoPoiskIE -from .keek import KeekIE from .konserthusetplay import KonserthusetPlayIE from .kontrtube import KontrTubeIE from .krasview import KrasViewIE diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py deleted file mode 100644 index 94a03d277..000000000 --- a/youtube_dl/extractor/keek.py +++ /dev/null @@ -1,39 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class KeekIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P\w+)' - IE_NAME = 'keek' - _TEST = { - 'url': 'https://www.keek.com/keek/NODfbab', - 'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83', - 'info_dict': { - 'id': 'NODfbab', - 'ext': 'mp4', - 'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896', - 'uploader': 'ytdl', - 'uploader_id': 'eGT5bab', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - return { - 'id': video_id, - 'url': self._og_search_video_url(webpage), - 'ext': 'mp4', - 'title': self._og_search_description(webpage).strip(), - 'thumbnail': self._og_search_thumbnail(webpage), - 'uploader': self._search_regex( - r'data-username=(["\'])(?P.+?)\1', webpage, - 'uploader', fatal=False, group='uploader'), - 'uploader_id': self._search_regex( - r'data-user-id=(["\'])(?P.+?)\1', webpage, - 'uploader id', fatal=False, group='uploader_id'), - } From 5e36b63486794750aca0ee6b9b83f27abf6332dc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 2 Nov 2019 13:25:39 +0100 Subject: [PATCH 0925/1201] [iconosquare] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/iconosquare.py | 85 ----------------------------- 2 files changed, 86 deletions(-) delete mode 100644 youtube_dl/extractor/iconosquare.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 08facf8d3..dd5f68ca3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -455,7 +455,6 @@ from .hungama import ( HungamaSongIE, ) from .hypem import HypemIE -from .iconosquare import IconosquareIE from .ign import ( IGNIE, OneUPIE, diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py deleted file mode 100644 index a39f422e9..000000000 --- a/youtube_dl/extractor/iconosquare.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - get_element_by_id, - remove_end, -) - - -class IconosquareIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P[^/]+)' - _TEST = { - 'url': 'http://statigr.am/p/522207370455279102_24101272', - 'md5': '6eb93b882a3ded7c378ee1d6884b1814', - 'info_dict': { - 'id': '522207370455279102_24101272', - 'ext': 'mp4', - 'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)', - 'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d', - 'timestamp': 1376471991, - 'upload_date': '20130814', - 'uploader': 'aguynamedpatrick', - 'uploader_id': '24101272', - 'comment_count': int, - 'like_count': int, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - media = self._parse_json( - get_element_by_id('mediaJson', webpage), - video_id) - - formats = [{ - 'url': f['url'], - 'format_id': format_id, - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')) - } for format_id, f in media['videos'].items()] - self._sort_formats(formats) - - title = remove_end(self._og_search_title(webpage), ' - via Iconosquare') - - timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time')) - description = media.get('caption', {}).get('text') - - uploader = media.get('user', {}).get('username') - uploader_id = media.get('user', {}).get('id') - - comment_count = int_or_none(media.get('comments', {}).get('count')) - like_count = int_or_none(media.get('likes', {}).get('count')) - - thumbnails = [{ - 'url': t['url'], - 'id': thumbnail_id, - 'width': int_or_none(t.get('width')), - 'height': int_or_none(t.get('height')) - } for thumbnail_id, t in media.get('images', {}).items()] - - comments = [{ - 'id': comment.get('id'), - 'text': comment['text'], - 'timestamp': int_or_none(comment.get('created_time')), - 'author': comment.get('from', {}).get('full_name'), - 'author_id': comment.get('from', {}).get('username'), - } for comment in media.get('comments', {}).get('data', []) if 'text' in comment] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'comment_count': comment_count, - 'like_count': like_count, - 'formats': formats, - 'comments': comments, - } From e54924c46fac6a9745868424dc14011da2572178 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 2 Nov 2019 18:13:31 +0100 Subject: [PATCH 0926/1201] [stv] fix extraction(closes #22928) --- youtube_dl/extractor/stv.py | 89 +++++++++++++------------------------ 1 file changed, 31 insertions(+), 58 deletions(-) diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py index ccb074cd4..bae8b71f4 100644 --- a/youtube_dl/extractor/stv.py +++ b/youtube_dl/extractor/stv.py @@ -4,15 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse -) from ..utils import ( - extract_attributes, + compat_str, float_or_none, int_or_none, - str_or_none, ) @@ -20,20 +15,20 @@ class STVPlayerIE(InfoExtractor): IE_NAME = 'stv:player' _VALID_URL = r'https?://player\.stv\.tv/(?Pepisode|video)/(?P[a-z0-9]{4})' _TEST = { - 'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/', - 'md5': '2ad867d4afd641fa14187596e0fbc91b', + 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/', + 'md5': '5adf9439c31d554f8be0707c7abe7e0a', 'info_dict': { - 'id': '6016487034001', + 'id': '5333973339001', 'ext': 'mp4', - 'upload_date': '20190321', - 'title': 'Interview with the cast ahead of new Victoria', - 'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.', - 'timestamp': 1553179628, + 'upload_date': '20170301', + 'title': '60 seconds on set with Laura Norton', + 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let\'s find out!", + 'timestamp': 1488388054, 'uploader_id': '1486976045', }, 'skip': 'this resource is unavailable outside of the UK', } - _PUBLISHER_ID = '1486976045' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s' _PTYPE_MAP = { 'episode': 'episodes', 'video': 'shortform', @@ -41,54 +36,32 @@ class STVPlayerIE(InfoExtractor): def _real_extract(self, url): ptype, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, video_id) + resp = self._download_json( + 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id), + video_id) - qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex( - r'itemprop="embedURL"[^>]+href="([^"]+)', - webpage, 'embed URL', default=None)).query) - publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID + result = resp['results'] + video = result['video'] + video_id = compat_str(video['id']) - player_attr = extract_attributes(self._search_regex( - r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {} + subtitles = {} + _subtitles = result.get('_subtitles') or {} + for ext, sub_url in _subtitles.items(): + subtitles.setdefault('en', []).append({ + 'ext': 'vtt' if ext == 'webvtt' else ext, + 'url': sub_url, + }) - info = {} - duration = ref_id = series = video_id = None - api_ref_id = player_attr.get('data-player-api-refid') - if api_ref_id: - resp = self._download_json( - 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id), - api_ref_id, fatal=False) - if resp: - result = resp.get('results') or {} - video = result.get('video') or {} - video_id = str_or_none(video.get('id')) - ref_id = video.get('guid') - duration = video.get('length') - programme = result.get('programme') or {} - series = programme.get('name') or programme.get('shortName') - subtitles = {} - _subtitles = result.get('_subtitles') or {} - for ext, sub_url in _subtitles.items(): - subtitles.setdefault('en', []).append({ - 'ext': 'vtt' if ext == 'webvtt' else ext, - 'url': sub_url, - }) - info.update({ - 'description': result.get('summary'), - 'subtitles': subtitles, - 'view_count': int_or_none(result.get('views')), - }) - if not video_id: - video_id = qs.get('videoId', [None])[0] or self._search_regex( - r' Date: Sat, 2 Nov 2019 22:33:51 +0100 Subject: [PATCH 0927/1201] [bellmedia] add support for marilyn.ca videos(#22193) --- youtube_dl/extractor/bellmedia.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py index f36a2452d..485173774 100644 --- a/youtube_dl/extractor/bellmedia.py +++ b/youtube_dl/extractor/bellmedia.py @@ -22,7 +22,8 @@ class BellMediaIE(InfoExtractor): bravo| mtv| space| - etalk + etalk| + marilyn )\.ca| much\.com )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' @@ -70,6 +71,7 @@ class BellMediaIE(InfoExtractor): 'animalplanet': 'aniplan', 'etalk': 'ctv', 'bnnbloomberg': 'bnn', + 'marilyn': 'ctv_marilyn', } def _real_extract(self, url): From 564275e26fc963fb920236e37c6c19e8e2b046f0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 3 Nov 2019 22:04:03 +0100 Subject: [PATCH 0928/1201] [telegraaf] fix extraction --- youtube_dl/extractor/telegraaf.py | 75 ++++++++++++++++++------------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py index 0f576c1ab..2dc020537 100644 --- a/youtube_dl/extractor/telegraaf.py +++ b/youtube_dl/extractor/telegraaf.py @@ -4,21 +4,25 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( determine_ext, - remove_end, + int_or_none, + parse_iso8601, + try_get, ) class TelegraafIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P\d+)/[^/]+\.html' + _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P\d+)' _TEST = { - 'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html', + 'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los', 'info_dict': { - 'id': '24353229', + 'id': 'gaMItuoSeUg2', 'ext': 'mp4', - 'title': 'Tikibad ontruimd wegens brand', - 'description': 'md5:05ca046ff47b931f9b04855015e163a4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 33, + 'title': 'Historisch scheepswrak slaat na 100 jaar los', + 'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 55, + 'timestamp': 1572805527, + 'upload_date': '20191103', }, 'params': { # m3u8 download @@ -27,23 +31,30 @@ class TelegraafIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) + article_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + video_id = self._download_json( + 'https://www.telegraaf.nl/graphql', article_id, query={ + 'query': '''{ + article(uid: %s) { + videos { + videoId + } + } +}''' % article_id, + })['data']['article']['videos'][0]['videoId'] - player_url = self._html_search_regex( - r']+src="([^"]+")', webpage, 'player URL') - player_page = self._download_webpage( - player_url, video_id, note='Download player webpage') - playlist_url = self._search_regex( - r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL') - playlist_data = self._download_json(playlist_url, video_id) + item = self._download_json( + 'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id, + video_id)['items'][0] + title = item['title'] - item = playlist_data['items'][0] formats = [] - locations = item['locations'] + locations = item.get('locations') or {} for location in locations.get('adaptive', []): - manifest_url = location['src'] + manifest_url = location.get('src') + if not manifest_url: + continue ext = determine_ext(manifest_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -54,25 +65,25 @@ class TelegraafIE(InfoExtractor): else: self.report_warning('Unknown adaptive format %s' % ext) for location in locations.get('progressive', []): + src = try_get(location, lambda x: x['sources'][0]['src']) + if not src: + continue + label = location.get('label') formats.append({ - 'url': location['sources'][0]['src'], - 'width': location.get('width'), - 'height': location.get('height'), - 'format_id': 'http-%s' % location['label'], + 'url': src, + 'width': int_or_none(location.get('width')), + 'height': int_or_none(location.get('height')), + 'format_id': 'http' + ('-%s' % label if label else ''), }) self._sort_formats(formats) - title = remove_end(self._og_search_title(webpage), ' - VIDEO') - description = self._og_search_description(webpage) - duration = item.get('duration') - thumbnail = item.get('poster') - return { 'id': video_id, 'title': title, - 'description': description, + 'description': item.get('description'), 'formats': formats, - 'duration': duration, - 'thumbnail': thumbnail, + 'duration': int_or_none(item.get('duration')), + 'thumbnail': item.get('poster'), + 'timestamp': parse_iso8601(item.get('datecreated'), ' '), } From a6e6673e825f6225c3a316b164ddca03fd20b5d2 Mon Sep 17 00:00:00 2001 From: Manu Cornet Date: Sun, 3 Nov 2019 21:23:27 +0000 Subject: [PATCH 0929/1201] [README.md] Also read permission to the binary in how to update section (#22903) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c39b13616..01f975958 100644 --- a/README.md +++ b/README.md @@ -752,8 +752,8 @@ As a last resort, you can also uninstall the version installed by your package m Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html): ``` -sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl -sudo chmod a+x /usr/local/bin/youtube-dl +sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl +sudo chmod a+rx /usr/local/bin/youtube-dl hash -r ``` From ef382405c6dc79d2b7e3f81a527232941e2c0b2d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Nov 2019 02:01:01 +0100 Subject: [PATCH 0930/1201] [mediaset] extract unprotected M3U and MPD manifests(closes #17204) --- youtube_dl/extractor/mediaset.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index df3748798..fcbc064ff 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -62,7 +62,6 @@ class MediasetIE(ThePlatformBaseIE): 'uploader': 'Canale 5', 'uploader_id': 'C5', }, - 'expected_warnings': ['HTTP Error 403: Forbidden'], }, { # clip 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680', @@ -109,6 +108,11 @@ class MediasetIE(ThePlatformBaseIE): entries.append(embed_url) return entries + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): + for video in smil.findall(self._xpath_ns('.//video', namespace)): + video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src']) + return super()._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url) + def _real_extract(self, url): guid = self._match_id(url) tp_path = 'PR1GhC/media/guid/2702976343/' + guid @@ -118,14 +122,15 @@ class MediasetIE(ThePlatformBaseIE): subtitles = {} first_e = None for asset_type in ('SD', 'HD'): - for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'): + # TODO: fixup ISM+none manifest URLs + for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'): try: tp_formats, tp_subtitles = self._extract_theplatform_smil( update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), { 'mbr': 'true', 'formats': f, 'assetTypes': asset_type, - }), guid, 'Downloading %s %s SMIL data' % (f, asset_type)) + }), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type)) except ExtractorError as e: if not first_e: first_e = e From bf45295c5387d0d90b97ca34d377cdaa07c71bcb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Nov 2019 11:13:14 +0100 Subject: [PATCH 0931/1201] [mediaset] relax URL guid matching(closes #18352) --- youtube_dl/extractor/mediaset.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index fcbc064ff..f976506f4 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -27,7 +27,7 @@ class MediasetIE(ThePlatformBaseIE): (?:video|on-demand)/(?:[^/]+/)+[^/]+_| player/index\.html\?.*?\bprogramGuid= ) - )(?P[0-9A-Z]{16}) + )(?P[0-9A-Z]{16,}) ''' _TESTS = [{ # full episode @@ -77,6 +77,18 @@ class MediasetIE(ThePlatformBaseIE): }, { 'url': 'mediaset:FAFU000000665924', 'only_matching': True, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/video/mediasethaacuoreilfuturo/palmieri-alicudi-lisola-dei-tre-bambini-felici--un-decreto-per-alicudi-e-tutte-le-microscuole_FD00000000102295', + 'only_matching': True, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/video/cherryseason/anticipazioni-degli-episodi-del-23-ottobre_F306837101005C02', + 'only_matching': True, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/video/tg5/ambiente-onda-umana-per-salvare-il-pianeta_F309453601079D01', + 'only_matching': True, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135', + 'only_matching': True, }] @staticmethod From e452345fc5cee5e79d2cad6be575da563987a4ff Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Nov 2019 15:43:52 +0100 Subject: [PATCH 0932/1201] [jamendo] improve extraction - fix album extraction(closes #18564) - improve metadata extraction(closes #18565)(closes #21379) --- youtube_dl/extractor/jamendo.py | 162 +++++++++++++++++++------------- 1 file changed, 99 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index c21827618..12e21eb6f 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -1,38 +1,26 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import hashlib +import random -from ..compat import compat_urlparse +from ..compat import compat_str from .common import InfoExtractor -from ..utils import parse_duration +from ..utils import ( + clean_html, + int_or_none, + try_get, +) -class JamendoBaseIE(InfoExtractor): - def _extract_meta(self, webpage, fatal=True): - title = self._og_search_title( - webpage, default=None) or self._search_regex( - r'([^<]+)', webpage, - 'title', default=None) - if title: - title = self._search_regex( - r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None) - if not title: - title = self._html_search_meta( - 'name', webpage, 'title', fatal=fatal) - mobj = re.search(r'(.+) - (.+)', title or '') - artist, second = mobj.groups() if mobj else [None] * 2 - return title, artist, second - - -class JamendoIE(JamendoBaseIE): +class JamendoIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: licensing\.jamendo\.com/[^/]+| (?:www\.)?jamendo\.com ) - /track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+) + /track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))? ''' _TESTS = [{ 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', @@ -45,7 +33,9 @@ class JamendoIE(JamendoBaseIE): 'artist': 'Maya Filipič', 'track': 'Stories from Emona I', 'duration': 210, - 'thumbnail': r're:^https?://.*\.jpg' + 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1217438117, + 'upload_date': '20080730', } }, { 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock', @@ -53,15 +43,19 @@ class JamendoIE(JamendoBaseIE): }] def _real_extract(self, url): - mobj = self._VALID_URL_RE.match(url) - track_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage( - 'https://www.jamendo.com/track/%s/%s' % (track_id, display_id), - display_id) - - title, artist, track = self._extract_meta(webpage) + track_id, display_id = self._VALID_URL_RE.match(url).groups() + webpage = self._download_webpage(url, track_id) + models = self._parse_json(self._html_search_regex( + r"data-bundled-models='([^']+)", + webpage, 'bundled models'), track_id) + track = models['track']['models'][0] + title = track_name = track['name'] + get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {} + artist = get_model('artist') + artist_name = artist.get('name') + if artist_name: + title = '%s - %s' % (artist_name, title) + album = get_model('album') formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -77,31 +71,58 @@ class JamendoIE(JamendoBaseIE): ))] self._sort_formats(formats) - thumbnail = self._html_search_meta( - 'image', webpage, 'thumbnail', fatal=False) - duration = parse_duration(self._search_regex( - r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']', - webpage, 'duration', fatal=False)) + urls = [] + thumbnails = [] + for _, covers in track.get('cover', {}).items(): + for cover_id, cover_url in covers.items(): + if not cover_url or cover_url in urls: + continue + urls.append(cover_url) + size = int_or_none(cover_id.lstrip('size')) + thumbnails.append({ + 'id': cover_id, + 'url': cover_url, + 'width': size, + 'height': size, + }) + + tags = [] + for tag in track.get('tags', []): + tag_name = tag.get('name') + if not tag_name: + continue + tags.append(tag_name) + + stats = track.get('stats') or {} return { 'id': track_id, 'display_id': display_id, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'title': title, - 'duration': duration, - 'artist': artist, - 'track': track, - 'formats': formats + 'description': track.get('description'), + 'duration': int_or_none(track.get('duration')), + 'artist': artist_name, + 'track': track_name, + 'album': album.get('name'), + 'formats': formats, + 'license': '-'.join(track.get('licenseCC', [])) or None, + 'timestamp': int_or_none(track.get('dateCreated')), + 'view_count': int_or_none(stats.get('listenedAll')), + 'like_count': int_or_none(stats.get('favorited')), + 'average_rating': int_or_none(stats.get('averageNote')), + 'tags': tags, } -class JamendoAlbumIE(JamendoBaseIE): - _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)' +class JamendoAlbumIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)' _TEST = { 'url': 'https://www.jamendo.com/album/121486/duck-on-cover', 'info_dict': { 'id': '121486', - 'title': 'Shearer - Duck On Cover' + 'title': 'Duck On Cover', + 'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239', }, 'playlist': [{ 'md5': 'e1a2fcb42bda30dfac990212924149a8', @@ -111,6 +132,8 @@ class JamendoAlbumIE(JamendoBaseIE): 'title': 'Shearer - Warmachine', 'artist': 'Shearer', 'track': 'Warmachine', + 'timestamp': 1368089771, + 'upload_date': '20130509', } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', @@ -120,6 +143,8 @@ class JamendoAlbumIE(JamendoBaseIE): 'title': 'Shearer - Without Your Ghost', 'artist': 'Shearer', 'track': 'Without Your Ghost', + 'timestamp': 1368089771, + 'upload_date': '20130509', } }], 'params': { @@ -127,24 +152,35 @@ class JamendoAlbumIE(JamendoBaseIE): } } + def _call_api(self, resource, resource_id): + path = '/api/%ss' % resource + rand = compat_str(random.random()) + return self._download_json( + 'https://www.jamendo.com' + path, resource_id, query={ + 'id[]': resource_id, + }, headers={ + 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand) + })[0] + def _real_extract(self, url): - mobj = self._VALID_URL_RE.match(url) - album_id = mobj.group('id') + album_id = self._match_id(url) + album = self._call_api('album', album_id) + album_name = album.get('name') - webpage = self._download_webpage(url, mobj.group('display_id')) + entries = [] + for track in album.get('tracks', []): + track_id = track.get('id') + if not track_id: + continue + track_id = compat_str(track_id) + entries.append({ + '_type': 'url_transparent', + 'url': 'https://www.jamendo.com/track/' + track_id, + 'ie_key': JamendoIE.ie_key(), + 'id': track_id, + 'album': album_name, + }) - title, artist, album = self._extract_meta(webpage, fatal=False) - - entries = [{ - '_type': 'url_transparent', - 'url': compat_urlparse.urljoin(url, m.group('path')), - 'ie_key': JamendoIE.ie_key(), - 'id': self._search_regex( - r'/track/(\d+)', m.group('path'), 'track id', default=None), - 'artist': artist, - 'album': album, - } for m in re.finditer( - r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', - webpage)] - - return self.playlist_result(entries, album_id, title) + return self.playlist_result( + entries, album_id, album_name, + clean_html(try_get(album, lambda x: x['description']['en'], compat_str))) From 2349255abdf822e0bb9508d510db926cae777f8c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 4 Nov 2019 15:51:44 +0100 Subject: [PATCH 0933/1201] [jamendo] restore track url modification --- youtube_dl/extractor/jamendo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 12e21eb6f..490efa8fb 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -44,7 +44,8 @@ class JamendoIE(InfoExtractor): def _real_extract(self, url): track_id, display_id = self._VALID_URL_RE.match(url).groups() - webpage = self._download_webpage(url, track_id) + webpage = self._download_webpage( + 'https://www.jamendo.com/track/' + track_id, track_id) models = self._parse_json(self._html_search_regex( r"data-bundled-models='([^']+)", webpage, 'bundled models'), track_id) From 3e4908360417bc29e1446bfa85145193fa2c8462 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 4 Nov 2019 20:05:27 +0100 Subject: [PATCH 0934/1201] [myspass] fix video URL extraction and improve metadata extraction(closes #22448) --- youtube_dl/extractor/myspass.py | 75 +++++++++++++-------------------- 1 file changed, 29 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 2afe535b5..db7ebc94c 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -1,73 +1,56 @@ +# coding: utf-8 from __future__ import unicode_literals -import os.path + +import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, -) +from ..compat import compat_str from ..utils import ( - ExtractorError, + int_or_none, + parse_duration, + xpath_text, ) class MySpassIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?myspass\.de/.*' + _VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)' _TEST = { 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', 'md5': '0b49f4844a068f8b33f4b7c88405862b', 'info_dict': { 'id': '11741', 'ext': 'mp4', - 'description': 'Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?', - 'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2', + 'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?', + 'title': '17.02.2013 - Die Highlights, Teil 2', }, } def _real_extract(self, url): - META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' + video_id = self._match_id(url) - # video id is the last path element of the URL - # usually there is a trailing slash, so also try the second but last - url_path = compat_urllib_parse_urlparse(url).path - url_parent_path, video_id = os.path.split(url_path) - if not video_id: - _, video_id = os.path.split(url_parent_path) - - # get metadata - metadata_url = META_DATA_URL_TEMPLATE % video_id metadata = self._download_xml( - metadata_url, video_id, transform_source=lambda s: s.strip()) + 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id, + video_id) - # extract values from metadata - url_flv_el = metadata.find('url_flv') - if url_flv_el is None: - raise ExtractorError('Unable to extract download url') - video_url = url_flv_el.text - title_el = metadata.find('title') - if title_el is None: - raise ExtractorError('Unable to extract title') - title = title_el.text - format_id_el = metadata.find('format_id') - if format_id_el is None: - format = 'mp4' - else: - format = format_id_el.text - description_el = metadata.find('description') - if description_el is not None: - description = description_el.text - else: - description = None - imagePreview_el = metadata.find('imagePreview') - if imagePreview_el is not None: - thumbnail = imagePreview_el.text - else: - thumbnail = None + title = xpath_text(metadata, 'title', fatal=True) + video_url = xpath_text(metadata, 'url_flv', 'download url', True) + video_id_int = int(video_id) + for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups(): + group_int = int(group) + if group_int > video_id_int: + video_url = video_url.replace( + group, compat_str(group_int // video_id_int)) return { 'id': video_id, 'url': video_url, 'title': title, - 'format': format, - 'thumbnail': thumbnail, - 'description': description, + 'thumbnail': xpath_text(metadata, 'imagePreview'), + 'description': xpath_text(metadata, 'description'), + 'duration': parse_duration(xpath_text(metadata, 'duration')), + 'series': xpath_text(metadata, 'format'), + 'season_number': int_or_none(xpath_text(metadata, 'season')), + 'season_id': xpath_text(metadata, 'season_id'), + 'episode': title, + 'episode_number': int_or_none(xpath_text(metadata, 'episode')), } From c69e71733d9619cb1a2bee769b9a381b52901de3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 4 Nov 2019 22:21:00 +0100 Subject: [PATCH 0935/1201] [msn] add support for Vidible and AOL embeds(closes #22195)(closes #22227) --- youtube_dl/extractor/msn.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py index 0460cf4d5..0c3813dda 100644 --- a/youtube_dl/extractor/msn.py +++ b/youtube_dl/extractor/msn.py @@ -41,6 +41,14 @@ class MSNIE(InfoExtractor): }, { 'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6', 'only_matching': True, + }, { + # Vidible(AOL) Embed + 'url': 'https://www.msn.com/en-us/video/animals/yellowstone-park-staffers-catch-deer-engaged-in-behavior-they-cant-explain/vi-AAGfdg1', + 'only_matching': True, + }, { + # Dailymotion Embed + 'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L', + 'only_matching': True, }] def _real_extract(self, url): @@ -61,6 +69,18 @@ class MSNIE(InfoExtractor): webpage, 'error', group='error')) raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + player_name = video.get('playerName') + if player_name: + provider_id = video.get('providerId') + if provider_id: + if player_name == 'AOL': + return self.url_result( + 'aol-video:' + provider_id, 'Aol', provider_id) + elif player_name == 'Dailymotion': + return self.url_result( + 'https://www.dailymotion.com/video/' + provider_id, + 'Dailymotion', provider_id) + title = video['title'] formats = [] From 20218040db2b1e063191cc470ce403d35d394e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Nov 2019 05:21:16 +0700 Subject: [PATCH 0936/1201] [scte] Add extractor (closes #22975) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/scte.py | 144 +++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 youtube_dl/extractor/scte.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dd5f68ca3..9f43b284d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -980,6 +980,10 @@ from .sbs import SBSIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ScrippsNetworksWatchIE +from .scte import ( + SCTEIE, + SCTECourseIE, +) from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE diff --git a/youtube_dl/extractor/scte.py b/youtube_dl/extractor/scte.py new file mode 100644 index 000000000..ca1de63b6 --- /dev/null +++ b/youtube_dl/extractor/scte.py @@ -0,0 +1,144 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + decode_packed_codes, + ExtractorError, + urlencode_postdata, +) + + +class SCTEBaseIE(InfoExtractor): + _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx' + _NETRC_MACHINE = 'scte' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_popup = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login popup') + + def is_logged(webpage): + return any(re.search(p, webpage) for p in ( + r'class=["\']welcome\b', r'>Sign Out<')) + + # already logged in + if is_logged(login_popup): + return + + login_form = self._hidden_inputs(login_popup) + + login_form.update({ + 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username, + 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password, + 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on', + }) + + response = self._download_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form)) + + if '|pageRedirect|' not in response and not is_logged(response): + error = self._html_search_regex( + r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + +class SCTEIE(SCTEBaseIE): + _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484', + 'info_dict': { + 'title': 'Introduction to DOCSIS Engineering Professional', + 'id': '31484', + }, + 'playlist_count': 5, + 'skip': 'Requires account credentials', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title') + + context_id = self._search_regex(r'context-(\d+)', webpage, video_id) + content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id + context = decode_packed_codes(self._download_webpage( + '%smobile/data.js' % content_base, video_id)) + + data = self._parse_xml( + self._search_regex( + r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"), + video_id) + + entries = [] + for asset in data.findall('.//asset'): + asset_url = asset.get('url') + if not asset_url or not asset_url.endswith('.mp4'): + continue + asset_id = self._search_regex( + r'video_([^_]+)_', asset_url, 'asset id', default=None) + if not asset_id: + continue + entries.append({ + 'id': asset_id, + 'title': title, + 'url': content_base + asset_url, + }) + + return self.playlist_result(entries, video_id, title) + + +class SCTECourseIE(SCTEBaseIE): + _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491', + 'only_matching': True, + }, { + 'url': 'https://learning.scte.org/course/view.php?id=3639', + 'only_matching': True, + }, { + 'url': 'https://learning.scte.org/course/view.php?id=3073', + 'only_matching': True, + }] + + def _real_extract(self, url): + course_id = self._match_id(url) + + webpage = self._download_webpage(url, course_id) + + title = self._search_regex( + r'<h1>(.+?)</h1>', webpage, 'title', default=None) + + entries = [] + for mobj in re.finditer( + r'''(?x) + <a[^>]+ + href=(["\']) + (?P<url> + https?://learning\.scte\.org/mod/ + (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*? + \bid=\d+ + ) + ''', + webpage): + item_url = mobj.group('url') + if item_url == url: + continue + ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm' + else SCTECourseIE.ie_key()) + entries.append(self.url_result(item_url, ie=ie)) + + return self.playlist_result(entries, course_id, title) From 1a4e4b0bfeb83b24755f80630d1e7f3427a5bf48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Nov 2019 05:31:40 +0700 Subject: [PATCH 0937/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/ChangeLog b/ChangeLog index fcab1102c..338dd456b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,47 @@ +version <unreleased> + +Extractors ++ [scte] Add support for learning.scte.org (#22975) ++ [msn] Add support for Vidible and AOL embeds (#22195, #22227) +* [myspass] Fix video URL extraction and improve metadata extraction (#22448) +* [jamendo] Improve extraction + * Fix album extraction (#18564) + * Improve metadata extraction (#18565, #21379) +* [mediaset] Relax URL guid matching (#18352) ++ [mediaset] Extract unprotected M3U and MPD manifests (#17204) +* [telegraaf] Fix extraction ++ [bellmedia] Add support for marilyn.ca videos (#22193) +* [stv] Fix extraction (#22928) +- [iconosquare] Remove extractor +- [keek] Remove extractor +- [gameone] Remove extractor (#21778) +- [flipagram] Remove extractor +- [bambuser] Remove extractor +* [wistia] Reduce embed extraction false positives ++ [wistia] Add support for inline embeds (#22931) +- [go90] Remove extractor +* [kakao] Remove raw request ++ [kakao] Extract format total bitrate +* [daum] Fix VOD and Clip extracton (#15015) +* [kakao] Improve extraction + + Add support for embed URLs + + Add support for Kakao Legacy vid based embed URLs + * Only extract fields used for extraction + * Strip description and extract tags +* [mixcloud] Fix cloudcast data extraction (#22821) +* [yahoo] Improve extraction + + Add support for live streams (#3597, #3779, #22178) + * Bypass cookie consent page for european domains (#16948, #22576) + + Add generic support for embeds (#20332) +* [tv2] Fix and improve extraction (#22787) ++ [tv2dk] Add support for TV2 DK sites +* [onet] Improve extraction … + + Add support for onet100.vod.pl + + Extract m3u8 formats + * Correct audio only format info +* [fox9] Fix extraction + + version 2019.10.29 Core From ea07412ebf6fff7c17bcac9960cfe4e92ed62f12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Nov 2019 05:32:56 +0700 Subject: [PATCH 0938/1201] release 2019.11.05 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 12 ++++-------- youtube_dl/version.py | 2 +- 8 files changed, 18 insertions(+), 22 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index f82502bd1..12de9add2 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.29 + [debug] youtube-dl version 2019.11.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 5ef983d43..8a6202cf6 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 8f05aa79f..83f91d5fe 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index e90900d8d..be8e70f1e 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.10.29 + [debug] youtube-dl version 2019.11.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 7021d7397..7544d171c 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.10.29** +- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 338dd456b..d46d20082 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.11.05 Extractors + [scte] Add support for learning.scte.org (#22975) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index af905db5a..536b87479 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -76,8 +76,6 @@ - **awaan:video** - **AZMedien**: AZ Medien videos - **BaiduVideo**: 百度视频 - - **bambuser** - - **bambuser:channel** - **Bandcamp** - **Bandcamp:album** - **Bandcamp:weekly** @@ -284,12 +282,12 @@ - **FiveThirtyEight** - **FiveTV** - **Flickr** - - **Flipagram** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FootyRoom** - **Formula1** - **FOX** - **FOX9** + - **FOX9News** - **Foxgay** - **foxnews**: Fox News and Fox Business Video - **foxnews:article** @@ -315,8 +313,6 @@ - **FXNetworks** - **Gaia** - **GameInformer** - - **GameOne** - - **gameone:playlist** - **GameSpot** - **GameStar** - **Gaskrank** @@ -331,7 +327,6 @@ - **Globo** - **GloboArticle** - **Go** - - **Go90** - **GodTube** - **Golem** - **GoogleDrive** @@ -366,7 +361,6 @@ - **Hungama** - **HungamaSong** - **Hypem** - - **Iconosquare** - **ign.com** - **imdb**: Internet Movie Database trailers - **imdb:list**: Internet Movie Database lists @@ -406,7 +400,6 @@ - **Kankan** - **Karaoketv** - **KarriereVideos** - - **keek** - **KeezMovies** - **Ketnet** - **KhanAcademy** @@ -777,6 +770,8 @@ - **Screencast** - **ScreencastOMatic** - **scrippsnetworks:watch** + - **SCTE** + - **SCTECourse** - **Seeker** - **SenateISVP** - **SendtoNews** @@ -926,6 +921,7 @@ - **TV2** - **tv2.hu** - **TV2Article** + - **TV2DK** - **TV4**: tv4.se and tv4play.se - **TV5MondePlus**: TV5MONDE+ - **TVA** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 924f26ca8..8012a66db 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.10.29' +__version__ = '2019.11.05' From e9b95167af3f9cacd16e379a40bacb27999840b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 10:03:38 +0100 Subject: [PATCH 0939/1201] [roosterteeth] fix login request(closes #16094)(closes #22689) --- youtube_dl/extractor/roosterteeth.py | 55 +++++++++++----------------- 1 file changed, 22 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 8d88ee499..8883639b2 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..compat import ( compat_HTTPError, @@ -18,7 +16,6 @@ from ..utils import ( class RoosterTeethIE(InfoExtractor): _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)' - _LOGIN_URL = 'https://roosterteeth.com/login' _NETRC_MACHINE = 'roosterteeth' _TESTS = [{ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', @@ -53,48 +50,40 @@ class RoosterTeethIE(InfoExtractor): 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'only_matching': True, }] + _EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/' def _login(self): username, password = self._get_login_info() if username is None: return - login_page = self._download_webpage( - self._LOGIN_URL, None, - note='Downloading login page', - errnote='Unable to download login page') - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - 'username': username, - 'password': password, - }) - - login_request = self._download_webpage( - self._LOGIN_URL, None, - note='Logging in', - data=urlencode_postdata(login_form), - headers={ - 'Referer': self._LOGIN_URL, - }) - - if not any(re.search(p, login_request) for p in ( - r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"', - r'>Sign Out<')): - error = self._html_search_regex( - r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>', - login_request, 'alert', default=None, group='error') - if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) - raise ExtractorError('Unable to log in') + try: + self._download_json( + 'https://auth.roosterteeth.com/oauth/token', + None, 'Logging in', data=urlencode_postdata({ + 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5', + 'grant_type': 'password', + 'username': username, + 'password': password, + })) + except ExtractorError as e: + msg = 'Unable to login' + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + resp = self._parse_json(e.cause.read().decode(), None, fatal=False) + if resp: + error = resp.get('extra_info') or resp.get('error_description') or resp.get('error') + if error: + msg += ': ' + error + self.report_warning(msg) def _real_initialize(self): + if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'): + return self._login() def _real_extract(self, url): display_id = self._match_id(url) - api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id + api_episode_url = self._EPISODE_BASE_URL + display_id try: m3u8_url = self._download_json( From b77c3949e899902de78b140f6e444dc55bac824f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 14:04:17 +0100 Subject: [PATCH 0940/1201] [patreon] minimize reponse size and extract uploader_id and filesize --- youtube_dl/extractor/patreon.py | 52 +++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 426dd8121..761a4b1de 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -6,7 +6,11 @@ from ..utils import ( clean_html, determine_ext, int_or_none, + KNOWN_EXTENSIONS, + mimetype2ext, parse_iso8601, + str_or_none, + try_get, ) @@ -24,6 +28,7 @@ class PatreonIE(InfoExtractor): 'thumbnail': 're:^https?://.*$', 'timestamp': 1406473987, 'upload_date': '20140727', + 'uploader_id': '87145', }, }, { 'url': 'http://www.patreon.com/creation?hid=754133', @@ -90,7 +95,13 @@ class PatreonIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) post = self._download_json( - 'https://www.patreon.com/api/posts/' + video_id, video_id) + 'https://www.patreon.com/api/posts/' + video_id, video_id, query={ + 'fields[media]': 'download_url,mimetype,size_bytes', + 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title', + 'fields[user]': 'full_name,url', + 'json-api-use-default-includes': 'false', + 'include': 'media,user', + }) attributes = post['data']['attributes'] title = attributes['title'].strip() image = attributes.get('image') or {} @@ -104,33 +115,42 @@ class PatreonIE(InfoExtractor): 'comment_count': int_or_none(attributes.get('comment_count')), } - def add_file(file_data): - file_url = file_data.get('url') - if file_url: - info.update({ - 'url': file_url, - 'ext': determine_ext(file_data.get('name'), 'mp3'), - }) - for i in post.get('included', []): i_type = i.get('type') - if i_type == 'attachment': - add_file(i.get('attributes') or {}) + if i_type == 'media': + media_attributes = i.get('attributes') or {} + download_url = media_attributes.get('download_url') + ext = mimetype2ext(media_attributes.get('mimetype')) + if download_url and ext in KNOWN_EXTENSIONS: + info.update({ + 'ext': ext, + 'filesize': int_or_none(media_attributes.get('size_bytes')), + 'url': download_url, + }) elif i_type == 'user': user_attributes = i.get('attributes') if user_attributes: info.update({ 'uploader': user_attributes.get('full_name'), + 'uploader_id': str_or_none(i.get('id')), 'uploader_url': user_attributes.get('url'), }) if not info.get('url'): - add_file(attributes.get('post_file') or {}) + embed_url = try_get(attributes, lambda x: x['embed']['url']) + if embed_url: + info.update({ + '_type': 'url', + 'url': embed_url, + }) if not info.get('url'): - info.update({ - '_type': 'url', - 'url': attributes['embed']['url'], - }) + post_file = attributes['post_file'] + ext = determine_ext(post_file.get('name')) + if ext in KNOWN_EXTENSIONS: + info.update({ + 'ext': ext, + 'url': post_file['url'], + }) return info From 2318629b2b79cad5fcab743bce86233a7592ed46 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 14:04:50 +0100 Subject: [PATCH 0941/1201] [dplay] minimize response size --- youtube_dl/extractor/dplay.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index d9c3d59cd..a7b9db568 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -146,6 +146,11 @@ class DPlayIE(InfoExtractor): video = self._download_json( disco_base + 'content/videos/' + display_id, display_id, headers=headers, query={ + 'fields[channel]': 'name', + 'fields[image]': 'height,src,width', + 'fields[show]': 'name', + 'fields[tag]': 'name', + 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', 'include': 'images,primaryChannel,show,tags' }) video_id = video['data']['id'] @@ -226,7 +231,6 @@ class DPlayIE(InfoExtractor): 'series': series, 'season_number': int_or_none(info.get('seasonNumber')), 'episode_number': int_or_none(info.get('episodeNumber')), - 'age_limit': int_or_none(info.get('minimum_age')), 'creator': creator, 'tags': tags, 'thumbnails': thumbnails, From b6139cb0c3635eb96e39973ab288c17a9f104067 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 22:56:25 +0100 Subject: [PATCH 0942/1201] [common] pass headers to _extract_(m3u8|mpd)_formats methods --- youtube_dl/extractor/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 50d48c40d..2688b19e4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1586,12 +1586,12 @@ class InfoExtractor(object): def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, entry_protocol='m3u8', preference=None, m3u8_id=None, note=None, errnote=None, - fatal=True, live=False): + fatal=True, live=False, headers=None): res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', errnote=errnote or 'Failed to download m3u8 information', - fatal=fatal) + fatal=fatal, headers=headers) if res is False: return [] @@ -2009,12 +2009,12 @@ class InfoExtractor(object): }) return entries - def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}): + def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, headers=None): res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', errnote=errnote or 'Failed to download MPD manifest', - fatal=fatal) + fatal=fatal, headers=None) if res is False: return [] mpd_doc, urlh = res From d7def23d0539430f5d816f1cfd733e436f62c257 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 23:08:42 +0100 Subject: [PATCH 0943/1201] [hotstar] pass Referer header to format requests(closes #22836) --- youtube_dl/extractor/hotstar.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index f9f7c5a64..f97eefa3d 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -118,6 +118,7 @@ class HotStarIE(HotStarBaseIE): if video_data.get('drmProtected'): raise ExtractorError('This video is DRM protected.', expected=True) + headers = {'Referer': url} formats = [] geo_restricted = False playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets'] @@ -137,10 +138,11 @@ class HotStarIE(HotStarBaseIE): if 'package:hls' in tags or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls')) + entry_protocol='m3u8_native', + m3u8_id='hls', headers=headers)) elif 'package:dash' in tags or ext == 'mpd': formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id='dash')) + format_url, video_id, mpd_id='dash', headers=headers)) elif ext == 'f4m': # produce broken files pass @@ -158,6 +160,9 @@ class HotStarIE(HotStarBaseIE): self.raise_geo_restricted(countries=['IN']) self._sort_formats(formats) + for f in formats: + f.setdefault('http_headers', {}).update(headers) + return { 'id': video_id, 'title': title, From 57033e35e58e1d57ab3be5ffe5df5a80a5dbcf83 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 5 Nov 2019 23:41:57 +0100 Subject: [PATCH 0944/1201] [common] fix typo --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2688b19e4..1e6b66d25 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2014,7 +2014,7 @@ class InfoExtractor(object): mpd_url, video_id, note=note or 'Downloading MPD manifest', errnote=errnote or 'Failed to download MPD manifest', - fatal=fatal, headers=None) + fatal=fatal, headers=headers) if res is False: return [] mpd_doc, urlh = res From 3ec86619e33a3d1e29c14ec053d7e420ac8b62ae Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 07:18:29 +0100 Subject: [PATCH 0945/1201] [common] initialize headers param with empty dict --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1e6b66d25..4a683f6d6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1586,7 +1586,7 @@ class InfoExtractor(object): def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, entry_protocol='m3u8', preference=None, m3u8_id=None, note=None, errnote=None, - fatal=True, live=False, headers=None): + fatal=True, live=False, headers={}): res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', @@ -2009,7 +2009,7 @@ class InfoExtractor(object): }) return entries - def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, headers=None): + def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, headers={}): res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', From d64ec1242e9dec03ea2aa86b6e913db78c8619e0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 10:44:19 +0100 Subject: [PATCH 0946/1201] [onionstudios] fix extraction --- youtube_dl/extractor/onionstudios.py | 78 ++++++++++++++++------------ 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py index c6e3d5640..7f8c6f0d3 100644 --- a/youtube_dl/extractor/onionstudios.py +++ b/youtube_dl/extractor/onionstudios.py @@ -5,10 +5,11 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, + compat_str, int_or_none, - float_or_none, - mimetype2ext, + js_to_json, + parse_iso8601, + try_get, ) @@ -17,14 +18,16 @@ class OnionStudiosIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937', - 'md5': '719d1f8c32094b8c33902c17bcae5e34', + 'md5': '5a118d466d62b5cd03647cf2c593977f', 'info_dict': { 'id': '2937', 'ext': 'mp4', 'title': 'Hannibal charges forward, stops for a cocktail', + 'description': 'md5:545299bda6abf87e5ec666548c6a9448', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'The A.V. Club', - 'uploader_id': 'the-av-club', + 'uploader': 'a.v. club', + 'upload_date': '20150619', + 'timestamp': 1434728546, }, }, { 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true', @@ -44,38 +47,49 @@ class OnionStudiosIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js', + video_id) + mcp_id = compat_str(self._parse_json(self._search_regex( + r'window\.mcpMapping\s*=\s*({.+?});', webpage, + 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id']) video_data = self._download_json( - 'http://www.onionstudios.com/video/%s.json' % video_id, video_id) - - title = video_data['title'] - + 'https://api.vmh.univision.com/metadata/v1/content/' + mcp_id, + mcp_id)['videoMetadata'] + iptc = video_data['photoVideoMetadataIPTC'] + title = iptc['title']['en'] + fmg = video_data.get('photoVideoMetadata_fmg') or {} + tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' + data = self._download_json( + tvss_domain + '/api/v3/video-auth/url-signature-tokens', + mcp_id, query={'mcpids': mcp_id})['data'][0] formats = [] - for source in video_data.get('sources', []): - source_url = source.get('url') - if not source_url: - continue - ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - tbr = int_or_none(source.get('bitrate')) - formats.append({ - 'format_id': ext + ('-%d' % tbr if tbr else ''), - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'tbr': tbr, - 'ext': ext, - }) + + rendition_url = data.get('renditionUrl') + if rendition_url: + formats = self._extract_m3u8_formats( + rendition_url, mcp_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + + fallback_rendition_url = data.get('fallbackRenditionUrl') + if fallback_rendition_url: + formats.append({ + 'format_id': 'fallback', + 'tbr': int_or_none(self._search_regex( + r'_(\d+)\.mp4', fallback_rendition_url, + 'bitrate', default=None)), + 'url': fallback_rendition_url, + }) + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'thumbnail': video_data.get('poster_url'), - 'uploader': video_data.get('channel_name'), - 'uploader_id': video_data.get('channel_slug'), - 'duration': float_or_none(video_data.get('duration', 1000)), - 'tags': video_data.get('tags'), + 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), + 'uploader': fmg.get('network'), + 'duration': int_or_none(iptc.get('fileDuration')), 'formats': formats, + 'description': try_get(iptc, lambda x: x['description']['en'], compat_str), + 'timestamp': parse_iso8601(iptc.get('dateReleased')), } From 55adb63e5412fa5556be22e97d61b8d27c7a5e67 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 19:56:10 +0100 Subject: [PATCH 0947/1201] [kinja] add support for Kinja embeds closes #5756 closes #11282 closes #22237 closes #22384 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 17 ++- youtube_dl/extractor/kinja.py | 221 +++++++++++++++++++++++++++ youtube_dl/extractor/onionstudios.py | 54 +------ 4 files changed, 241 insertions(+), 52 deletions(-) create mode 100644 youtube_dl/extractor/kinja.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9f43b284d..9e3b554fa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -513,6 +513,7 @@ from .keezmovies import KeezMoviesIE from .ketnet import KetnetIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE +from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE from .konserthusetplay import KonserthusetPlayIE from .kontrtube import KontrTubeIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1c0780e98..3d919f656 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -119,6 +119,7 @@ from .viqeo import ViqeoIE from .expressen import ExpressenIE from .zype import ZypeIE from .odnoklassniki import OdnoklassnikiIE +from .kinja import KinjaEmbedIE class GenericIE(InfoExtractor): @@ -1487,16 +1488,18 @@ class GenericIE(InfoExtractor): 'timestamp': 1432570283, }, }, - # OnionStudios embed + # Kinja embed { 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', 'info_dict': { - 'id': '2855', + 'id': '106351', 'ext': 'mp4', 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', + 'description': 'Migrated from OnionStudios', 'thumbnail': r're:^https?://.*\.jpe?g$', - 'uploader': 'ClickHole', - 'uploader_id': 'clickhole', + 'uploader': 'clickhole', + 'upload_date': '20150527', + 'timestamp': 1432744860, } }, # SnagFilms embed @@ -2894,6 +2897,12 @@ class GenericIE(InfoExtractor): if senate_isvp_url: return self.url_result(senate_isvp_url, 'SenateISVP') + # Look for Kinja embeds + kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url) + if kinja_embed_urls: + return self.playlist_from_matches( + kinja_embed_urls, video_id, video_title) + # Look for OnionStudios embeds onionstudios_url = OnionStudiosIE._extract_url(webpage) if onionstudios_url: diff --git a/youtube_dl/extractor/kinja.py b/youtube_dl/extractor/kinja.py new file mode 100644 index 000000000..79e3026d2 --- /dev/null +++ b/youtube_dl/extractor/kinja.py @@ -0,0 +1,221 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse_unquote, +) +from ..utils import ( + int_or_none, + parse_iso8601, + strip_or_none, + try_get, + unescapeHTML, + urljoin, +) + + +class KinjaEmbedIE(InfoExtractor): + IENAME = 'kinja:embed' + _DOMAIN_REGEX = r'''(?:[^.]+\.)? + (?: + avclub| + clickhole| + deadspin| + gizmodo| + jalopnik| + jezebel| + kinja| + kotaku| + lifehacker| + splinternews| + the(?:inventory|onion|root|takeout) + )\.com''' + _COMMON_REGEX = r'''/ + (?: + ajax/inset| + embed/video + )/iframe\?.*?\bid=''' + _VALID_URL = r'''(?x)https?://%s%s + (?P<type> + fb| + imgur| + instagram| + jwp(?:layer)?-video| + kinjavideo| + mcp| + megaphone| + ooyala| + soundcloud(?:-playlist)?| + tumblr-post| + twitch-stream| + twitter| + ustream-channel| + vimeo| + vine| + youtube-(?:list|video) + )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX) + _TESTS = [{ + 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E', + 'only_matching': True, + }, { + 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE', + 'only_matching': True, + }] + _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform') + _PROVIDER_MAP = { + 'fb': ('facebook.com/video.php?v=', 'Facebook'), + 'imgur': ('imgur.com/', 'Imgur'), + 'instagram': ('instagram.com/p/', 'Instagram'), + 'jwplayer-video': _JWPLATFORM_PROVIDER, + 'jwp-video': _JWPLATFORM_PROVIDER, + 'megaphone': ('player.megaphone.fm/', 'Generic'), + 'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'), + 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'), + 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'), + 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'), + 'twitch-stream': ('twitch.tv/', 'TwitchStream'), + 'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'), + 'ustream-channel': ('ustream.tv/embed/', 'Ustream'), + 'vimeo': ('vimeo.com/', 'Vimeo'), + 'vine': ('vine.co/v/', 'Vine'), + 'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'), + 'youtube-video': ('youtube.com/embed/', 'Youtube'), + } + + @staticmethod + def _extract_urls(webpage, url): + return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer( + r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX), + webpage)] + + def _real_extract(self, url): + video_type, video_id = re.match(self._VALID_URL, url).groups() + + provider = self._PROVIDER_MAP.get(video_type) + if provider: + video_id = compat_urllib_parse_unquote(video_id) + if video_type == 'tumblr-post': + video_id, blog = video_id.split('-', 1) + result_url = provider[0] % (blog, video_id) + elif video_type == 'youtube-list': + video_id, playlist_id = video_id.split('/') + result_url = provider[0] % (video_id, playlist_id) + else: + if video_type == 'ooyala': + video_id = video_id.split('/')[0] + result_url = provider[0] + video_id + return self.url_result('http://' + result_url, provider[1]) + + if video_type == 'kinjavideo': + data = self._download_json( + 'https://kinja.com/api/core/video/views/videoById', + video_id, query={'videoId': video_id})['data'] + title = data['title'] + + formats = [] + for k in ('signedPlaylist', 'streaming'): + m3u8_url = data.get(k + 'Url') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + + thumbnail = None + poster = data.get('poster') or {} + poster_id = poster.get('id') + if poster_id: + thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg') + + return { + 'id': video_id, + 'title': title, + 'description': strip_or_none(data.get('description')), + 'formats': formats, + 'tags': data.get('tags'), + 'timestamp': int_or_none(try_get( + data, lambda x: x['postInfo']['publishTimeMillis']), 1000), + 'thumbnail': thumbnail, + 'uploader': data.get('network'), + } + else: + video_data = self._download_json( + 'https://api.vmh.univision.com/metadata/v1/content/' + video_id, + video_id)['videoMetadata'] + iptc = video_data['photoVideoMetadataIPTC'] + title = iptc['title']['en'] + fmg = video_data.get('photoVideoMetadata_fmg') or {} + tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' + data = self._download_json( + tvss_domain + '/api/v3/video-auth/url-signature-tokens', + video_id, query={'mcpids': video_id})['data'][0] + formats = [] + + rendition_url = data.get('renditionUrl') + if rendition_url: + formats = self._extract_m3u8_formats( + rendition_url, video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + + fallback_rendition_url = data.get('fallbackRenditionUrl') + if fallback_rendition_url: + formats.append({ + 'format_id': 'fallback', + 'tbr': int_or_none(self._search_regex( + r'_(\d+)\.mp4', fallback_rendition_url, + 'bitrate', default=None)), + 'url': fallback_rendition_url, + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), + 'uploader': fmg.get('network'), + 'duration': int_or_none(iptc.get('fileDuration')), + 'formats': formats, + 'description': try_get(iptc, lambda x: x['description']['en'], compat_str), + 'timestamp': parse_iso8601(iptc.get('dateReleased')), + } diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py index 7f8c6f0d3..cf5c39e66 100644 --- a/youtube_dl/extractor/onionstudios.py +++ b/youtube_dl/extractor/onionstudios.py @@ -4,13 +4,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - compat_str, - int_or_none, - js_to_json, - parse_iso8601, - try_get, -) +from ..compat import compat_str +from ..utils import js_to_json class OnionStudiosIE(InfoExtractor): @@ -20,7 +15,7 @@ class OnionStudiosIE(InfoExtractor): 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937', 'md5': '5a118d466d62b5cd03647cf2c593977f', 'info_dict': { - 'id': '2937', + 'id': '3459881', 'ext': 'mp4', 'title': 'Hannibal charges forward, stops for a cocktail', 'description': 'md5:545299bda6abf87e5ec666548c6a9448', @@ -53,43 +48,6 @@ class OnionStudiosIE(InfoExtractor): mcp_id = compat_str(self._parse_json(self._search_regex( r'window\.mcpMapping\s*=\s*({.+?});', webpage, 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id']) - video_data = self._download_json( - 'https://api.vmh.univision.com/metadata/v1/content/' + mcp_id, - mcp_id)['videoMetadata'] - iptc = video_data['photoVideoMetadataIPTC'] - title = iptc['title']['en'] - fmg = video_data.get('photoVideoMetadata_fmg') or {} - tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' - data = self._download_json( - tvss_domain + '/api/v3/video-auth/url-signature-tokens', - mcp_id, query={'mcpids': mcp_id})['data'][0] - formats = [] - - rendition_url = data.get('renditionUrl') - if rendition_url: - formats = self._extract_m3u8_formats( - rendition_url, mcp_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - - fallback_rendition_url = data.get('fallbackRenditionUrl') - if fallback_rendition_url: - formats.append({ - 'format_id': 'fallback', - 'tbr': int_or_none(self._search_regex( - r'_(\d+)\.mp4', fallback_rendition_url, - 'bitrate', default=None)), - 'url': fallback_rendition_url, - }) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), - 'uploader': fmg.get('network'), - 'duration': int_or_none(iptc.get('fileDuration')), - 'formats': formats, - 'description': try_get(iptc, lambda x: x['description']['en'], compat_str), - 'timestamp': parse_iso8601(iptc.get('dateReleased')), - } + return self.url_result( + 'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id, + 'KinjaEmbed', mcp_id) From 5d92b407e0ea856e3dbadfef35e5258e94e0bb23 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 20:41:49 +0100 Subject: [PATCH 0948/1201] [mixcloud] improve extraction - improve metadata extraction(closes #11721) - fix playlist extraction(closes #22378) - fix user mixes extraction(closes #15197)(closes #17865) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/mixcloud.py | 498 +++++++++++++---------------- 2 files changed, 225 insertions(+), 274 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9e3b554fa..2f9ba6893 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -639,7 +639,6 @@ from .mixcloud import ( MixcloudIE, MixcloudUserIE, MixcloudPlaylistIE, - MixcloudStreamIE, ) from .mlb import MLBIE from .mnet import MnetIE diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index e5f631506..9759560f1 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import functools import itertools import re @@ -11,28 +10,37 @@ from ..compat import ( compat_ord, compat_str, compat_urllib_parse_unquote, - compat_urlparse, compat_zip ) from ..utils import ( - clean_html, - ExtractorError, int_or_none, - OnDemandPagedList, - str_to_int, + parse_iso8601, + strip_or_none, try_get, - urljoin, ) -class MixcloudIE(InfoExtractor): +class MixcloudBaseIE(InfoExtractor): + def _call_api(self, object_type, object_fields, display_id, username, slug=None): + lookup_key = object_type + 'Lookup' + return self._download_json( + 'https://www.mixcloud.com/graphql', display_id, query={ + 'query': '''{ + %s(lookup: {username: "%s"%s}) { + %s + } +}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields) + })['data'][lookup_key] + + +class MixcloudIE(MixcloudBaseIE): _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)' IE_NAME = 'mixcloud' _TESTS = [{ 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', 'info_dict': { - 'id': 'dholbach-cryptkeeper', + 'id': 'dholbach_cryptkeeper', 'ext': 'm4a', 'title': 'Cryptkeeper', 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', @@ -40,11 +48,13 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'dholbach', 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, + 'timestamp': 1321359578, + 'upload_date': '20111115', }, }, { 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', 'info_dict': { - 'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', + 'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat', 'ext': 'mp3', 'title': 'Caribou 7 inch Vinyl Mix & Chat', 'description': 'md5:2b8aec6adce69f9d41724647c65875e8', @@ -52,11 +62,14 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'gillespeterson', 'thumbnail': 're:https?://.*', 'view_count': int, + 'timestamp': 1422987057, + 'upload_date': '20150203', }, }, { 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', 'only_matching': True, }] + _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD' @staticmethod def _decrypt_xor_cipher(key, ciphertext): @@ -66,177 +79,193 @@ class MixcloudIE(InfoExtractor): for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - uploader = mobj.group(1) - cloudcast_name = mobj.group(2) - track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name))) + username, slug = re.match(self._VALID_URL, url).groups() + username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug) + track_id = '%s_%s' % (username, slug) - webpage = self._download_webpage(url, track_id) + cloudcast = self._call_api('cloudcast', '''audioLength + comments(first: 100) { + edges { + node { + comment + created + user { + displayName + username + } + } + } + totalCount + } + description + favorites { + totalCount + } + featuringArtistList + isExclusive + name + owner { + displayName + url + username + } + picture(width: 1024, height: 1024) { + url + } + plays + publishDate + reposts { + totalCount + } + streamInfo { + dashUrl + hlsUrl + url + } + tags { + tag { + name + } + }''', track_id, username, slug) - # Legacy path - encrypted_play_info = self._search_regex( - r'm-play-info="([^"]+)"', webpage, 'play info', default=None) + title = cloudcast['name'] - if encrypted_play_info is not None: - # Decode - encrypted_play_info = compat_b64decode(encrypted_play_info) - else: - # New path - full_info_json = self._parse_json(self._html_search_regex( - r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>', - webpage, 'play info'), 'play info') - for item in full_info_json: - item_data = try_get(item, [ - lambda x: x['cloudcast']['data']['cloudcastLookup'], - lambda x: x['cloudcastLookup']['data']['cloudcastLookup'], - ], dict) - if try_get(item_data, lambda x: x['streamInfo']['url']): - info_json = item_data - break - else: - raise ExtractorError('Failed to extract matching stream info') + stream_info = cloudcast['streamInfo'] + formats = [] - message = self._html_search_regex( - r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', - webpage, 'error message', default=None) - - js_url = self._search_regex( - r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)', - webpage, 'js url') - js = self._download_webpage(js_url, track_id, 'Downloading JS') - # Known plaintext attack - if encrypted_play_info: - kps = ['{"stream_url":'] - kpa_target = encrypted_play_info - else: - kps = ['https://', 'http://'] - kpa_target = compat_b64decode(info_json['streamInfo']['url']) - for kp in kps: - partial_key = self._decrypt_xor_cipher(kpa_target, kp) - for quote in ["'", '"']: - key = self._search_regex( - r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)), - js, 'encryption key', default=None) - if key is not None: - break - else: + for url_key in ('url', 'hlsUrl', 'dashUrl'): + format_url = stream_info.get(url_key) + if not format_url: continue - break - else: - raise ExtractorError('Failed to extract encryption key') + decrypted = self._decrypt_xor_cipher( + self._DECRYPTION_KEY, compat_b64decode(format_url)) + if url_key == 'hlsUrl': + formats.extend(self._extract_m3u8_formats( + decrypted, track_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif url_key == 'dashUrl': + formats.extend(self._extract_mpd_formats( + decrypted, track_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'format_id': 'http', + 'url': decrypted, + 'downloader_options': { + # Mixcloud starts throttling at >~5M + 'http_chunk_size': 5242880, + }, + }) - if encrypted_play_info is not None: - play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info') - if message and 'stream_url' not in play_info: - raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) - song_url = play_info['stream_url'] - formats = [{ - 'format_id': 'normal', - 'url': song_url - }] + if not formats and cloudcast.get('isExclusive'): + self.raise_login_required() - title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') - thumbnail = self._proto_relative_url(self._html_search_regex( - r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) - uploader = self._html_search_regex( - r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) - uploader_id = self._search_regex( - r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) - description = self._og_search_description(webpage) - view_count = str_to_int(self._search_regex( - [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', - r'/listeners/?">([0-9,.]+)</a>', - r'(?:m|data)-tooltip=["\']([\d,.]+) plays'], - webpage, 'play count', default=None)) + self._sort_formats(formats) - else: - title = info_json['name'] - thumbnail = urljoin( - 'https://thumbnailer.mixcloud.com/unsafe/600x600/', - try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str)) - uploader = try_get(info_json, lambda x: x['owner']['displayName']) - uploader_id = try_get(info_json, lambda x: x['owner']['username']) - description = try_get(info_json, lambda x: x['description']) - view_count = int_or_none(try_get(info_json, lambda x: x['plays'])) + comments = [] + for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []): + node = edge.get('node') or {} + text = strip_or_none(node.get('comment')) + if not text: + continue + user = node.get('user') or {} + comments.append({ + 'author': user.get('displayName'), + 'author_id': user.get('username'), + 'text': text, + 'timestamp': parse_iso8601(node.get('created')), + }) - stream_info = info_json['streamInfo'] - formats = [] + tags = [] + for t in cloudcast.get('tags'): + tag = try_get(t, lambda x: x['tag']['name'], compat_str) + if not tag: + tags.append(tag) - def decrypt_url(f_url): - for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'): - decrypted_url = self._decrypt_xor_cipher(k, f_url) - if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): - return decrypted_url + get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount'])) - for url_key in ('url', 'hlsUrl', 'dashUrl'): - format_url = stream_info.get(url_key) - if not format_url: - continue - decrypted = decrypt_url(compat_b64decode(format_url)) - if not decrypted: - continue - if url_key == 'hlsUrl': - formats.extend(self._extract_m3u8_formats( - decrypted, track_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif url_key == 'dashUrl': - formats.extend(self._extract_mpd_formats( - decrypted, track_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'format_id': 'http', - 'url': decrypted, - 'downloader_options': { - # Mixcloud starts throttling at >~5M - 'http_chunk_size': 5242880, - }, - }) - self._sort_formats(formats) + owner = cloudcast.get('owner') or {} return { 'id': track_id, 'title': title, 'formats': formats, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'view_count': view_count, + 'description': cloudcast.get('description'), + 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str), + 'uploader': owner.get('displayName'), + 'timestamp': parse_iso8601(cloudcast.get('publishDate')), + 'uploader_id': owner.get('username'), + 'uploader_url': owner.get('url'), + 'duration': int_or_none(cloudcast.get('audioLength')), + 'view_count': int_or_none(cloudcast.get('plays')), + 'like_count': get_count('favorites'), + 'repost_count': get_count('reposts'), + 'comment_count': get_count('comments'), + 'comments': comments, + 'tags': tags, + 'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None, } -class MixcloudPlaylistBaseIE(InfoExtractor): - _PAGE_SIZE = 24 +class MixcloudPlaylistBaseIE(MixcloudBaseIE): + def _get_cloudcast(self, node): + return node - def _find_urls_in_page(self, page): - for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page): - yield self.url_result( - compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)), - MixcloudIE.ie_key()) + def _get_playlist_title(self, title, slug): + return title - def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None): - real_page_number = real_page_number or current_page + 1 - return self._download_webpage( - 'https://www.mixcloud.com/%s/' % path, video_id, - note='Download %s (page %d)' % (page_name, current_page + 1), - errnote='Unable to download %s' % page_name, - query={'page': real_page_number, 'list': 'main', '_ajax': '1'}, - headers={'X-Requested-With': 'XMLHttpRequest'}) + def _real_extract(self, url): + username, slug = re.match(self._VALID_URL, url).groups() + username = compat_urllib_parse_unquote(username) + if not slug: + slug = 'uploads' + else: + slug = compat_urllib_parse_unquote(slug) + playlist_id = '%s_%s' % (username, slug) - def _tracks_page_func(self, page, video_id, page_name, current_page): - resp = self._fetch_tracks_page(page, video_id, page_name, current_page) + is_playlist_type = self._ROOT_TYPE == 'playlist' + playlist_type = 'items' if is_playlist_type else slug + list_filter = '' - for item in self._find_urls_in_page(resp): - yield item + has_next_page = True + entries = [] + while has_next_page: + playlist = self._call_api( + self._ROOT_TYPE, '''%s + %s + %s(first: 100%s) { + edges { + node { + %s + } + } + pageInfo { + endCursor + hasNextPage + } + }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE), + playlist_id, username, slug if is_playlist_type else None) - def _get_user_description(self, page_content): - return self._html_search_regex( - r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>', - page_content, 'user description', fatal=False) + items = playlist.get(playlist_type) or {} + for edge in items.get('edges', []): + cloudcast = self._get_cloudcast(edge.get('node') or {}) + cloudcast_url = cloudcast.get('url') + if not cloudcast_url: + continue + entries.append(self.url_result( + cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug'))) + + page_info = items['pageInfo'] + has_next_page = page_info['hasNextPage'] + list_filter = ', after: "%s"' % page_info['endCursor'] + + return self.playlist_result( + entries, playlist_id, + self._get_playlist_title(playlist[self._TITLE_KEY], slug), + playlist.get(self._DESCRIPTION_KEY)) class MixcloudUserIE(MixcloudPlaylistBaseIE): - _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$' + _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$' IE_NAME = 'mixcloud:user' _TESTS = [{ @@ -244,68 +273,58 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:def36060ac8747b3aabca54924897e47', + 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', }, - 'playlist_mincount': 11, + 'playlist_mincount': 36, }, { 'url': 'http://www.mixcloud.com/dholbach/uploads/', 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:def36060ac8747b3aabca54924897e47', + 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', }, - 'playlist_mincount': 11, + 'playlist_mincount': 36, }, { 'url': 'http://www.mixcloud.com/dholbach/favorites/', 'info_dict': { 'id': 'dholbach_favorites', 'title': 'Daniel Holbach (favorites)', - 'description': 'md5:def36060ac8747b3aabca54924897e47', + 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', }, - 'params': { - 'playlist_items': '1-100', - }, - 'playlist_mincount': 100, + # 'params': { + # 'playlist_items': '1-100', + # }, + 'playlist_mincount': 396, }, { 'url': 'http://www.mixcloud.com/dholbach/listens/', 'info_dict': { 'id': 'dholbach_listens', 'title': 'Daniel Holbach (listens)', - 'description': 'md5:def36060ac8747b3aabca54924897e47', + 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789', }, - 'params': { - 'playlist_items': '1-100', + # 'params': { + # 'playlist_items': '1-100', + # }, + 'playlist_mincount': 1623, + 'skip': 'Large list', + }, { + 'url': 'https://www.mixcloud.com/FirstEar/stream/', + 'info_dict': { + 'id': 'FirstEar_stream', + 'title': 'First Ear (stream)', + 'description': 'Curators of good music\r\n\r\nfirstearmusic.com', }, - 'playlist_mincount': 100, + 'playlist_mincount': 271, }] - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - user_id = mobj.group('user') - list_type = mobj.group('type') + _TITLE_KEY = 'displayName' + _DESCRIPTION_KEY = 'biog' + _ROOT_TYPE = 'user' + _NODE_TEMPLATE = '''slug + url''' - # if only a profile URL was supplied, default to download all uploads - if list_type is None: - list_type = 'uploads' - - video_id = '%s_%s' % (user_id, list_type) - - profile = self._download_webpage( - 'https://www.mixcloud.com/%s/' % user_id, video_id, - note='Downloading user profile', - errnote='Unable to download user profile') - - username = self._og_search_title(profile) - description = self._get_user_description(profile) - - entries = OnDemandPagedList( - functools.partial( - self._tracks_page_func, - '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type), - self._PAGE_SIZE) - - return self.playlist_result( - entries, video_id, '%s (%s)' % (username, list_type), description) + def _get_playlist_title(self, title, slug): + return '%s (%s)' % (title, slug) class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): @@ -313,87 +332,20 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): IE_NAME = 'mixcloud:playlist' _TESTS = [{ - 'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/', - 'info_dict': { - 'id': 'RedBullThre3style_tokyo-finalists-2015', - 'title': 'National Champions 2015', - 'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3', - }, - 'playlist_mincount': 16, - }, { 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - user_id = mobj.group('user') - playlist_id = mobj.group('playlist') - video_id = '%s_%s' % (user_id, playlist_id) - - webpage = self._download_webpage( - url, user_id, - note='Downloading playlist page', - errnote='Unable to download playlist page') - - title = self._html_search_regex( - r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)', - webpage, 'playlist title', - default=None) or self._og_search_title(webpage, fatal=False) - description = self._get_user_description(webpage) - - entries = OnDemandPagedList( - functools.partial( - self._tracks_page_func, - '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'), - self._PAGE_SIZE) - - return self.playlist_result(entries, video_id, title, description) - - -class MixcloudStreamIE(MixcloudPlaylistBaseIE): - _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$' - IE_NAME = 'mixcloud:stream' - - _TEST = { - 'url': 'https://www.mixcloud.com/FirstEar/stream/', 'info_dict': { - 'id': 'FirstEar', - 'title': 'First Ear', - 'description': 'Curators of good music\nfirstearmusic.com', + 'id': 'maxvibes_jazzcat-on-ness-radio', + 'title': 'Ness Radio sessions', }, - 'playlist_mincount': 192, - } + 'playlist_mincount': 59, + }] + _TITLE_KEY = 'name' + _DESCRIPTION_KEY = 'description' + _ROOT_TYPE = 'playlist' + _NODE_TEMPLATE = '''cloudcast { + slug + url + }''' - def _real_extract(self, url): - user_id = self._match_id(url) - - webpage = self._download_webpage(url, user_id) - - entries = [] - prev_page_url = None - - def _handle_page(page): - entries.extend(self._find_urls_in_page(page)) - return self._search_regex( - r'm-next-page-url="([^"]+)"', page, - 'next page URL', default=None) - - next_page_url = _handle_page(webpage) - - for idx in itertools.count(0): - if not next_page_url or prev_page_url == next_page_url: - break - - prev_page_url = next_page_url - current_page = int(self._search_regex( - r'\?page=(\d+)', next_page_url, 'next page number')) - - next_page_url = _handle_page(self._fetch_tracks_page( - '%s/stream' % user_id, user_id, 'stream', idx, - real_page_number=current_page)) - - username = self._og_search_title(webpage) - description = self._get_user_description(webpage) - - return self.playlist_result(entries, user_id, username, description) + def _get_cloudcast(self, node): + return node.get('cloudcast') or {} From d4f53af482cc47b0473a3576da7ad902bea4ac39 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 6 Nov 2019 23:14:26 +0100 Subject: [PATCH 0949/1201] [lnkgo] fix extraction(closes #16834) --- youtube_dl/extractor/lnkgo.py | 100 ++++++++++++---------------------- 1 file changed, 36 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py index cfec0d3d0..3e71852aa 100644 --- a/youtube_dl/extractor/lnkgo.py +++ b/youtube_dl/extractor/lnkgo.py @@ -5,24 +5,27 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, + compat_str, int_or_none, - unified_strdate, + parse_iso8601, ) class LnkGoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?' _TESTS = [{ - 'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162', + 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai', 'info_dict': { - 'id': '46712', + 'id': '10809', 'ext': 'mp4', - 'title': 'Yra kaip yra', - 'upload_date': '20150107', - 'description': 'md5:d82a5e36b775b7048617f263a0e3475e', - 'age_limit': 7, - 'duration': 3019, - 'thumbnail': r're:^https?://.*\.jpg$' + 'title': "Put'ka: Trys Klausimai", + 'upload_date': '20161216', + 'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.', + 'age_limit': 18, + 'duration': 117, + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1481904000, }, 'params': { 'skip_download': True, # HLS download @@ -30,20 +33,21 @@ class LnkGoIE(InfoExtractor): }, { 'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2', 'info_dict': { - 'id': '47289', + 'id': '10467', 'ext': 'mp4', 'title': 'Nėrdas: Kompiuterio Valymas', 'upload_date': '20150113', 'description': 'md5:7352d113a242a808676ff17e69db6a69', 'age_limit': 18, 'duration': 346, - 'thumbnail': r're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1421164800, }, 'params': { 'skip_download': True, # HLS download }, }, { - 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai', + 'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413', 'only_matching': True, }] _AGE_LIMITS = { @@ -51,66 +55,34 @@ class LnkGoIE(InfoExtractor): 'N-14': 14, 'S': 18, } + _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s' def _real_extract(self, url): - display_id = self._match_id(url) + display_id, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage( - url, display_id, 'Downloading player webpage') - - video_id = self._search_regex( - r'data-ep="([^"]+)"', webpage, 'video ID') - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - upload_date = unified_strdate(self._search_regex( - r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False)) - - thumbnail_w = int_or_none( - self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False)) - thumbnail_h = int_or_none( - self._og_search_property('image:height', webpage, 'thumbnail height', fatal=False)) - thumbnail = { - 'url': self._og_search_thumbnail(webpage), - } - if thumbnail_w and thumbnail_h: - thumbnail.update({ - 'width': thumbnail_w, - 'height': thumbnail_h, - }) - - config = self._parse_json(self._search_regex( - r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id) - - if config.get('pGeo'): - self.report_warning( - 'This content might not be available in your country due to copyright reasons') - - formats = [{ - 'format_id': 'hls', - 'ext': 'mp4', - 'url': config['EpisodeVideoLink_HLS'], - }] - - m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink']) - if m: - formats.append({ - 'format_id': 'rtmp', - 'ext': 'flv', - 'url': m.group('url'), - 'play_path': m.group('play_path'), - 'page_url': url, - }) + video_info = self._download_json( + 'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0'), + display_id)['videoConfig']['videoInfo'] + video_id = compat_str(video_info['id']) + title = video_info['title'] + prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4' + formats = self._extract_m3u8_formats( + self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''), + video_id, 'mp4', 'm3u8_native') self._sort_formats(formats) + poster_image = video_info.get('posterImage') + return { 'id': video_id, 'display_id': display_id, 'title': title, 'formats': formats, - 'thumbnails': [thumbnail], - 'duration': int_or_none(config.get('VideoTime')), - 'description': description, - 'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0), - 'upload_date': upload_date, + 'thumbnail': 'https://lnk.lt/all-images/' + poster_image if poster_image else None, + 'duration': int_or_none(video_info.get('duration')), + 'description': clean_html(video_info.get('htmlDescription')), + 'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0), + 'timestamp': parse_iso8601(video_info.get('airDate')), + 'view_count': int_or_none(video_info.get('viewsCount')), } From 0b16b3c2d35d1706ec5c55e5b06352c753127368 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 9 Nov 2019 09:22:24 +0100 Subject: [PATCH 0950/1201] [twitch] add support for Clip embed URLs --- youtube_dl/extractor/twitch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index ca7676fe2..a5681409c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -644,7 +644,7 @@ class TwitchStreamIE(TwitchBaseIE): class TwitchClipsIE(TwitchBaseIE): IE_NAME = 'twitch:clips' - _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', @@ -667,6 +667,9 @@ class TwitchClipsIE(TwitchBaseIE): }, { 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan', 'only_matching': True, + }, { + 'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited', + 'only_matching': True, }] def _real_extract(self, url): From 18ca61c5e153d1c1cb8b9a2de3c8b9dfdaa69b0e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 9 Nov 2019 09:23:20 +0100 Subject: [PATCH 0951/1201] [twitter] improve extraction - add support for generic embeds(closes #22168) - always extract http formats for native videos(closes #14934) - add support for Twitter Broadcasts(closes #21369) - extract more metadata - improve VMap format extraction - unify extraction code for both twitter statuses and cards --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/periscope.py | 80 ++-- youtube_dl/extractor/twitter.py | 570 +++++++++++++++-------------- 3 files changed, 344 insertions(+), 307 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2f9ba6893..598006061 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1241,6 +1241,7 @@ from .twitter import ( TwitterCardIE, TwitterIE, TwitterAmplifyIE, + TwitterBroadcastIE, ) from .udemy import ( UdemyIE, diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index b337a56c0..c02e34aba 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -17,12 +17,54 @@ class PeriscopeBaseIE(InfoExtractor): 'https://api.periscope.tv/api/v2/%s' % method, item_id, query=query) + def _parse_broadcast_data(self, broadcast, video_id): + title = broadcast['status'] + uploader = broadcast.get('user_display_name') or broadcast.get('username') + title = '%s - %s' % (uploader, title) if uploader else title + is_live = broadcast.get('state').lower() == 'running' + + thumbnails = [{ + 'url': broadcast[image], + } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + + return { + 'id': broadcast.get('id') or video_id, + 'title': self._live_title(title) if is_live else title, + 'timestamp': parse_iso8601(broadcast.get('created_at')), + 'uploader': uploader, + 'uploader_id': broadcast.get('user_id') or broadcast.get('username'), + 'thumbnails': thumbnails, + 'view_count': int_or_none(broadcast.get('total_watched')), + 'tags': broadcast.get('tags'), + 'is_live': is_live, + } + + @staticmethod + def _extract_common_format_info(broadcast): + return broadcast.get('state').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height')) + + @staticmethod + def _add_width_and_height(f, width, height): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + + def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True): + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native' + if state in ('ended', 'timed_out') else 'm3u8', + m3u8_id=format_id, fatal=fatal) + if len(m3u8_formats) == 1: + self._add_width_and_height(m3u8_formats[0], width, height) + return m3u8_formats + class PeriscopeIE(PeriscopeBaseIE): IE_DESC = 'Periscope' IE_NAME = 'periscope' _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)' - # Alive example URLs can be found here http://onperiscope.com/ + # Alive example URLs can be found here https://www.periscope.tv/ _TESTS = [{ 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', 'md5': '65b57957972e503fcbbaeed8f4fa04ca', @@ -61,21 +103,9 @@ class PeriscopeIE(PeriscopeBaseIE): 'accessVideoPublic', {'broadcast_id': token}, token) broadcast = stream['broadcast'] - title = broadcast['status'] + info = self._parse_broadcast_data(broadcast, token) - uploader = broadcast.get('user_display_name') or broadcast.get('username') - uploader_id = (broadcast.get('user_id') or broadcast.get('username')) - - title = '%s - %s' % (uploader, title) if uploader else title state = broadcast.get('state').lower() - if state == 'running': - title = self._live_title(title) - timestamp = parse_iso8601(broadcast.get('created_at')) - - thumbnails = [{ - 'url': broadcast[image], - } for image in ('image_url', 'image_url_small') if broadcast.get(image)] - width = int_or_none(broadcast.get('width')) height = int_or_none(broadcast.get('height')) @@ -92,32 +122,20 @@ class PeriscopeIE(PeriscopeBaseIE): continue video_urls.add(video_url) if format_id != 'rtmp': - m3u8_formats = self._extract_m3u8_formats( - video_url, token, 'mp4', - entry_protocol='m3u8_native' - if state in ('ended', 'timed_out') else 'm3u8', - m3u8_id=format_id, fatal=False) - if len(m3u8_formats) == 1: - add_width_and_height(m3u8_formats[0]) + m3u8_formats = self._extract_pscp_m3u8_formats( + video_url, token, format_id, state, width, height, False) formats.extend(m3u8_formats) continue rtmp_format = { 'url': video_url, 'ext': 'flv' if format_id == 'rtmp' else 'mp4', } - add_width_and_height(rtmp_format) + self._add_width_and_height(rtmp_format) formats.append(rtmp_format) self._sort_formats(formats) - return { - 'id': broadcast.get('id') or token, - 'title': title, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'thumbnails': thumbnails, - 'formats': formats, - } + info['formats'] = formats + return info class PeriscopeUserIE(PeriscopeBaseIE): diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index cebb6238c..5f8d90fb4 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -4,32 +4,67 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_HTTPError, + compat_parse_qs, + compat_urllib_parse_unquote, + compat_urllib_parse_urlparse, +) from ..utils import ( - determine_ext, dict_get, ExtractorError, float_or_none, int_or_none, - remove_end, try_get, + strip_or_none, + unified_timestamp, + update_url_query, xpath_text, ) -from .periscope import PeriscopeIE +from .periscope import ( + PeriscopeBaseIE, + PeriscopeIE, +) class TwitterBaseIE(InfoExtractor): + _API_BASE = 'https://api.twitter.com/1.1/' + _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?twitter\.com/' + _GUEST_TOKEN = None + + def _extract_variant_formats(self, variant, video_id): + variant_url = variant.get('url') + if not variant_url: + return [] + elif '.m3u8' in variant_url: + return self._extract_m3u8_formats( + variant_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + else: + tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None + f = { + 'url': variant_url, + 'format_id': 'http' + ('-%d' % tbr if tbr else ''), + 'tbr': tbr, + } + self._search_dimensions_in_video_url(f, variant_url) + return [f] + def _extract_formats_from_vmap_url(self, vmap_url, video_id): vmap_data = self._download_xml(vmap_url, video_id) - video_url = xpath_text(vmap_data, './/MediaFile').strip() - if determine_ext(video_url) == 'm3u8': - return self._extract_m3u8_formats( - video_url, video_id, ext='mp4', m3u8_id='hls', - entry_protocol='m3u8_native') - return [{ - 'url': video_url, - }] + formats = [] + urls = [] + for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'): + video_variant.attrib['url'] = compat_urllib_parse_unquote( + video_variant.attrib['url']) + urls.append(video_variant.attrib['url']) + formats.extend(self._extract_variant_formats( + video_variant.attrib, video_id)) + video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile')) + if video_url not in urls: + formats.extend(self._extract_variant_formats({'url': video_url}, video_id)) + return formats @staticmethod def _search_dimensions_in_video_url(a_format, video_url): @@ -40,10 +75,30 @@ class TwitterBaseIE(InfoExtractor): 'height': int(m.group('height')), }) + def _call_api(self, path, video_id, query={}): + headers = { + 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw', + } + if not self._GUEST_TOKEN: + self._GUEST_TOKEN = self._download_json( + self._API_BASE + 'guest/activate.json', video_id, + 'Downloading guest token', data=b'', + headers=headers)['guest_token'] + headers['x-guest-token'] = self._GUEST_TOKEN + try: + return self._download_json( + self._API_BASE + path, video_id, headers=headers, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + raise ExtractorError(self._parse_json( + e.cause.read().decode(), + video_id)['errors'][0]['message'], expected=True) + raise -class TwitterCardIE(TwitterBaseIE): + +class TwitterCardIE(InfoExtractor): IE_NAME = 'twitter:card' - _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?P<path>cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)' + _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)' _TESTS = [ { 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', @@ -51,19 +106,28 @@ class TwitterCardIE(TwitterBaseIE): 'info_dict': { 'id': '560070183650213889', 'ext': 'mp4', - 'title': 'Twitter web player', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.", + 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96', + 'uploader': 'Twitter', + 'uploader_id': 'Twitter', + 'thumbnail': r're:^https?://.*\.jpg', 'duration': 30.033, + 'timestamp': 1422366112, + 'upload_date': '20150127', }, }, { 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768', - 'md5': '7ee2a553b63d1bccba97fbed97d9e1c8', + 'md5': '7137eca597f72b9abbe61e5ae0161399', 'info_dict': { 'id': '623160978427936768', 'ext': 'mp4', - 'title': 'Twitter web player', - 'thumbnail': r're:^https?://.*$', + 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.", + 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA", + 'uploader': 'NASA', + 'uploader_id': 'NASA', + 'timestamp': 1437408129, + 'upload_date': '20150720', }, }, { @@ -75,7 +139,7 @@ class TwitterCardIE(TwitterBaseIE): 'title': 'Ubuntu 11.10 Overview', 'description': 'md5:a831e97fa384863d6e26ce48d1c43376', 'upload_date': '20111013', - 'uploader': 'OMG! Ubuntu!', + 'uploader': 'OMG! UBUNTU!', 'uploader_id': 'omgubuntu', }, 'add_ie': ['Youtube'], @@ -99,190 +163,30 @@ class TwitterCardIE(TwitterBaseIE): 'info_dict': { 'id': '705235433198714880', 'ext': 'mp4', - 'title': 'Twitter web player', - 'thumbnail': r're:^https?://.*', + 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.", + 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns", + 'uploader': 'Brent Yarina', + 'uploader_id': 'BTNBrentYarina', + 'timestamp': 1456976204, + 'upload_date': '20160303', }, + 'skip': 'This content is no longer available.', }, { 'url': 'https://twitter.com/i/videos/752274308186120192', 'only_matching': True, }, ] - _API_BASE = 'https://api.twitter.com/1.1' - - def _parse_media_info(self, media_info, video_id): - formats = [] - for media_variant in media_info.get('variants', []): - media_url = media_variant['url'] - if media_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls')) - elif media_url.endswith('.mpd'): - formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash')) - else: - tbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000) - a_format = { - 'url': media_url, - 'format_id': 'http-%d' % tbr if tbr else 'http', - 'tbr': tbr, - } - # Reported bitRate may be zero - if not a_format['tbr']: - del a_format['tbr'] - - self._search_dimensions_in_video_url(a_format, media_url) - - formats.append(a_format) - return formats - - def _extract_mobile_formats(self, username, video_id): - webpage = self._download_webpage( - 'https://mobile.twitter.com/%s/status/%s' % (username, video_id), - video_id, 'Downloading mobile webpage', - headers={ - # A recent mobile UA is necessary for `gt` cookie - 'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0', - }) - main_script_url = self._html_search_regex( - r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL') - main_script = self._download_webpage( - main_script_url, video_id, 'Downloading main script') - bearer_token = self._search_regex( - r'BEARER_TOKEN\s*:\s*"([^"]+)"', - main_script, 'bearer token') - # https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id - api_data = self._download_json( - '%s/statuses/show/%s.json' % (self._API_BASE, video_id), - video_id, 'Downloading API data', - headers={ - 'Authorization': 'Bearer ' + bearer_token, - }) - media_info = try_get(api_data, lambda o: o['extended_entities']['media'][0]['video_info']) or {} - return self._parse_media_info(media_info, video_id) - def _real_extract(self, url): - path, video_id = re.search(self._VALID_URL, url).groups() - - config = None - formats = [] - duration = None - - urls = [url] - if path.startswith('cards/'): - urls.append('https://twitter.com/i/videos/' + video_id) - - for u in urls: - webpage = self._download_webpage( - u, video_id, headers={'Referer': 'https://twitter.com/'}) - - iframe_url = self._html_search_regex( - r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', - webpage, 'video iframe', default=None) - if iframe_url: - return self.url_result(iframe_url) - - config = self._parse_json(self._html_search_regex( - r'data-(?:player-)?config="([^"]+)"', webpage, - 'data player config', default='{}'), - video_id) - - if config.get('source_type') == 'vine': - return self.url_result(config['player_url'], 'Vine') - - periscope_url = PeriscopeIE._extract_url(webpage) - if periscope_url: - return self.url_result(periscope_url, PeriscopeIE.ie_key()) - - video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') - - if video_url: - if determine_ext(video_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) - else: - f = { - 'url': video_url, - } - - self._search_dimensions_in_video_url(f, video_url) - - formats.append(f) - - vmap_url = config.get('vmapUrl') or config.get('vmap_url') - if vmap_url: - formats.extend( - self._extract_formats_from_vmap_url(vmap_url, video_id)) - - media_info = None - - for entity in config.get('status', {}).get('entities', []): - if 'mediaInfo' in entity: - media_info = entity['mediaInfo'] - - if media_info: - formats.extend(self._parse_media_info(media_info, video_id)) - duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9) - - username = config.get('user', {}).get('screen_name') - if username: - formats.extend(self._extract_mobile_formats(username, video_id)) - - if formats: - title = self._search_regex(r'<title>([^<]+)', webpage, 'title') - thumbnail = config.get('posterImageUrl') or config.get('image_src') - duration = float_or_none(config.get('duration'), scale=1000) or duration - break - - if not formats: - headers = { - 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw', - 'Referer': url, - } - ct0 = self._get_cookies(url).get('ct0') - if ct0: - headers['csrf_token'] = ct0.value - guest_token = self._download_json( - '%s/guest/activate.json' % self._API_BASE, video_id, - 'Downloading guest token', data=b'', - headers=headers)['guest_token'] - headers['x-guest-token'] = guest_token - self._set_cookie('api.twitter.com', 'gt', guest_token) - config = self._download_json( - '%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id), - video_id, headers=headers) - track = config['track'] - vmap_url = track.get('vmapUrl') - if vmap_url: - formats = self._extract_formats_from_vmap_url(vmap_url, video_id) - else: - playback_url = track['playbackUrl'] - if determine_ext(playback_url) == 'm3u8': - formats = self._extract_m3u8_formats( - playback_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - else: - formats = [{ - 'url': playback_url, - }] - title = 'Twitter web player' - thumbnail = config.get('posterImage') - duration = float_or_none(track.get('durationMs'), scale=1000) - - self._remove_duplicate_formats(formats) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } + status_id = self._match_id(url) + return self.url_result( + 'https://twitter.com/statuses/' + status_id, + TwitterIE.ie_key(), status_id) -class TwitterIE(InfoExtractor): +class TwitterIE(TwitterBaseIE): IE_NAME = 'twitter' - _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P[^/]+))/status/(?P\d+)' - _TEMPLATE_URL = 'https://twitter.com/%s/status/%s' - _TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s' + _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P\d+)' _TESTS = [{ 'url': 'https://twitter.com/freethenipple/status/643211948184596480', @@ -291,10 +195,13 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"', + 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ', 'uploader': 'FREE THE NIPPLE', 'uploader_id': 'freethenipple', 'duration': 12.922, + 'timestamp': 1442188653, + 'upload_date': '20150913', + 'age_limit': 18, }, }, { 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', @@ -316,19 +223,23 @@ class TwitterIE(InfoExtractor): 'id': '665052190608723968', 'ext': 'mp4', 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.', - 'description': 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."', + 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'uploader_id': 'starwars', 'uploader': 'Star Wars', + 'timestamp': 1447395772, + 'upload_date': '20151113', }, }, { 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880', 'info_dict': { 'id': '705235433198714880', 'ext': 'mp4', - 'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.', - 'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."', + 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.", + 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns", 'uploader_id': 'BTNBrentYarina', 'uploader': 'Brent Yarina', + 'timestamp': 1456976204, + 'upload_date': '20160303', }, 'params': { # The same video as https://twitter.com/i/videos/tweet/705235433198714880 @@ -340,12 +251,14 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'Simon Vertugo - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'JG', - 'uploader_id': 'jaydingeer', + 'uploader': 'Simon Vertugo', + 'uploader_id': 'simonvertugo', 'duration': 30.0, + 'timestamp': 1455777459, + 'upload_date': '20160218', }, }, { 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609', @@ -353,10 +266,9 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': 'MIOxnrUteUd', 'ext': 'mp4', - 'title': 'Vince Mancini - Vine of the day', - 'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"', - 'uploader': 'Vince Mancini', - 'uploader_id': 'Filmdrunk', + 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン', + 'uploader': 'TAKUMA', + 'uploader_id': '1004126642786242560', 'timestamp': 1402826626, 'upload_date': '20140615', }, @@ -367,21 +279,22 @@ class TwitterIE(InfoExtractor): 'id': '719944021058060289', 'ext': 'mp4', 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.', - 'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"', - 'uploader_id': 'captainamerica', + 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI', + 'uploader_id': 'CaptainAmerica', 'uploader': 'Captain America', 'duration': 3.17, + 'timestamp': 1460483005, + 'upload_date': '20160412', }, }, { 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', 'info_dict': { 'id': '1zqKVVlkqLaKB', 'ext': 'mp4', - 'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence', - 'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"', + 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence', 'upload_date': '20160923', - 'uploader_id': 'OPP_HSD', - 'uploader': 'Sgt Kerry Schmidt', + 'uploader_id': '1PmKqpJdOJQoY', + 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police', 'timestamp': 1474613214, }, 'add_ie': ['Periscope'], @@ -392,10 +305,12 @@ class TwitterIE(InfoExtractor): 'id': '852138619213144067', 'ext': 'mp4', 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', - 'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"', + 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN', 'uploader': 'عالم الأخبار', 'uploader_id': 'news_al3alm', 'duration': 277.4, + 'timestamp': 1492000653, + 'upload_date': '20170412', }, }, { 'url': 'https://twitter.com/i/web/status/910031516746514432', @@ -404,10 +319,12 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"', + 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo', 'uploader': 'Préfet de Guadeloupe', 'uploader_id': 'Prefet971', 'duration': 47.48, + 'timestamp': 1505803395, + 'upload_date': '20170919', }, 'params': { 'skip_download': True, # requires ffmpeg @@ -420,10 +337,12 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 're:.*?Shep is on a roll today.*?', 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'md5:63b036c228772523ae1924d5f8e5ed6b', + 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'uploader': 'Lis Power', 'uploader_id': 'LisPower1', 'duration': 111.278, + 'timestamp': 1527623489, + 'upload_date': '20180529', }, 'params': { 'skip_download': True, # requires ffmpeg @@ -435,88 +354,163 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!', 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'md5:66d493500c013e3e2d434195746a7f78', + 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976', 'uploader': 'Twitter', 'uploader_id': 'Twitter', 'duration': 61.567, + 'timestamp': 1548184644, + 'upload_date': '20190122', }, + }, { + # not available in Periscope + 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656', + 'info_dict': { + 'id': '1vOGwqejwoWxB', + 'ext': 'mp4', + 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019', + 'uploader': 'Vivi', + 'uploader_id': '1eVjYOLGkGrQL', + }, + 'add_ie': ['TwitterBroadcast'], + }, { + # Twitch Clip Embed + 'url': 'https://twitter.com/GunB1g/status/1163218564784017422', + 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - twid = mobj.group('id') - - webpage, urlh = self._download_webpage_handle( - self._TEMPLATE_STATUSES_URL % twid, twid) - - if 'twitter.com/account/suspended' in urlh.geturl(): - raise ExtractorError('Account suspended by Twitter.', expected=True) - - user_id = None - - redirect_mobj = re.match(self._VALID_URL, urlh.geturl()) - if redirect_mobj: - user_id = redirect_mobj.group('user_id') - - if not user_id: - user_id = mobj.group('user_id') - - username = remove_end(self._og_search_title(webpage), ' on Twitter') - - title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”') + twid = self._match_id(url) + status = self._call_api( + 'statuses/show/%s.json' % twid, twid, { + 'cards_platform': 'Web-12', + 'include_cards': 1, + 'include_reply_count': 1, + 'include_user_entities': 0, + 'tweet_mode': 'extended', + }) + title = description = status['full_text'].replace('\n', ' ') # strip 'https -_t.co_BJYgOjSeGA' junk from filenames title = re.sub(r'\s+(https?://[^ ]+)', '', title) + user = status.get('user') or {} + uploader = user.get('name') + if uploader: + title = '%s - %s' % (uploader, title) + uploader_id = user.get('screen_name') + + tags = [] + for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []): + hashtag_text = hashtag.get('text') + if not hashtag_text: + continue + tags.append(hashtag_text) info = { - 'uploader_id': user_id, - 'uploader': username, - 'webpage_url': url, - 'description': '%s on Twitter: "%s"' % (username, description), - 'title': username + ' - ' + title, + 'id': twid, + 'title': title, + 'description': description, + 'uploader': uploader, + 'timestamp': unified_timestamp(status.get('created_at')), + 'uploader_id': uploader_id, + 'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None, + 'like_count': int_or_none(status.get('favorite_count')), + 'repost_count': int_or_none(status.get('retweet_count')), + 'comment_count': int_or_none(status.get('reply_count')), + 'age_limit': 18 if status.get('possibly_sensitive') else 0, + 'tags': tags, } - mobj = re.search(r'''(?x) - ]+class="animated-gif"(?P[^>]+)>\s* - ]+video-src="(?P[^"]+)" - ''', webpage) + media = try_get(status, lambda x: x['extended_entities']['media'][0]) + if media and media.get('type') != 'photo': + video_info = media.get('video_info') or {} + + formats = [] + for variant in video_info.get('variants', []): + formats.extend(self._extract_variant_formats(variant, twid)) + self._sort_formats(formats) + + thumbnails = [] + media_url = media.get('media_url_https') or media.get('media_url') + if media_url: + def add_thumbnail(name, size): + thumbnails.append({ + 'id': name, + 'url': update_url_query(media_url, {'name': name}), + 'width': int_or_none(size.get('w') or size.get('width')), + 'height': int_or_none(size.get('h') or size.get('height')), + }) + for name, size in media.get('sizes', {}).items(): + add_thumbnail(name, size) + add_thumbnail('orig', media.get('original_info') or {}) - if mobj: - more_info = mobj.group('more_info') - height = int_or_none(self._search_regex( - r'data-height="(\d+)"', more_info, 'height', fatal=False)) - width = int_or_none(self._search_regex( - r'data-width="(\d+)"', more_info, 'width', fatal=False)) - thumbnail = self._search_regex( - r'poster="([^"]+)"', more_info, 'poster', fatal=False) info.update({ - 'id': twid, - 'url': mobj.group('url'), - 'height': height, - 'width': width, - 'thumbnail': thumbnail, + 'formats': formats, + 'thumbnails': thumbnails, + 'duration': float_or_none(video_info.get('duration_millis'), 1000), }) - return info - - twitter_card_url = None - if 'class="PlayableMedia' in webpage: - twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid) else: - twitter_card_iframe_url = self._search_regex( - r'data-full-card-iframe-url=([\'"])(?P(?:(?!\1).)+)\1', - webpage, 'Twitter card iframe URL', default=None, group='url') - if twitter_card_iframe_url: - twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url) + card = status.get('card') + if card: + binding_values = card['binding_values'] - if twitter_card_url: - info.update({ - '_type': 'url_transparent', - 'ie_key': 'TwitterCard', - 'url': twitter_card_url, - }) - return info + def get_binding_value(k): + o = binding_values.get(k) or {} + return try_get(o, lambda x: x[x['type'].lower() + '_value']) - raise ExtractorError('There\'s no video in this tweet.') + card_name = card['name'].split(':')[-1] + if card_name == 'amplify': + formats = self._extract_formats_from_vmap_url( + get_binding_value('amplify_url_vmap'), + get_binding_value('amplify_content_id') or twid) + self._sort_formats(formats) + + thumbnails = [] + for suffix in ('_small', '', '_large', '_x_large', '_original'): + image = get_binding_value('player_image' + suffix) or {} + image_url = image.get('url') + if not image_url or '/player-placeholder' in image_url: + continue + thumbnails.append({ + 'id': suffix[1:] if suffix else 'medium', + 'url': image_url, + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + }) + + info.update({ + 'formats': formats, + 'thumbnails': thumbnails, + 'duration': int_or_none(get_binding_value( + 'content_duration_seconds')), + }) + elif card_name == 'player': + info.update({ + '_type': 'url', + 'url': get_binding_value('player_url'), + }) + elif card_name == 'periscope_broadcast': + info.update({ + '_type': 'url', + 'url': get_binding_value('url') or get_binding_value('player_url'), + 'ie_key': PeriscopeIE.ie_key(), + }) + elif card_name == 'broadcast': + info.update({ + '_type': 'url', + 'url': get_binding_value('broadcast_url'), + 'ie_key': TwitterBroadcastIE.ie_key(), + }) + else: + raise ExtractorError('Unsupported Twitter Card.') + else: + expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url']) + if not expanded_url: + raise ExtractorError("There's no video in this tweet.") + info.update({ + '_type': 'url', + 'url': expanded_url, + }) + return info class TwitterAmplifyIE(TwitterBaseIE): @@ -573,3 +567,27 @@ class TwitterAmplifyIE(TwitterBaseIE): 'formats': formats, 'thumbnails': thumbnails, } + + +class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): + IE_NAME = 'twitter:broadcast' + _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P[0-9a-zA-Z]{13})' + + def _real_extract(self, url): + broadcast_id = self._match_id(url) + broadcast = self._call_api( + 'broadcasts/show.json', broadcast_id, + {'ids': broadcast_id})['broadcasts'][broadcast_id] + info = self._parse_broadcast_data(broadcast, broadcast_id) + media_key = broadcast['media_key'] + source = self._call_api( + 'live_video_stream/status/' + media_key, media_key)['source'] + m3u8_url = source.get('noRedirectPlaybackUrl') or source['location'] + if '/live_video_stream/geoblocked/' in m3u8_url: + self.raise_geo_restricted() + m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse( + m3u8_url).query).get('type', [None])[0] + state, width, height = self._extract_common_format_info(broadcast) + info['formats'] = self._extract_pscp_m3u8_formats( + m3u8_url, broadcast_id, m3u8_id, state, width, height) + return info From ce112a8c19ebcc9d401ff26a5cdcf58ba565901c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 11:01:07 +0100 Subject: [PATCH 0952/1201] [twitch] fix video comments URL(#18593)(closes #15828) --- youtube_dl/extractor/twitch.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index a5681409c..8c0d70010 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -344,9 +344,8 @@ class TwitchVodIE(TwitchItemBaseIE): info['subtitles'] = { 'rechat': [{ 'url': update_url_query( - 'https://rechat.twitch.tv/rechat-messages', { - 'video_id': 'v%s' % item_id, - 'start': info['timestamp'], + 'https://api.twitch.tv/v5/videos/%s/comments' % item_id, { + 'client_id': self._CLIENT_ID, }), 'ext': 'json', }], From f81dd65ba2c1e7be549e5c8cfe6cbf0f0829edfe Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 13:11:59 +0100 Subject: [PATCH 0953/1201] [extractor/common] clean jwplayer description HTML tags --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4a683f6d6..4c2f9303e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2689,7 +2689,7 @@ class InfoExtractor(object): entry = { 'id': this_video_id, 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')), - 'description': video_data.get('description'), + 'description': clean_html(video_data.get('description')), 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))), 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), From 8fbf5d2f87fbfe0441bc20cf69d506109b2810bc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 13:14:23 +0100 Subject: [PATCH 0954/1201] [seeker] remove Revision3 extractors and fix extraction --- youtube_dl/extractor/extractors.py | 4 - youtube_dl/extractor/revision3.py | 170 ----------------------------- youtube_dl/extractor/seeker.py | 45 ++++---- 3 files changed, 23 insertions(+), 196 deletions(-) delete mode 100644 youtube_dl/extractor/revision3.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 598006061..8df9d95b1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -932,10 +932,6 @@ from .rentv import ( from .restudy import RestudyIE from .reuters import ReutersIE from .reverbnation import ReverbNationIE -from .revision3 import ( - Revision3EmbedIE, - Revision3IE, -) from .rice import RICEIE from .rmcdecouverte import RMCDecouverteIE from .ro220 import Ro220IE diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py deleted file mode 100644 index 833d8a2f0..000000000 --- a/youtube_dl/extractor/revision3.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - parse_iso8601, - unescapeHTML, - qualities, -) - - -class Revision3EmbedIE(InfoExtractor): - IE_NAME = 'revision3:embed' - _VALID_URL = r'(?:revision3:(?:(?P[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P\d+)' - _TEST = { - 'url': 'http://api.seekernetwork.com/player/embed?videoId=67558', - 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', - 'info_dict': { - 'id': '67558', - 'ext': 'mp4', - 'title': 'The Pros & Cons Of Zoos', - 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', - 'uploader_id': 'dnews', - 'uploader': 'DNews', - } - } - _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('playlist_id') - playlist_type = mobj.group('playlist_type') or 'video_id' - video_data = self._download_json( - 'http://revision3.com/api/getPlaylist.json', playlist_id, query={ - 'api_key': self._API_KEY, - 'codecs': 'h264,vp8,theora', - playlist_type: playlist_id, - })['items'][0] - - formats = [] - for vcodec, media in video_data['media'].items(): - for quality_id, quality in media.items(): - if quality_id == 'hls': - formats.extend(self._extract_m3u8_formats( - quality['url'], playlist_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'url': quality['url'], - 'format_id': '%s-%s' % (vcodec, quality_id), - 'tbr': int_or_none(quality.get('bitrate')), - 'vcodec': vcodec, - }) - self._sort_formats(formats) - - return { - 'id': playlist_id, - 'title': unescapeHTML(video_data['title']), - 'description': unescapeHTML(video_data.get('summary')), - 'uploader': video_data.get('show', {}).get('name'), - 'uploader_id': video_data.get('show', {}).get('slug'), - 'duration': int_or_none(video_data.get('duration')), - 'formats': formats, - } - - -class Revision3IE(InfoExtractor): - IE_NAME = 'revision' - _VALID_URL = r'https?://(?:www\.)?(?P(?:revision3|animalist)\.com)/(?P[^/]+(?:/[^/?#]+)?)' - _TESTS = [{ - 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016', - 'md5': 'd94a72d85d0a829766de4deb8daaf7df', - 'info_dict': { - 'id': '71089', - 'display_id': 'technobuffalo/5-google-predictions-for-2016', - 'ext': 'webm', - 'title': '5 Google Predictions for 2016', - 'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.', - 'upload_date': '20151228', - 'timestamp': 1451325600, - 'duration': 187, - 'uploader': 'TechnoBuffalo', - 'uploader_id': 'technobuffalo', - } - }, { - # Show - 'url': 'http://revision3.com/variant', - 'only_matching': True, - }, { - # Tag - 'url': 'http://revision3.com/vr', - 'only_matching': True, - }] - _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' - - def _real_extract(self, url): - domain, display_id = re.match(self._VALID_URL, url).groups() - site = domain.split('.')[0] - page_info = self._download_json( - self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id) - - page_data = page_info['data'] - page_type = page_data['type'] - if page_type in ('episode', 'embed'): - show_data = page_data['show']['data'] - page_id = compat_str(page_data['id']) - video_id = compat_str(page_data['video']['data']['id']) - - preference = qualities(['mini', 'small', 'medium', 'large']) - thumbnails = [{ - 'url': image_url, - 'id': image_id, - 'preference': preference(image_id) - } for image_id, image_url in page_data.get('images', {}).items()] - - info = { - 'id': page_id, - 'display_id': display_id, - 'title': unescapeHTML(page_data['name']), - 'description': unescapeHTML(page_data.get('summary')), - 'timestamp': parse_iso8601(page_data.get('publishTime'), ' '), - 'author': page_data.get('author'), - 'uploader': show_data.get('name'), - 'uploader_id': show_data.get('slug'), - 'thumbnails': thumbnails, - 'extractor_key': site, - } - - if page_type == 'embed': - info.update({ - '_type': 'url_transparent', - 'url': page_data['video']['data']['embed'], - }) - return info - - info.update({ - '_type': 'url_transparent', - 'url': 'revision3:%s' % video_id, - }) - return info - else: - list_data = page_info[page_type]['data'] - episodes_data = page_info['episodes']['data'] - num_episodes = page_info['meta']['totalEpisodes'] - processed_episodes = 0 - entries = [] - page_num = 1 - while True: - entries.extend([{ - '_type': 'url', - 'url': 'http://%s%s' % (domain, episode['path']), - 'id': compat_str(episode['id']), - 'ie_key': 'Revision3', - 'extractor_key': site, - } for episode in episodes_data]) - processed_episodes += len(episodes_data) - if processed_episodes == num_episodes: - break - page_num += 1 - episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % ( - domain, display_id + '/' + compat_str(page_num), domain), - display_id)['episodes']['data'] - - return self.playlist_result( - entries, compat_str(list_data['id']), - list_data.get('name'), list_data.get('summary')) diff --git a/youtube_dl/extractor/seeker.py b/youtube_dl/extractor/seeker.py index 3b9c65e7e..7872dc80d 100644 --- a/youtube_dl/extractor/seeker.py +++ b/youtube_dl/extractor/seeker.py @@ -4,34 +4,37 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + get_element_by_class, + strip_or_none, +) class SeekerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P.*)-(?P\d+)\.html' _TESTS = [{ - # player.loadRevision3Item 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', - 'md5': '30c1dc4030cc715cf05b423d0947ac18', + 'md5': '897d44bbe0d8986a2ead96de565a92db', 'info_dict': { - 'id': '76243', - 'ext': 'webm', + 'id': 'Elrn3gnY', + 'ext': 'mp4', 'title': 'Should Trump Be Required To Release His Tax Returns?', - 'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?', - 'uploader': 'Seeker Daily', - 'uploader_id': 'seekerdaily', + 'description': 'md5:41efa8cfa8d627841045eec7b018eb45', + 'timestamp': 1490090165, + 'upload_date': '20170321', } }, { 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', 'playlist': [ { - 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', + 'md5': '0497b9f20495174be73ae136949707d2', 'info_dict': { - 'id': '67558', + 'id': 'FihYQ8AE', 'ext': 'mp4', 'title': 'The Pros & Cons Of Zoos', - 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', - 'uploader': 'DNews', - 'uploader_id': 'dnews', + 'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c', + 'timestamp': 1490039133, + 'upload_date': '20170320', }, } ], @@ -45,13 +48,11 @@ class SeekerIE(InfoExtractor): def _real_extract(self, url): display_id, article_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, display_id) - mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage) - if mobj: - playlist_type, playlist_id = mobj.groups() - return self.url_result( - 'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id) - else: - entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall( - r']+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)] - return self.playlist_result( - entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage)) + entries = [] + for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage): + entries.append(self.url_result( + 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id)) + return self.playlist_result( + entries, article_id, + self._og_search_title(webpage), + strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage)) From 20baa17c0180c7254644abea968792abcf0743cb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 16:00:12 +0100 Subject: [PATCH 0955/1201] [daisuki] remove extractor --- youtube_dl/extractor/daisuki.py | 154 ----------------------------- youtube_dl/extractor/extractors.py | 4 - 2 files changed, 158 deletions(-) delete mode 100644 youtube_dl/extractor/daisuki.py diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py deleted file mode 100644 index dbc1aa5d4..000000000 --- a/youtube_dl/extractor/daisuki.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import unicode_literals - -import base64 -import json -import random -import re - -from .common import InfoExtractor -from ..aes import ( - aes_cbc_decrypt, - aes_cbc_encrypt, -) -from ..compat import compat_b64decode -from ..utils import ( - bytes_to_intlist, - bytes_to_long, - extract_attributes, - ExtractorError, - intlist_to_bytes, - js_to_json, - int_or_none, - long_to_bytes, - pkcs1pad, -) - - -class DaisukiMottoIE(InfoExtractor): - _VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P[0-9a-zA-Z]{3})' - - _TEST = { - 'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428', - 'info_dict': { - 'id': 'V2e', - 'ext': 'mp4', - 'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!', - 'subtitles': { - 'mul': [{ - 'ext': 'ttml', - }], - }, - }, - 'params': { - 'skip_download': True, # AES-encrypted HLS stream - }, - } - - # The public key in PEM format can be found in clientlibs_anime_watch.min.js - _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537) - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - flashvars = self._parse_json(self._search_regex( - r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'), - video_id, transform_source=js_to_json) - - iv = [0] * 16 - - data = {} - for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'): - data[key] = flashvars.get(key, '') - - encrypted_rtn = None - - # Some AES keys are rejected. Try it with different AES keys - for idx in range(5): - aes_key = [random.randint(0, 254) for _ in range(32)] - padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128)) - - n, e = self._RSA_KEY - encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n)) - init_data = self._download_json( - 'http://motto.daisuki.net/fastAPI/bgn/init/', - video_id, query={ - 's': flashvars.get('s', ''), - 'c': flashvars.get('ss3_prm', ''), - 'e': url, - 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt( - bytes_to_intlist(json.dumps(data)), - aes_key, iv))).decode('ascii'), - 'a': base64.b64encode(encrypted_aeskey).decode('ascii'), - }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else '')) - - if 'rtn' in init_data: - encrypted_rtn = init_data['rtn'] - break - - self._sleep(5, video_id) - - if encrypted_rtn is None: - raise ExtractorError('Failed to fetch init data') - - rtn = self._parse_json( - intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist( - compat_b64decode(encrypted_rtn)), - aes_key, iv)).decode('utf-8').rstrip('\0'), - video_id) - - title = rtn['title_str'] - - formats = self._extract_m3u8_formats( - rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native') - - subtitles = {} - caption_url = rtn.get('caption_url') - if caption_url: - # mul: multiple languages - subtitles['mul'] = [{ - 'url': caption_url, - 'ext': 'ttml', - }] - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'subtitles': subtitles, - } - - -class DaisukiMottoPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://motto\.daisuki\.net/(?Pinformation)/' - - _TEST = { - 'url': 'http://motto.daisuki.net/information/', - 'info_dict': { - 'title': 'DRAGON BALL SUPER', - }, - 'playlist_mincount': 117, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - entries = [] - for li in re.findall(r'(]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage): - attr = extract_attributes(li) - ad_id = attr.get('data-ad_id') - product_id = attr.get('data-product_id') - if ad_id and product_id: - episode_id = attr.get('data-chapter') - entries.append({ - '_type': 'url_transparent', - 'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id), - 'episode_id': episode_id, - 'episode_number': int_or_none(episode_id), - 'ie_key': 'DaisukiMotto', - }) - - return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8df9d95b1..e2ebe8f95 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -254,10 +254,6 @@ from .dailymotion import ( DailymotionPlaylistIE, DailymotionUserIE, ) -from .daisuki import ( - DaisukiMottoIE, - DaisukiMottoPlaylistIE, -) from .daum import ( DaumIE, DaumClipIE, From 88b87b08b1ed06940053ee018547de051bf8d986 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 17:01:21 +0100 Subject: [PATCH 0956/1201] [minhateca] remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/minhateca.py | 70 ------------------------------ 2 files changed, 71 deletions(-) delete mode 100644 youtube_dl/extractor/minhateca.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e2ebe8f95..dfd0ef198 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -625,7 +625,6 @@ from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, ) -from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .minoto import MinotoIE from .miomio import MioMioIE diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py deleted file mode 100644 index dccc54249..000000000 --- a/youtube_dl/extractor/minhateca.py +++ /dev/null @@ -1,70 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - parse_filesize, - sanitized_Request, - urlencode_postdata, -) - - -class MinhatecaIE(InfoExtractor): - _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P[0-9]+)\.' - _TEST = { - 'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)', - 'info_dict': { - 'id': '125848331', - 'ext': 'mp4', - 'title': 'youtube-dl test video', - 'thumbnail': r're:^https?://.*\.jpg$', - 'filesize_approx': 1530000, - 'duration': 9, - 'view_count': int, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - token = self._html_search_regex( - r'(.*?)', webpage, 'title') - title, _, ext = title_str.rpartition('.') - filesize_approx = parse_filesize(self._html_search_regex( - r'

    (.*?)

    ', - webpage, 'file size approximation', fatal=False)) - duration = parse_duration(self._html_search_regex( - r'(?s)

    .*?class="bold">(.*?)<', - webpage, 'duration', fatal=False)) - view_count = int_or_none(self._html_search_regex( - r'

    ([0-9]+)

    ', - webpage, 'view count', fatal=False)) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'ext': ext, - 'filesize_approx': filesize_approx, - 'duration': duration, - 'view_count': view_count, - 'thumbnail': self._og_search_thumbnail(webpage), - } From 9e46d1f8aadd38f6de7c2b921b294e67ed2267eb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 9 Nov 2019 17:15:15 +0100 Subject: [PATCH 0957/1201] [addanime] remove extractor --- youtube_dl/extractor/addanime.py | 95 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 96 deletions(-) delete mode 100644 youtube_dl/extractor/addanime.py diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py deleted file mode 100644 index 5e7c0724e..000000000 --- a/youtube_dl/extractor/addanime.py +++ /dev/null @@ -1,95 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urllib_parse_urlencode, - compat_urllib_parse_urlparse, -) -from ..utils import ( - ExtractorError, - qualities, -) - - -class AddAnimeIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P[\w_]+)' - _TESTS = [{ - 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', - 'md5': '72954ea10bc979ab5e2eb288b21425a0', - 'info_dict': { - 'id': '24MR3YO5SAS9', - 'ext': 'mp4', - 'description': 'One Piece 606', - 'title': 'One Piece 606', - }, - 'skip': 'Video is gone', - }, { - 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - try: - webpage = self._download_webpage(url, video_id) - except ExtractorError as ee: - if not isinstance(ee.cause, compat_HTTPError) or \ - ee.cause.code != 503: - raise - - redir_webpage = ee.cause.read().decode('utf-8') - action = self._search_regex( - r'
    ', - redir_webpage, 'redirect vc value') - av = re.search( - r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);', - redir_webpage) - if av is None: - raise ExtractorError('Cannot find redirect math task') - av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3)) - - parsed_url = compat_urllib_parse_urlparse(url) - av_val = av_res + len(parsed_url.netloc) - confirm_url = ( - parsed_url.scheme + '://' + parsed_url.netloc - + action + '?' - + compat_urllib_parse_urlencode({ - 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) - self._download_webpage( - confirm_url, video_id, - note='Confirming after redirect') - webpage = self._download_webpage(url, video_id) - - FORMATS = ('normal', 'hq') - quality = qualities(FORMATS) - formats = [] - for format_id in FORMATS: - rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) - video_url = self._search_regex(rex, webpage, 'video file URLx', - fatal=False) - if not video_url: - continue - formats.append({ - 'format_id': format_id, - 'url': video_url, - 'quality': quality(format_id), - }) - self._sort_formats(formats) - video_title = self._og_search_title(webpage) - video_description = self._og_search_description(webpage) - - return { - '_type': 'video', - 'id': video_id, - 'formats': formats, - 'title': video_title, - 'description': video_description - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dfd0ef198..d96f0d284 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -18,7 +18,6 @@ from .acast import ( ACastIE, ACastChannelIE, ) -from .addanime import AddAnimeIE from .adn import ADNIE from .adobeconnect import AdobeConnectIE from .adobetv import ( From 433e0710585e2414697cff6d444204e1db950bd7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 10 Nov 2019 17:02:47 +0100 Subject: [PATCH 0958/1201] [facebook] fix posts video data extraction(closes #22473) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index c723726b7..ce64e2683 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -334,7 +334,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json( self._search_regex( - r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)', + r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)', webpage, 'js data', default='{}'), video_id, transform_source=js_to_json, fatal=False) video_data = extract_from_jsmods_instances(server_js_data) From 2e9ad59a4d6dfd82b34a965cfc5b8c5a647d1598 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 11 Nov 2019 09:53:04 +0100 Subject: [PATCH 0959/1201] [soundcloud] check if the soundtrack has downloads left(closes #23045) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 875b9d887..e8ffb2cbe 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -276,7 +276,7 @@ class SoundcloudIE(InfoExtractor): if secret_token: query['secret_token'] = secret_token - if info.get('downloadable'): + if info.get('downloadable') and info.get('has_downloads_left'): format_url = update_url_query( info.get('download_url') or track_base_url + '/download', query) format_urls.add(format_url) From 48970d5cc8838ac404a64462d175b248401e2bd2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 12 Nov 2019 10:51:54 +0100 Subject: [PATCH 0960/1201] [teamcoco] add support for new videos(closes #23054) --- youtube_dl/extractor/teamcoco.py | 68 +++++++++++++++++--------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 7640cf00a..5793b711f 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -84,6 +84,19 @@ class TeamcocoIE(TurnerBaseIE): 'only_matching': True, } ] + _RECORD_TEMPL = '''id + title + teaser + publishOn + thumb { + preview + } + tags { + name + } + duration + turnerMediaId + turnerMediaAuthToken''' def _graphql_call(self, query_template, object_type, object_id): find_object = 'find' + object_type @@ -98,36 +111,36 @@ class TeamcocoIE(TurnerBaseIE): display_id = self._match_id(url) response = self._graphql_call('''{ - %s(slug: "%s") { + %%s(slug: "%%s") { ... on RecordSlug { record { + %s + } + } + ... on PageSlug { + child { id - title - teaser - publishOn - thumb { - preview - } - file { - url - } - tags { - name - } - duration - turnerMediaId - turnerMediaAuthToken } } ... on NotFoundSlug { status } } -}''', 'Slug', display_id) +}''' % self._RECORD_TEMPL, 'Slug', display_id) if response.get('status'): raise ExtractorError('This video is no longer available.', expected=True) - record = response['record'] + child = response.get('child') + if child: + record = self._graphql_call('''{ + %%s(id: "%%s") { + ... on Video { + %s + } + } +}''' % self._RECORD_TEMPL, 'Record', child['id']) + else: + record = response['record'] video_id = record['id'] info = { @@ -150,25 +163,21 @@ class TeamcocoIE(TurnerBaseIE): 'accessTokenType': 'jws', })) else: - d = self._download_json( + video_sources = self._download_json( 'https://teamcoco.com/_truman/d/' + video_id, - video_id, fatal=False) or {} - video_sources = d.get('meta') or {} - if not video_sources: - video_sources = self._graphql_call('''{ - %s(id: "%s") { - src - } -}''', 'RecordVideoSource', video_id) or {} + video_id)['meta']['src'] + if isinstance(video_sources, dict): + video_sources = video_sources.values() formats = [] get_quality = qualities(['low', 'sd', 'hd', 'uhd']) - for format_id, src in video_sources.get('src', {}).items(): + for src in video_sources: if not isinstance(src, dict): continue src_url = src.get('src') if not src_url: continue + format_id = src.get('label') ext = determine_ext(src_url, mimetype2ext(src.get('type'))) if format_id == 'hls' or ext == 'm3u8': # compat_urllib_parse.urljoin does not work here @@ -190,9 +199,6 @@ class TeamcocoIE(TurnerBaseIE): 'format_id': format_id, 'quality': get_quality(format_id), }) - if not formats: - formats = self._extract_m3u8_formats( - record['file']['url'], video_id, 'mp4', fatal=False) self._sort_formats(formats) info['formats'] = formats From eb22d1b55744b69d5ec3556529868acfba6c217f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 13 Nov 2019 19:09:32 +0100 Subject: [PATCH 0961/1201] [nexx] Add support for Multi Player JS Setup(closes #23052) --- youtube_dl/extractor/nexx.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index f9aad83c4..586c1b7eb 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -108,7 +108,7 @@ class NexxIE(InfoExtractor): @staticmethod def _extract_domain_id(webpage): mobj = re.search( - r']+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P\d+)', + r']+\bsrc=["\'](?:https?:)?//(?:require|arc)\.nexx(?:\.cloud|cdn\.com)/(?:sdk/)?(?P\d+)', webpage) return mobj.group('id') if mobj else None @@ -123,7 +123,7 @@ class NexxIE(InfoExtractor): domain_id = NexxIE._extract_domain_id(webpage) if domain_id: for video_id in re.findall( - r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)', + r'(?is)onPLAYReady.+?_play\.(?:init|(?:control\.)?addPlayer)\s*\(.+?\s*,\s*["\']?(\d+)', webpage): entries.append( 'https://api.nexx.cloud/v3/%s/videos/byid/%s' @@ -410,8 +410,8 @@ class NexxIE(InfoExtractor): class NexxEmbedIE(InfoExtractor): - _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P[^/?#&]+)' - _TEST = { + _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P[^/?#&]+)' + _TESTS = [{ 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1', 'md5': '16746bfc28c42049492385c989b26c4a', 'info_dict': { @@ -420,7 +420,6 @@ class NexxEmbedIE(InfoExtractor): 'title': 'Nervenkitzel Achterbahn', 'alt_title': 'Karussellbauer in Deutschland', 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', - 'release_year': 2005, 'creator': 'SPIEGEL TV', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 2761, @@ -431,7 +430,10 @@ class NexxEmbedIE(InfoExtractor): 'format': 'bestvideo', 'skip_download': True, }, - } + }, { + 'url': 'https://embed.nexx.cloud/11888/video/DSRTO7UVOX06S7', + 'only_matching': True, + }] @staticmethod def _extract_urls(webpage): From 5709d661a2509fab0c9f3412239ecbe7a621f45b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Nov 2019 01:45:04 +0700 Subject: [PATCH 0962/1201] [drtv] Add support for new URL schema (closes #23059) --- youtube_dl/extractor/drtv.py | 57 ++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 218f10209..390e79f8c 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -17,6 +17,7 @@ from ..utils import ( float_or_none, mimetype2ext, str_or_none, + try_get, unified_timestamp, update_url_query, url_or_none, @@ -24,7 +25,14 @@ from ..utils import ( class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*| + (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/ + ) + (?P[\da-z_-]+) + ''' _GEO_BYPASS = False _GEO_COUNTRIES = ['DK'] IE_NAME = 'drtv' @@ -83,6 +91,26 @@ class DRTVIE(InfoExtractor): }, { 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', 'only_matching': True, + }, { + 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769', + 'info_dict': { + 'id': '00951930010', + 'ext': 'mp4', + 'title': 'Bonderøven (1:8)', + 'description': 'md5:3cf18fc0d3b205745d4505f896af8121', + 'timestamp': 1546542000, + 'upload_date': '20190103', + 'duration': 2576.6, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769', + 'only_matching': True, + }, { + 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769', + 'only_matching': True, }] def _real_extract(self, url): @@ -100,13 +128,32 @@ class DRTVIE(InfoExtractor): webpage, 'video id', default=None) if not video_id: - video_id = compat_urllib_parse_unquote(self._search_regex( + video_id = self._search_regex( r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)', - webpage, 'urn')) + webpage, 'urn', default=None) + if video_id: + video_id = compat_urllib_parse_unquote(video_id) + + _PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard' + query = {'expanded': 'true'} + + if video_id: + programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id) + else: + programcard_url = _PROGRAMCARD_BASE + page = self._parse_json( + self._search_regex( + r'data\s*=\s*({.+?})\s*(?:;| Date: Thu, 14 Nov 2019 06:38:55 +0100 Subject: [PATCH 0963/1201] [comcarcoff] remove extractor --- youtube_dl/extractor/comcarcoff.py | 74 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 75 deletions(-) delete mode 100644 youtube_dl/extractor/comcarcoff.py diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py deleted file mode 100644 index 588aad0d9..000000000 --- a/youtube_dl/extractor/comcarcoff.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601, -) - - -class ComCarCoffIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P[a-z0-9\-]*)' - _TESTS = [{ - 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/', - 'info_dict': { - 'id': '2494164', - 'ext': 'mp4', - 'upload_date': '20141127', - 'timestamp': 1417107600, - 'duration': 1232, - 'title': 'Happy Thanksgiving Miranda', - 'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.', - }, - 'params': { - 'skip_download': 'requires ffmpeg', - } - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - if not display_id: - display_id = 'comediansincarsgettingcoffee.com' - webpage = self._download_webpage(url, display_id) - - full_data = self._parse_json( - self._search_regex( - r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'), - display_id)['videoData'] - - display_id = full_data['activeVideo']['video'] - video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id] - - video_id = compat_str(video_data['mediaId']) - title = video_data['title'] - formats = self._extract_m3u8_formats( - video_data['mediaUrl'], video_id, 'mp4') - self._sort_formats(formats) - - thumbnails = [{ - 'url': video_data['images']['thumb'], - }, { - 'url': video_data['images']['poster'], - }] - - timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601( - video_data.get('pubDate')) - duration = int_or_none(video_data.get('durationSeconds')) or parse_duration( - video_data.get('duration')) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'timestamp': timestamp, - 'duration': duration, - 'thumbnails': thumbnails, - 'formats': formats, - 'season_number': int_or_none(video_data.get('season')), - 'episode_number': int_or_none(video_data.get('episode')), - 'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))), - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d96f0d284..cf4bb8f20 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -222,7 +222,6 @@ from .comedycentral import ( ComedyCentralTVIE, ToshIE, ) -from .comcarcoff import ComCarCoffIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .commonprotocols import ( MmsIE, From 656c20010f53851c1b01e839744f7fe48497c03f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 15 Nov 2019 21:17:47 +0100 Subject: [PATCH 0964/1201] [ivi] fix format extraction(closes #21991) --- youtube_dl/extractor/ivi.py | 56 ++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 86c014b07..efdc3cc98 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -18,6 +18,8 @@ class IviIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' _GEO_BYPASS = False _GEO_COUNTRIES = ['RU'] + _LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c' + _LIGHT_URL = 'https://api.ivi.ru/light/' _TESTS = [ # Single movie @@ -78,25 +80,41 @@ class IviIE(InfoExtractor): 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080') def _real_extract(self, url): + try: + from Crypto.Cipher import Blowfish + from Crypto.Hash import CMAC + except ImportError: + raise ExtractorError('pycrypto not found. Please install it.', expected=True) + video_id = self._match_id(url) - data = { + timestamp = self._download_json( + self._LIGHT_URL, video_id, + 'Downloading timestamp JSON', data=json.dumps({ + 'method': 'da.timestamp.get', + 'params': [] + }).encode())['result'] + + data = json.dumps({ 'method': 'da.content.get', 'params': [ video_id, { - 'site': 's183', + 'site': 's353', 'referrer': 'http://www.ivi.ru/watch/%s' % video_id, 'contentid': video_id } ] - } + }).encode() video_json = self._download_json( - 'http://api.digitalaccess.ru/api/json/', video_id, - 'Downloading video JSON', data=json.dumps(data)) + self._LIGHT_URL, video_id, + 'Downloading video JSON', data=data, query={ + 'ts': timestamp, + 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + data, Blowfish).hexdigest(), + }) - if 'error' in video_json: - error = video_json['error'] + error = video_json.get('error') + if error: origin = error['origin'] if origin == 'NotAllowedForLocation': self.raise_geo_restricted( @@ -108,20 +126,24 @@ class IviIE(InfoExtractor): expected=True) result = video_json['result'] + title = result['title'] quality = qualities(self._KNOWN_FORMATS) - formats = [{ - 'url': x['url'], - 'format_id': x.get('content_format'), - 'quality': quality(x.get('content_format')), - } for x in result['files'] if x.get('url')] - + formats = [] + for f in result.get('files', []): + f_url = f.get('url') + content_format = f.get('content_format') + if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format: + continue + formats.append({ + 'url': f_url, + 'format_id': content_format, + 'quality': quality(content_format), + 'filesize': int_or_none(f.get('size_in_bytes')), + }) self._sort_formats(formats) - title = result['title'] - - duration = int_or_none(result.get('duration')) compilation = result.get('compilation') episode = title if compilation else None @@ -158,7 +180,7 @@ class IviIE(InfoExtractor): 'episode_number': episode_number, 'thumbnails': thumbnails, 'description': description, - 'duration': duration, + 'duration': int_or_none(result.get('duration')), 'formats': formats, } From 1bba88efc7e1f82095f7ae38348e56026db4bf3c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 15 Nov 2019 23:46:31 +0100 Subject: [PATCH 0965/1201] [ivi] sign content request only when pycryptodome is available --- youtube_dl/extractor/ivi.py | 42 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index efdc3cc98..1dcb17c9b 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -80,38 +80,42 @@ class IviIE(InfoExtractor): 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080') def _real_extract(self, url): - try: - from Crypto.Cipher import Blowfish - from Crypto.Hash import CMAC - except ImportError: - raise ExtractorError('pycrypto not found. Please install it.', expected=True) - video_id = self._match_id(url) - timestamp = self._download_json( - self._LIGHT_URL, video_id, - 'Downloading timestamp JSON', data=json.dumps({ - 'method': 'da.timestamp.get', - 'params': [] - }).encode())['result'] - data = json.dumps({ 'method': 'da.content.get', 'params': [ video_id, { - 'site': 's353', + 'site': 's%d', 'referrer': 'http://www.ivi.ru/watch/%s' % video_id, 'contentid': video_id } ] }).encode() - video_json = self._download_json( - self._LIGHT_URL, video_id, - 'Downloading video JSON', data=data, query={ + try: + from Crypto.Cipher import Blowfish + from Crypto.Hash import CMAC + + timestamp = self._download_json( + self._LIGHT_URL, video_id, + 'Downloading timestamp JSON', data=json.dumps({ + 'method': 'da.timestamp.get', + 'params': [] + }).encode())['result'] + + data = data % 353 + query = { 'ts': timestamp, 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + data, Blowfish).hexdigest(), - }) + } + except ImportError: + data = data % 183 + query = {} + + video_json = self._download_json( + self._LIGHT_URL, video_id, + 'Downloading video JSON', data=data, query=query) error = video_json.get('error') if error: @@ -121,6 +125,8 @@ class IviIE(InfoExtractor): msg=error['message'], countries=self._GEO_COUNTRIES) elif origin == 'NoRedisValidData': raise ExtractorError('Video %s does not exist' % video_id, expected=True) + elif origin == 'NotAllowedError': + raise ExtractorError('pycryptodome not found. Please install it.', expected=True) raise ExtractorError( 'Unable to download video %s: %s' % (video_id, error['message']), expected=True) From 7360c06facfd96ee603ad4fc27f5903d3f8f6694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Nov 2019 05:44:14 +0700 Subject: [PATCH 0966/1201] [extractor/common] Add data, headers and query to all major extract methods preserving standard order for potential future use --- youtube_dl/extractor/common.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4c2f9303e..04d676378 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1455,14 +1455,14 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, m3u8_id=None): + fatal=True, m3u8_id=None, data=None, headers={}, query={}): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', # Some manifests may be malformed, e.g. prosiebensat1 generated manifests # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244) transform_source=transform_source, - fatal=fatal) + fatal=fatal, data=data, headers=headers, query=query) if manifest is False: return [] @@ -1586,12 +1586,13 @@ class InfoExtractor(object): def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, entry_protocol='m3u8', preference=None, m3u8_id=None, note=None, errnote=None, - fatal=True, live=False, headers={}): + fatal=True, live=False, data=None, headers={}, + query={}): res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', errnote=errnote or 'Failed to download m3u8 information', - fatal=fatal, headers=headers) + fatal=fatal, data=data, headers=headers, query=query) if res is False: return [] @@ -2009,12 +2010,12 @@ class InfoExtractor(object): }) return entries - def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, headers={}): + def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}): res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', errnote=errnote or 'Failed to download MPD manifest', - fatal=fatal, headers=headers) + fatal=fatal, data=data, headers=headers, query=query) if res is False: return [] mpd_doc, urlh = res @@ -2317,12 +2318,12 @@ class InfoExtractor(object): self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats - def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): + def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): res = self._download_xml_handle( ism_url, video_id, note=note or 'Downloading ISM manifest', errnote=errnote or 'Failed to download ISM manifest', - fatal=fatal) + fatal=fatal, data=data, headers=headers, query=query) if res is False: return [] ism_doc, urlh = res From 6c79785bb0c96d6fc22d942946196f0842d70a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Nov 2019 07:47:23 +0700 Subject: [PATCH 0967/1201] [travis] Add python 3.8 build --- .travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.travis.yml b/.travis.yml index 6d16c2955..14d95fa84 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,12 @@ matrix: - python: 3.7 dist: xenial env: YTDL_TEST_SET=download + - python: 3.8 + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.8 + dist: xenial + env: YTDL_TEST_SET=download - python: 3.8-dev dist: xenial env: YTDL_TEST_SET=core From 9e4e864639bf606a1931a684f130e219e869adfd Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 16 Nov 2019 01:51:31 +0100 Subject: [PATCH 0968/1201] [ivi] improve error detection --- youtube_dl/extractor/ivi.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 1dcb17c9b..7f1146d95 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -119,17 +119,20 @@ class IviIE(InfoExtractor): error = video_json.get('error') if error: - origin = error['origin'] + origin = error.get('origin') + message = error.get('message') or error.get('user_message') + extractor_msg = 'Unable to download video %s' if origin == 'NotAllowedForLocation': - self.raise_geo_restricted( - msg=error['message'], countries=self._GEO_COUNTRIES) + self.raise_geo_restricted(message, self._GEO_COUNTRIES) elif origin == 'NoRedisValidData': - raise ExtractorError('Video %s does not exist' % video_id, expected=True) - elif origin == 'NotAllowedError': - raise ExtractorError('pycryptodome not found. Please install it.', expected=True) - raise ExtractorError( - 'Unable to download video %s: %s' % (video_id, error['message']), - expected=True) + extractor_msg = 'Video %s does not exist' + elif message: + if 'недоступен для просмотра на площадке s183' in message: + raise ExtractorError( + 'pycryptodome not found. Please install it.', + expected=True) + extractor_msg += ': ' + message + raise ExtractorError(extractor_msg % video_id, expected=True) result = video_json['result'] title = result['title'] From 7e70620a342c57746812d4a8fae6f436bd90cf57 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 18 Nov 2019 12:51:25 +0100 Subject: [PATCH 0969/1201] [vk] fix wall audio thumbnails extraction(closes #23135) --- youtube_dl/extractor/vk.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 4c8ca4f41..195875938 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -634,14 +634,15 @@ class VKWallPostIE(VKBaseIE): if not a.url: continue title = unescapeHTML(a.title) + performer = unescapeHTML(a.performer) entries.append({ 'id': '%s_%s' % (a.owner_id, a.id), 'url': self._unmask_url(a.url, a.ads['vk_id']), - 'title': '%s - %s' % (a.performer, title) if a.performer else title, - 'thumbnail': a.cover_url.split(',') if a.cover_url else None, - 'duration': a.duration, + 'title': '%s - %s' % (performer, title) if performer else title, + 'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None, + 'duration': int_or_none(a.duration), 'uploader': uploader, - 'artist': a.performer, + 'artist': performer, 'track': title, 'ext': 'mp4', 'protocol': 'm3u8', From f9c4a4521068a02c583803ea422c6fedfa7598e3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 18 Nov 2019 21:40:53 +0100 Subject: [PATCH 0970/1201] [ntvru] add support for non relative file URLs(closes #23140) --- youtube_dl/extractor/ntvru.py | 49 +++++++++++++++++------------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py index 4f9cedb84..c47d1dfa4 100644 --- a/youtube_dl/extractor/ntvru.py +++ b/youtube_dl/extractor/ntvru.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - clean_html, - xpath_text, int_or_none, + strip_or_none, + unescapeHTML, + xpath_text, ) @@ -47,10 +48,10 @@ class NTVRuIE(InfoExtractor): 'duration': 1496, }, }, { - 'url': 'http://www.ntv.ru/kino/Koma_film', - 'md5': 'f825770930937aa7e5aca0dc0d29319a', + 'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/', + 'md5': 'e9c7cde24d9d3eaed545911a04e6d4f4', 'info_dict': { - 'id': '1007609', + 'id': '1126480', 'ext': 'mp4', 'title': 'Остросюжетный фильм «Кома»', 'description': 'Остросюжетный фильм «Кома»', @@ -68,6 +69,10 @@ class NTVRuIE(InfoExtractor): 'thumbnail': r're:^http://.*\.jpg', 'duration': 2590, }, + }, { + # Schemeless file URL + 'url': 'https://www.ntv.ru/video/1797442', + 'only_matching': True, }] _VIDEO_ID_REGEXES = [ @@ -96,37 +101,31 @@ class NTVRuIE(InfoExtractor): 'http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML') - title = clean_html(xpath_text(player, './data/title', 'title', fatal=True)) - description = clean_html(xpath_text(player, './data/description', 'description')) + title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True))) video = player.find('./data/video') - video_id = xpath_text(video, './id', 'video id') - thumbnail = xpath_text(video, './splash', 'thumbnail') - duration = int_or_none(xpath_text(video, './totaltime', 'duration')) - view_count = int_or_none(xpath_text(video, './views', 'view count')) - - token = self._download_webpage( - 'http://stat.ntv.ru/services/access/token', - video_id, 'Downloading access token') formats = [] for format_id in ['', 'hi', 'webm']: - file_ = video.find('./%sfile' % format_id) - if file_ is None: + file_ = xpath_text(video, './%sfile' % format_id) + if not file_: continue - size = video.find('./%ssize' % format_id) + if file_.startswith('//'): + file_ = self._proto_relative_url(file_) + elif not file_.startswith('http'): + file_ = 'http://media.ntv.ru/vod/' + file_ formats.append({ - 'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token), - 'filesize': int_or_none(size.text if size is not None else None), + 'url': file_, + 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)), }) self._sort_formats(formats) return { - 'id': video_id, + 'id': xpath_text(video, './id'), 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, + 'description': strip_or_none(unescapeHTML(xpath_text(player, './data/description'))), + 'thumbnail': xpath_text(video, './splash'), + 'duration': int_or_none(xpath_text(video, './totaltime')), + 'view_count': int_or_none(xpath_text(video, './views')), 'formats': formats, } From 76d9eca43dd4fd7698d138b90ab6b2dd159559e0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 19 Nov 2019 20:16:31 +0100 Subject: [PATCH 0971/1201] [ivi] fallback to old extraction method for unknown error codes --- youtube_dl/extractor/ivi.py | 79 +++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 7f1146d95..0db023622 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -93,46 +93,57 @@ class IviIE(InfoExtractor): ] }).encode() - try: - from Crypto.Cipher import Blowfish - from Crypto.Hash import CMAC + for site in (353, 183): + content_data = data % site + if site == 353: + try: + from Cryptodome.Cipher import Blowfish + from Cryptodome.Hash import CMAC + pycryptodomex_found = True + except ImportError: + pycryptodomex_found = False + continue - timestamp = self._download_json( + timestamp = (self._download_json( + self._LIGHT_URL, video_id, + 'Downloading timestamp JSON', data=json.dumps({ + 'method': 'da.timestamp.get', + 'params': [] + }).encode(), fatal=False) or {}).get('result') + if not timestamp: + continue + + query = { + 'ts': timestamp, + 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, Blowfish).hexdigest(), + } + else: + query = {} + + video_json = self._download_json( self._LIGHT_URL, video_id, - 'Downloading timestamp JSON', data=json.dumps({ - 'method': 'da.timestamp.get', - 'params': [] - }).encode())['result'] + 'Downloading video JSON', data=content_data, query=query) - data = data % 353 - query = { - 'ts': timestamp, - 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + data, Blowfish).hexdigest(), - } - except ImportError: - data = data % 183 - query = {} - - video_json = self._download_json( - self._LIGHT_URL, video_id, - 'Downloading video JSON', data=data, query=query) - - error = video_json.get('error') - if error: - origin = error.get('origin') - message = error.get('message') or error.get('user_message') - extractor_msg = 'Unable to download video %s' - if origin == 'NotAllowedForLocation': - self.raise_geo_restricted(message, self._GEO_COUNTRIES) - elif origin == 'NoRedisValidData': - extractor_msg = 'Video %s does not exist' - elif message: - if 'недоступен для просмотра на площадке s183' in message: + error = video_json.get('error') + if error: + origin = error.get('origin') + message = error.get('message') or error.get('user_message') + extractor_msg = 'Unable to download video %s' + if origin == 'NotAllowedForLocation': + self.raise_geo_restricted(message, self._GEO_COUNTRIES) + elif origin == 'NoRedisValidData': + extractor_msg = 'Video %s does not exist' + elif site == 353: + continue + elif not pycryptodomex_found: raise ExtractorError( 'pycryptodome not found. Please install it.', expected=True) - extractor_msg += ': ' + message - raise ExtractorError(extractor_msg % video_id, expected=True) + elif message: + extractor_msg += ': ' + message + raise ExtractorError(extractor_msg % video_id, expected=True) + else: + break result = video_json['result'] title = result['title'] From f0f6a7e73f55b6227c40af17c6fcab44b5a2df79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Nov 2019 23:21:03 +0700 Subject: [PATCH 0972/1201] [chaturbate] Fix extraction (closes #23010, closes #23012) --- youtube_dl/extractor/chaturbate.py | 42 +++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 656e715ae..a459dcb8d 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -3,7 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + lowercase_escape, + url_or_none, +) class ChaturbateIE(InfoExtractor): @@ -38,12 +42,31 @@ class ChaturbateIE(InfoExtractor): 'https://chaturbate.com/%s/' % video_id, video_id, headers=self.geo_verification_headers()) - m3u8_urls = [] + found_m3u8_urls = [] - for m in re.finditer( - r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage): - m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group( - 'url').replace('_fast', '') + data = self._parse_json( + self._search_regex( + r'initialRoomDossier\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'data', default='{}', group='value'), + video_id, transform_source=lowercase_escape, fatal=False) + if data: + m3u8_url = url_or_none(data.get('hls_source')) + if m3u8_url: + found_m3u8_urls.append(m3u8_url) + + if not found_m3u8_urls: + for m in re.finditer( + r'(\\u002[27])(?Phttp.+?\.m3u8.*?)\1', webpage): + found_m3u8_urls.append(lowercase_escape(m.group('url'))) + + if not found_m3u8_urls: + for m in re.finditer( + r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage): + found_m3u8_urls.append(m.group('url')) + + m3u8_urls = [] + for found_m3u8_url in found_m3u8_urls: + m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '') for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url): if m3u8_url not in m3u8_urls: m3u8_urls.append(m3u8_url) @@ -63,7 +86,12 @@ class ChaturbateIE(InfoExtractor): formats = [] for m3u8_url in m3u8_urls: - m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow' + for known_id in ('fast', 'slow'): + if '_%s' % known_id in m3u8_url: + m3u8_id = known_id + break + else: + m3u8_id = None formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, ext='mp4', # ffmpeg skips segments for fast m3u8 From 25d3f770e6ef518a4230ad41bd4ea69dd2e851af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Nov 2019 23:22:59 +0700 Subject: [PATCH 0973/1201] [ivi] Ask for pycryptodomex instead of pycryptodome See discussion at https://github.com/ytdl-org/youtube-dl/commit/1bba88efc7e1f82095f7ae38348e56026db4bf3c#r35982110 --- youtube_dl/extractor/ivi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 0db023622..52b53bfeb 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -137,7 +137,7 @@ class IviIE(InfoExtractor): continue elif not pycryptodomex_found: raise ExtractorError( - 'pycryptodome not found. Please install it.', + 'pycryptodomex not found. Please install it.', expected=True) elif message: extractor_msg += ': ' + message From f8015c15746e83394ecc395c6a13823d20971772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Nov 2019 23:38:39 +0700 Subject: [PATCH 0974/1201] [ivi] Fix python 3.4 support --- youtube_dl/extractor/ivi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 52b53bfeb..315ea03fa 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -91,10 +91,10 @@ class IviIE(InfoExtractor): 'contentid': video_id } ] - }).encode() + }) for site in (353, 183): - content_data = data % site + content_data = (data % site).encode() if site == 353: try: from Cryptodome.Cipher import Blowfish From 80a51fc2ef3ebb7d3e3d5fd0b6e9942bb4be6f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 Nov 2019 01:10:24 +0700 Subject: [PATCH 0975/1201] [ivi] Skip s353 for bundled exe See https://github.com/Legrandin/pycryptodome/issues/228 --- youtube_dl/extractor/ivi.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 315ea03fa..a502e8806 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -1,8 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re import json +import re +import sys from .common import InfoExtractor from ..utils import ( @@ -93,9 +94,13 @@ class IviIE(InfoExtractor): ] }) + bundled = hasattr(sys, 'frozen') + for site in (353, 183): content_data = (data % site).encode() if site == 353: + if bundled: + continue try: from Cryptodome.Cipher import Blowfish from Cryptodome.Hash import CMAC @@ -135,6 +140,10 @@ class IviIE(InfoExtractor): extractor_msg = 'Video %s does not exist' elif site == 353: continue + elif bundled: + raise ExtractorError( + 'This feature does not work from bundled exe. Run youtube-dl from sources.', + expected=True) elif not pycryptodomex_found: raise ExtractorError( 'pycryptodomex not found. Please install it.', From fb8dfc5a2772ca35dd65bad7b7565ad6ec1ad4dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 Nov 2019 01:21:00 +0700 Subject: [PATCH 0976/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/ChangeLog b/ChangeLog index d46d20082..acee2a75a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,49 @@ +version + +Core ++ [extractor/common] Clean jwplayer description HTML tags ++ [extractor/common] Add data, headers and query to all major extract formats + methods + +Extractors +* [chaturbate] Fix extraction (#23010, #23012) ++ [ntvru] Add support for non relative file URLs (#23140) +* [vk] Fix wall audio thumbnails extraction (#23135) +* [ivi] Fix format extraction (#21991) +- [comcarcoff] Remove extractor ++ [drtv] Add support for new URL schema (#23059) ++ [nexx] Add support for Multi Player JS Setup (#23052) ++ [teamcoco] Add support for new videos (#23054) +* [soundcloud] Check if the soundtrack has downloads left (#23045) +* [facebook] Fix posts video data extraction (#22473) +- [addanime] Remove extractor +- [minhateca] Remove extractor +- [daisuki] Remove extractor +* [seeker] Fix extraction +- [revision3] Remove extractors +* [twitch] Fix video comments URL (#18593, #15828) +* [twitter] Improve extraction + + Add support for generic embeds (#22168) + * Always extract http formats for native videos (#14934) + + Add support for Twitter Broadcasts (#21369) + + Extract more metadata + * Improve VMap format extraction + * Unify extraction code for both twitter statuses and cards ++ [twitch] Add support for Clip embed URLs +* [lnkgo] Fix extraction (#16834) +* [mixcloud] Improve extraction + * Improve metadata extraction (#11721) + * Fix playlist extraction (#22378) + * Fix user mixes extraction (#15197, #17865) ++ [kinja] Add support for Kinja embeds (#5756, #11282, #22237, #22384) +* [onionstudios] Fix extraction ++ [hotstar] Pass Referer header to format requests (#22836) +* [dplay] Minimize response size ++ [patreon] Extract uploader_id and filesize +* [patreon] Minimize response size +* [roosterteeth] Fix login request (#16094, #22689) + + version 2019.11.05 Extractors From 0de9fd24dc8723c78a90cb546e4a05818304521e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 Nov 2019 01:24:27 +0700 Subject: [PATCH 0977/1201] release 2019.11.22 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 10 ++-------- youtube_dl/version.py | 2 +- 8 files changed, 16 insertions(+), 22 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 12de9add2..d3e11cdcf 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.05 + [debug] youtube-dl version 2019.11.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 8a6202cf6..51bf4db3b 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 83f91d5fe..19025ff25 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index be8e70f1e..a381b6979 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.05 + [debug] youtube-dl version 2019.11.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 7544d171c..9c945d5ec 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.05** +- [ ] I've verified that I'm running youtube-dl version **2019.11.22** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index acee2a75a..daaff3eef 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.11.22 Core + [extractor/common] Clean jwplayer description HTML tags diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 536b87479..3dcb026c5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -26,7 +26,6 @@ - **AcademicEarth:Course** - **acast** - **acast:channel** - - **AddAnime** - **ADN**: Anime Digital Network - **AdobeConnect** - **AdobeTV** @@ -175,7 +174,6 @@ - **CNN** - **CNNArticle** - **CNNBlogs** - - **ComCarCoff** - **ComedyCentral** - **ComedyCentralFullEpisodes** - **ComedyCentralShortname** @@ -203,8 +201,6 @@ - **dailymotion** - **dailymotion:playlist** - **dailymotion:user** - - **DaisukiMotto** - - **DaisukiMottoPlaylist** - **daum.net** - **daum.net:clip** - **daum.net:playlist** @@ -404,6 +400,7 @@ - **Ketnet** - **KhanAcademy** - **KickStarter** + - **KinjaEmbed** - **KinoPoisk** - **KonserthusetPlay** - **kontrtube**: KontrTube.ru - Труба зовёт @@ -485,14 +482,12 @@ - **Mgoon** - **MGTV**: 芒果TV - **MiaoPai** - - **Minhateca** - **MinistryGrid** - **Minoto** - **miomio.tv** - **MiTele**: mitele.es - **mixcloud** - **mixcloud:playlist** - - **mixcloud:stream** - **mixcloud:user** - **Mixer:live** - **Mixer:vod** @@ -723,8 +718,6 @@ - **Restudy** - **Reuters** - **ReverbNation** - - **revision** - - **revision3:embed** - **RICE** - **RMCDecouverte** - **RockstarGames** @@ -958,6 +951,7 @@ - **twitch:vod** - **twitter** - **twitter:amplify** + - **twitter:broadcast** - **twitter:card** - **udemy** - **udemy:course** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8012a66db..361809681 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.11.05' +__version__ = '2019.11.22' From cf3c9eafad5e6b83788e15a605aa6804b1ab307c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 Nov 2019 00:03:51 +0700 Subject: [PATCH 0978/1201] [soundcloud] Update client id (closes #23214) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index e8ffb2cbe..988dec4fa 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -245,7 +245,7 @@ class SoundcloudIE(InfoExtractor): _API_BASE = 'https://api.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/' _BASE_URL = 'https://soundcloud.com/' - _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' + _CLIENT_ID = 'UW9ajvMgVdMMW3cdeBi8lPfN6dvOVGji' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' _ARTWORK_MAP = { From 9d30c2132acf2d12bfa8e559987c341c76d9cd24 Mon Sep 17 00:00:00 2001 From: InfernalUnderling <42065091+InfernalUnderling@users.noreply.github.com> Date: Tue, 26 Nov 2019 17:08:37 +0000 Subject: [PATCH 0979/1201] [utils] Handle rd-suffixed day parts in unified_strdate (#23199) --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 3920542bb..0db37d9d8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -340,6 +340,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('July 15th, 2013'), '20130715') self.assertEqual(unified_strdate('September 1st, 2013'), '20130901') self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') + self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103') + self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index aed988b88..0d30075aa 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1718,13 +1718,16 @@ DATE_FORMATS = ( '%B %d %Y', '%B %dst %Y', '%B %dnd %Y', + '%B %drd %Y', '%B %dth %Y', '%b %d %Y', '%b %dst %Y', '%b %dnd %Y', + '%b %drd %Y', '%b %dth %Y', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', + '%b %drd %Y %I:%M', '%b %dth %Y %I:%M', '%Y %m %d', '%Y-%m-%d', From 6ddd4bf6ac04ae0b8ba39fb4124e844afc49b5a9 Mon Sep 17 00:00:00 2001 From: InfernalUnderling <42065091+InfernalUnderling@users.noreply.github.com> Date: Tue, 26 Nov 2019 17:20:39 +0000 Subject: [PATCH 0980/1201] [bitchute] Extract upload date (closes #22990) (#23193) --- youtube_dl/extractor/bitchute.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py index 430663fbf..0c773e66e 100644 --- a/youtube_dl/extractor/bitchute.py +++ b/youtube_dl/extractor/bitchute.py @@ -7,6 +7,7 @@ import re from .common import InfoExtractor from ..utils import ( orderedSet, + unified_strdate, urlencode_postdata, ) @@ -23,6 +24,7 @@ class BitChuteIE(InfoExtractor): 'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Victoria X Rave', + 'upload_date': '20170813', }, }, { 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', @@ -74,12 +76,17 @@ class BitChuteIE(InfoExtractor): r'(?s)]+\bclass=["\']video-author[^>]+>(.+?)

    '), webpage, 'uploader', fatal=False) + upload_date = unified_strdate(self._search_regex( + r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.', + webpage, 'upload date', fatal=False)) + return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, + 'upload_date': upload_date, 'formats': formats, } From 1ced222120c00854865c5b16e89838235ed549ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 Nov 2019 02:26:42 +0700 Subject: [PATCH 0981/1201] [utils] Add generic caesar cipher and rot47 --- test/test_utils.py | 16 ++++++++++++++++ youtube_dl/utils.py | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 0db37d9d8..e83c8ea11 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -19,6 +19,7 @@ from youtube_dl.utils import ( age_restricted, args_to_str, encode_base_n, + caesar, clean_html, date_from_str, DateRange, @@ -69,6 +70,7 @@ from youtube_dl.utils import ( remove_start, remove_end, remove_quotes, + rot47, shell_quote, smuggle_url, str_to_int, @@ -1369,6 +1371,20 @@ Line 1 self.assertRaises(ValueError, encode_base_n, 0, 70) self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table) + def test_caesar(self): + self.assertEqual(caesar('ace', 'abcdef', 2), 'cea') + self.assertEqual(caesar('cea', 'abcdef', -2), 'ace') + self.assertEqual(caesar('ace', 'abcdef', -2), 'eac') + self.assertEqual(caesar('eac', 'abcdef', 2), 'ace') + self.assertEqual(caesar('ace', 'abcdef', 0), 'ace') + self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz') + self.assertEqual(caesar('abc', 'acegik', 2), 'ebg') + self.assertEqual(caesar('ebg', 'acegik', -2), 'abc') + + def test_rot47(self): + self.assertEqual(rot47('youtube-dl'), r'J@FEF36\5=') + self.assertEqual(rot47('YOUTUBE-DL'), r'*~&%&qt\s{') + def test_urshift(self): self.assertEqual(urshift(3, 1), 1) self.assertEqual(urshift(-3, 1), 2147483646) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0d30075aa..b14603d8a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -5383,6 +5383,19 @@ def decode_packed_codes(code): obfucasted_code) +def caesar(s, alphabet, shift): + if shift == 0: + return s + l = len(alphabet) + return ''.join( + alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c + for c in s) + + +def rot47(s): + return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47) + + def parse_m3u8_attributes(attrib): info = {} for (key, val) in re.findall(r'(?P[A-Z0-9-]+)=(?P"[^"]+"|[^",]+)(?:,|$)', attrib): From edc2a1f68b267abc6b4c94991da4ad83fd8374bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 Nov 2019 02:28:06 +0700 Subject: [PATCH 0982/1201] [vivo] Fix extraction (closes #22328, closes #22279) --- youtube_dl/extractor/shared.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index ff575f592..02295d1a4 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,13 +1,18 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_b64decode +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote_plus, +) from ..utils import ( determine_ext, ExtractorError, int_or_none, + js_to_json, KNOWN_EXTENSIONS, parse_filesize, + rot47, url_or_none, urlencode_postdata, ) @@ -112,16 +117,22 @@ class VivoIE(SharedBaseIE): webpage, 'filesize', fatal=False)) def _extract_video_url(self, webpage, video_id, url): - def decode_url(encoded_url): + def decode_url_old(encoded_url): return compat_b64decode(encoded_url).decode('utf-8') - stream_url = url_or_none(decode_url(self._search_regex( + stream_url = self._search_regex( r'data-stream\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'stream url', default=None, group='url'))) + 'stream url', default=None, group='url') + if stream_url: + stream_url = url_or_none(decode_url_old(stream_url)) if stream_url: return stream_url - return self._parse_json( + + def decode_url(encoded_url): + return rot47(compat_urllib_parse_unquote_plus(encoded_url)) + + return decode_url(self._parse_json( self._search_regex( - r'InitializeStream\s*\(\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'stream', group='url'), - video_id, transform_source=decode_url)[0] + r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage, + 'stream'), + video_id, transform_source=js_to_json)['source']) From df65a4a1ed3096b8210c097c77d00f0391f78503 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 26 Nov 2019 21:53:51 +0100 Subject: [PATCH 0983/1201] [corus] improve extraction - add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com and disneylachaine.ca(closes #20861) - add support for self hosted videos(closes #22075) - detect DRM protection(closes #14910)(closes #9164) --- youtube_dl/extractor/corus.py | 169 ++++++++++++++++++++++------------ 1 file changed, 112 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py index a1b251804..e11aadf14 100644 --- a/youtube_dl/extractor/corus.py +++ b/youtube_dl/extractor/corus.py @@ -4,7 +4,12 @@ from __future__ import unicode_literals import re from .theplatform import ThePlatformFeedIE -from ..utils import int_or_none +from ..utils import ( + dict_get, + ExtractorError, + float_or_none, + int_or_none, +) class CorusIE(ThePlatformFeedIE): @@ -12,24 +17,49 @@ class CorusIE(ThePlatformFeedIE): https?:// (?:www\.)? (?P - (?:globaltv|etcanada)\.com| - (?:hgtv|foodnetwork|slice|history|showcase|bigbrothercanada)\.ca + (?: + globaltv| + etcanada| + seriesplus| + wnetwork| + ytv + )\.com| + (?: + hgtv| + foodnetwork| + slice| + history| + showcase| + bigbrothercanada| + abcspark| + disney(?:channel|lachaine) + )\.ca + ) + /(?:[^/]+/)* + (?: + video\.html\?.*?\bv=| + videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)? + ) + (?P + [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}| + (?:[A-Z]{4})?\d{12,20} ) - /(?:video/(?:[^/]+/)?|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=)) - (?P\d+) ''' _TESTS = [{ 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/', - 'md5': '05dcbca777bf1e58c2acbb57168ad3a6', 'info_dict': { 'id': '870923331648', 'ext': 'mp4', 'title': 'Movie Night Popcorn with Bryan', 'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.', - 'uploader': 'SHWM-NEW', 'upload_date': '20170206', 'timestamp': 1486392197, }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'expected_warnings': ['Failed to parse JSON'], }, { 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753', 'only_matching': True, @@ -48,58 +78,83 @@ class CorusIE(ThePlatformFeedIE): }, { 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/', 'only_matching': True + }, { + 'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/', + 'only_matching': True + }, { + 'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/', + 'only_matching': True }] - - _TP_FEEDS = { - 'globaltv': { - 'feed_id': 'ChQqrem0lNUp', - 'account_id': 2269680845, - }, - 'etcanada': { - 'feed_id': 'ChQqrem0lNUp', - 'account_id': 2269680845, - }, - 'hgtv': { - 'feed_id': 'L0BMHXi2no43', - 'account_id': 2414428465, - }, - 'foodnetwork': { - 'feed_id': 'ukK8o58zbRmJ', - 'account_id': 2414429569, - }, - 'slice': { - 'feed_id': '5tUJLgV2YNJ5', - 'account_id': 2414427935, - }, - 'history': { - 'feed_id': 'tQFx_TyyEq4J', - 'account_id': 2369613659, - }, - 'showcase': { - 'feed_id': '9H6qyshBZU3E', - 'account_id': 2414426607, - }, - 'bigbrothercanada': { - 'feed_id': 'ChQqrem0lNUp', - 'account_id': 2269680845, - }, + _GEO_BYPASS = False + _SITE_MAP = { + 'globaltv': 'series', + 'etcanada': 'series', + 'foodnetwork': 'food', + 'bigbrothercanada': 'series', + 'disneychannel': 'disneyen', + 'disneylachaine': 'disneyfr', } def _real_extract(self, url): domain, video_id = re.match(self._VALID_URL, url).groups() - feed_info = self._TP_FEEDS[domain.split('.')[0]] - return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: { - 'episode_number': int_or_none(e.get('pl1$episode')), - 'season_number': int_or_none(e.get('pl1$season')), - 'series': e.get('pl1$show'), - }, { - 'HLS': { - 'manifest': 'm3u', - }, - 'DesktopHLS Default': { - 'manifest': 'm3u', - }, - 'MP4 MBR': { - 'manifest': 'm3u', - }, - }, feed_info['account_id']) + site = domain.split('.')[0] + path = self._SITE_MAP.get(site, site) + if path != 'series': + path = 'migration/' + path + video = self._download_json( + 'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path, + video_id, query={'byId': video_id}, + headers={'Accept': 'application/json'})[0] + title = video['title'] + + formats = [] + for source in video.get('sources', []): + smil_url = source.get('file') + if not smil_url: + continue + source_type = source.get('type') + note = 'Downloading%s smil file' % (' ' + source_type if source_type else '') + resp = self._download_webpage( + smil_url, video_id, note, fatal=False, + headers=self.geo_verification_headers()) + if not resp: + continue + error = self._parse_json(resp, video_id, fatal=False) + if error: + if error.get('exception') == 'GeoLocationBlocked': + self.raise_geo_restricted(countries=['CA']) + raise ExtractorError(error['description']) + smil = self._parse_xml(resp, video_id, fatal=False) + if smil is None: + continue + namespace = self._parse_smil_namespace(smil) + formats.extend(self._parse_smil_formats( + smil, smil_url, video_id, namespace)) + if not formats and video.get('drm'): + raise ExtractorError('This video is DRM protected.', expected=True) + self._sort_formats(formats) + + subtitles = {} + for track in video.get('tracks', []): + track_url = track.get('file') + if not track_url: + continue + lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en' + subtitles.setdefault(lang, []).append({'url': track_url}) + + metadata = video.get('metadata') or {} + get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number')) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')), + 'description': video.get('description'), + 'timestamp': int_or_none(video.get('availableDate'), 1000), + 'subtitles': subtitles, + 'duration': float_or_none(metadata.get('duration')), + 'series': dict_get(video, ('show', 'pl1$show')), + 'season_number': get_number('season'), + 'episode_number': get_number('episode'), + } From 5ef62fc4ce1f255343d67b70f3cee2f2240cdfba Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 26 Nov 2019 22:01:34 +0100 Subject: [PATCH 0984/1201] [dailymotion] improve extraction - extract http formats included in m3u8 manifest - fix user extraction(closes #3553)(closes #21415) - add suport for User Authentication(closes #11491) - fix password protected videos extraction(closes #23176) - respect age limit option and family filter cookie value(closes #18437) - handle video url playlist query param - report alowed countries for geo-restricted videos --- youtube_dl/extractor/common.py | 13 + youtube_dl/extractor/dailymotion.py | 559 +++++++++++----------------- youtube_dl/extractor/vk.py | 3 +- 3 files changed, 234 insertions(+), 341 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 04d676378..eaae5e484 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1766,6 +1766,19 @@ class InfoExtractor(object): # the same GROUP-ID f['acodec'] = 'none' formats.append(f) + + # for DailyMotion + progressive_uri = last_stream_inf.get('PROGRESSIVE-URI') + if progressive_uri: + http_f = f.copy() + del http_f['manifest_url'] + http_f.update({ + 'format_id': f['format_id'].replace('hls-', 'http-'), + 'protocol': 'http', + 'url': progressive_uri, + }) + formats.append(http_f) + last_stream_inf = {} return formats diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 745971900..327fdb04a 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -1,50 +1,93 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import functools -import hashlib -import itertools import json -import random import re -import string from .common import InfoExtractor -from ..compat import compat_struct_pack +from ..compat import compat_HTTPError from ..utils import ( - determine_ext, - error_to_compat_str, + age_restricted, + clean_html, ExtractorError, int_or_none, - mimetype2ext, OnDemandPagedList, - parse_iso8601, - sanitized_Request, - str_to_int, try_get, unescapeHTML, - update_url_query, - url_or_none, urlencode_postdata, ) class DailymotionBaseInfoExtractor(InfoExtractor): + _FAMILY_FILTER = None + _HEADERS = { + 'Content-Type': 'application/json', + 'Origin': 'https://www.dailymotion.com', + } + _NETRC_MACHINE = 'dailymotion' + + def _get_dailymotion_cookies(self): + return self._get_cookies('https://www.dailymotion.com/') + @staticmethod - def _build_request(url): - """Build a request with the family filter disabled""" - request = sanitized_Request(url) - request.add_header('Cookie', 'family_filter=off; ff=off') - return request + def _get_cookie_value(cookies, name): + cookie = cookies.get('name') + if cookie: + return cookie.value - def _download_webpage_handle_no_ff(self, url, *args, **kwargs): - request = self._build_request(url) - return self._download_webpage_handle(request, *args, **kwargs) + def _set_dailymotion_cookie(self, name, value): + self._set_cookie('www.dailymotion.com', name, value) - def _download_webpage_no_ff(self, url, *args, **kwargs): - request = self._build_request(url) - return self._download_webpage(request, *args, **kwargs) + def _real_initialize(self): + cookies = self._get_dailymotion_cookies() + ff = self._get_cookie_value(cookies, 'ff') + self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit')) + self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off') + + def _call_api(self, object_type, xid, object_fields, note, filter_extra=None): + if not self._HEADERS.get('Authorization'): + cookies = self._get_dailymotion_cookies() + token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token') + if not token: + data = { + 'client_id': 'f1a362d288c1b98099c7', + 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5', + } + username, password = self._get_login_info() + if username: + data.update({ + 'grant_type': 'password', + 'password': password, + 'username': username, + }) + else: + data['grant_type'] = 'client_credentials' + try: + token = self._download_json( + 'https://graphql.api.dailymotion.com/oauth/token', + None, 'Downloading Access Token', + data=urlencode_postdata(data))['access_token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + raise ExtractorError(self._parse_json( + e.cause.read().decode(), xid)['error_description'], expected=True) + raise + self._set_dailymotion_cookie('access_token' if username else 'client_token', token) + self._HEADERS['Authorization'] = 'Bearer ' + token + + resp = self._download_json( + 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({ + 'query': '''{ + %s(xid: "%s"%s) { + %s + } +}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields), + }).encode(), headers=self._HEADERS) + obj = resp['data'][object_type] + if not obj: + raise ExtractorError(resp['errors'][0]['message'], expected=True) + return obj class DailymotionIE(DailymotionBaseInfoExtractor): @@ -54,18 +97,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)| (?:www\.)?lequipe\.fr/video ) - /(?P[^/?_]+) + /(?P[^/?_]+)(?:.+?\bplaylist=(?Px[0-9a-z]+))? ''' IE_NAME = 'dailymotion' - - _FORMATS = [ - ('stream_h264_ld_url', 'ld'), - ('stream_h264_url', 'standard'), - ('stream_h264_hq_url', 'hq'), - ('stream_h264_hd_url', 'hd'), - ('stream_h264_hd1080_url', 'hd180'), - ] - _TESTS = [{ 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news', 'md5': '074b95bdee76b9e3654137aee9c79dfe', @@ -74,7 +108,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller', 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller', - 'thumbnail': r're:^https?:.*\.(?:jpg|png)$', 'duration': 187, 'timestamp': 1493651285, 'upload_date': '20170501', @@ -146,7 +179,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor): }, { 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2', 'only_matching': True, + }, { + 'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw', + 'only_matching': True, }] + _GEO_BYPASS = False + _COMMON_MEDIA_FIELDS = '''description + geoblockedCountries { + allowed + } + xid''' @staticmethod def _extract_urls(webpage): @@ -162,264 +204,140 @@ class DailymotionIE(DailymotionBaseInfoExtractor): return urls def _real_extract(self, url): - video_id = self._match_id(url) + video_id, playlist_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage_no_ff( - 'https://www.dailymotion.com/video/%s' % video_id, video_id) + if playlist_id: + if not self._downloader.params.get('noplaylist'): + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + return self.url_result( + 'http://www.dailymotion.com/playlist/' + playlist_id, + 'DailymotionPlaylist', playlist_id) + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - age_limit = self._rta_search(webpage) - - description = self._og_search_description( - webpage, default=None) or self._html_search_meta( - 'description', webpage, 'description') - - view_count_str = self._search_regex( - (r']+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"', - r'video_views_count[^>]+>\s+([\s\d\,.]+)'), - webpage, 'view count', default=None) - if view_count_str: - view_count_str = re.sub(r'\s', '', view_count_str) - view_count = str_to_int(view_count_str) - comment_count = int_or_none(self._search_regex( - r']+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"', - webpage, 'comment count', default=None)) - - player_v5 = self._search_regex( - [r'buildPlayer\(({.+?})\);\n', # See https://github.com/ytdl-org/youtube-dl/issues/7826 - r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', - r'buildPlayer\(({.+?})\);', - r'var\s+config\s*=\s*({.+?});', - # New layout regex (see https://github.com/ytdl-org/youtube-dl/issues/13580) - r'__PLAYER_CONFIG__\s*=\s*({.+?});'], - webpage, 'player v5', default=None) - if player_v5: - player = self._parse_json(player_v5, video_id, fatal=False) or {} - metadata = try_get(player, lambda x: x['metadata'], dict) - if not metadata: - metadata_url = url_or_none(try_get( - player, lambda x: x['context']['metadata_template_url1'])) - if metadata_url: - metadata_url = metadata_url.replace(':videoId', video_id) - else: - metadata_url = update_url_query( - 'https://www.dailymotion.com/player/metadata/video/%s' - % video_id, { - 'embedder': url, - 'integration': 'inline', - 'GK_PV5_NEON': '1', - }) - metadata = self._download_json( - metadata_url, video_id, 'Downloading metadata JSON') - - if try_get(metadata, lambda x: x['error']['type']) == 'password_protected': - password = self._downloader.params.get('videopassword') - if password: - r = int(metadata['id'][1:], 36) - us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=') - t = ''.join(random.choice(string.ascii_letters) for i in range(10)) - n = us64e(compat_struct_pack('I', r)) - i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest()) - metadata = self._download_json( - 'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id) - - self._check_error(metadata) - - formats = [] - for quality, media_list in metadata['qualities'].items(): - for media in media_list: - media_url = media.get('url') - if not media_url: - continue - type_ = media.get('type') - if type_ == 'application/vnd.lumberjack.manifest': - continue - ext = mimetype2ext(type_) or determine_ext(media_url) - if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - media_url, video_id, 'mp4', preference=-1, - m3u8_id='hls', fatal=False) - for f in m3u8_formats: - f['url'] = f['url'].split('#')[0] - formats.append(f) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - media_url, video_id, preference=-1, f4m_id='hds', fatal=False)) - else: - f = { - 'url': media_url, - 'format_id': 'http-%s' % quality, - 'ext': ext, - } - m = re.search(r'H264-(?P\d+)x(?P\d+)', media_url) - if m: - f.update({ - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - formats.append(f) - self._sort_formats(formats) - - title = metadata['title'] - duration = int_or_none(metadata.get('duration')) - timestamp = int_or_none(metadata.get('created_time')) - thumbnail = metadata.get('poster_url') - uploader = metadata.get('owner', {}).get('screenname') - uploader_id = metadata.get('owner', {}).get('id') - - subtitles = {} - subtitles_data = metadata.get('subtitles', {}).get('data', {}) - if subtitles_data and isinstance(subtitles_data, dict): - for subtitle_lang, subtitle in subtitles_data.items(): - subtitles[subtitle_lang] = [{ - 'ext': determine_ext(subtitle_url), - 'url': subtitle_url, - } for subtitle_url in subtitle.get('urls', [])] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'age_limit': age_limit, - 'view_count': view_count, - 'comment_count': comment_count, - 'formats': formats, - 'subtitles': subtitles, - } - - # vevo embed - vevo_id = self._search_regex( - r'[\w]*)', - webpage, 'vevo embed', default=None) - if vevo_id: - return self.url_result('vevo:%s' % vevo_id, 'Vevo') - - # fallback old player - embed_page = self._download_webpage_no_ff( - 'https://www.dailymotion.com/embed/video/%s' % video_id, - video_id, 'Downloading embed page') - - timestamp = parse_iso8601(self._html_search_meta( - 'video:release_date', webpage, 'upload date')) - - info = self._parse_json( - self._search_regex( - r'var info = ({.*?}),$', embed_page, - 'video info', flags=re.MULTILINE), - video_id) - - self._check_error(info) - - formats = [] - for (key, format_id) in self._FORMATS: - video_url = info.get(key) - if video_url is not None: - m_size = re.search(r'H264-(\d+)x(\d+)', video_url) - if m_size is not None: - width, height = map(int_or_none, (m_size.group(1), m_size.group(2))) - else: - width, height = None, None - formats.append({ - 'url': video_url, - 'ext': 'mp4', - 'format_id': format_id, - 'width': width, - 'height': height, - }) - self._sort_formats(formats) - - # subtitles - video_subtitles = self.extract_subtitles(video_id, webpage) - - title = self._og_search_title(webpage, default=None) - if title is None: - title = self._html_search_regex( - r'(?s)]*>(.*?)', webpage, - 'title') - - return { - 'id': video_id, - 'formats': formats, - 'uploader': info['owner.screenname'], - 'timestamp': timestamp, - 'title': title, - 'description': description, - 'subtitles': video_subtitles, - 'thumbnail': info['thumbnail_url'], - 'age_limit': age_limit, - 'view_count': view_count, - 'duration': info['duration'] + password = self._downloader.params.get('videopassword') + media = self._call_api( + 'media', video_id, '''... on Video { + %s + stats { + likes { + total } + views { + total + } + } + } + ... on Live { + %s + audienceCount + isOnAir + }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata', + 'password: "%s"' % self._downloader.params.get('videopassword') if password else None) + xid = media['xid'] - def _check_error(self, info): - error = info.get('error') + metadata = self._download_json( + 'https://www.dailymotion.com/player/metadata/video/' + xid, + xid, 'Downloading metadata JSON', + query={'app': 'com.dailymotion.neon'}) + + error = metadata.get('error') if error: - title = error.get('title') or error['message'] + title = error.get('title') or error['raw_message'] # See https://developer.dailymotion.com/api#access-error if error.get('code') == 'DM007': - self.raise_geo_restricted(msg=title) + allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list) + self.raise_geo_restricted(msg=title, countries=allowed_countries) raise ExtractorError( '%s said: %s' % (self.IE_NAME, title), expected=True) - def _get_subtitles(self, video_id, webpage): - try: - sub_list = self._download_webpage( - 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, - video_id, note=False) - except ExtractorError as err: - self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err)) - return {} - info = json.loads(sub_list) - if (info['total'] > 0): - sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list']) - return sub_lang_list - self._downloader.report_warning('video doesn\'t have subtitles') - return {} + title = metadata['title'] + is_live = media.get('isOnAir') + formats = [] + for quality, media_list in metadata['qualities'].items(): + for m in media_list: + media_url = m.get('url') + media_type = m.get('type') + if not media_url or media_type == 'application/vnd.lumberjack.manifest': + continue + if media_type == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', + 'm3u8' if is_live else 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + f = { + 'url': media_url, + 'format_id': 'http-' + quality, + } + m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url) + if m: + width, height, fps = map(int_or_none, m.groups()) + f.update({ + 'fps': fps, + 'height': height, + 'width': width, + }) + formats.append(f) + for f in formats: + f['url'] = f['url'].split('#')[0] + if not f.get('fps') and f['format_id'].endswith('@60'): + f['fps'] = 60 + self._sort_formats(formats) + + subtitles = {} + subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {} + for subtitle_lang, subtitle in subtitles_data.items(): + subtitles[subtitle_lang] = [{ + 'url': subtitle_url, + } for subtitle_url in subtitle.get('urls', [])] + + thumbnails = [] + for height, poster_url in metadata.get('posters', {}).items(): + thumbnails.append({ + 'height': int_or_none(height), + 'id': height, + 'url': poster_url, + }) + + owner = metadata.get('owner') or {} + stats = media.get('stats') or {} + get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total'])) + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'description': clean_html(media.get('description')), + 'thumbnails': thumbnails, + 'duration': int_or_none(metadata.get('duration')) or None, + 'timestamp': int_or_none(metadata.get('created_time')), + 'uploader': owner.get('screenname'), + 'uploader_id': owner.get('id') or metadata.get('screenname'), + 'age_limit': 18 if metadata.get('explicit') else 0, + 'tags': metadata.get('tags'), + 'view_count': get_count('view') or int_or_none(media.get('audienceCount')), + 'like_count': get_count('like'), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + } -class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): - IE_NAME = 'dailymotion:playlist' - _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?Px[0-9a-z]+)' - _TESTS = [{ - 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', - 'info_dict': { - 'title': 'SPORT', - 'id': 'xv4bw', - }, - 'playlist_mincount': 20, - }] +class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor): _PAGE_SIZE = 100 - def _fetch_page(self, playlist_id, authorizaion, page): + def _fetch_page(self, playlist_id, page): page += 1 - videos = self._download_json( - 'https://graphql.api.dailymotion.com', - playlist_id, 'Downloading page %d' % page, - data=json.dumps({ - 'query': '''{ - collection(xid: "%s") { - videos(first: %d, page: %d) { - pageInfo { - hasNextPage - nextPage - } + videos = self._call_api( + self._OBJECT_TYPE, playlist_id, + '''videos(allowExplicit: %s, first: %d, page: %d) { edges { node { xid url } } - } - } -}''' % (playlist_id, self._PAGE_SIZE, page) - }).encode(), headers={ - 'Authorization': authorizaion, - 'Origin': 'https://www.dailymotion.com', - })['data']['collection']['videos'] + }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page), + 'Downloading page %d' % page)['videos'] for edge in videos['edges']: node = edge['node'] yield self.url_result( @@ -427,86 +345,49 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - api = self._parse_json(self._search_regex( - r'__PLAYER_CONFIG__\s*=\s*({.+?});', - webpage, 'player config'), playlist_id)['context']['api'] - auth = self._download_json( - api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'), - playlist_id, data=urlencode_postdata({ - 'client_id': api.get('client_id', 'f1a362d288c1b98099c7'), - 'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'), - 'grant_type': 'client_credentials', - })) - authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token']) entries = OnDemandPagedList(functools.partial( - self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE) + self._fetch_page, playlist_id), self._PAGE_SIZE) return self.playlist_result( - entries, playlist_id, - self._og_search_title(webpage)) + entries, playlist_id) -class DailymotionUserIE(DailymotionBaseInfoExtractor): +class DailymotionPlaylistIE(DailymotionPlaylistBaseIE): + IE_NAME = 'dailymotion:playlist' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?Px[0-9a-z]+)' + _TESTS = [{ + 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', + 'info_dict': { + 'id': 'xv4bw', + }, + 'playlist_mincount': 20, + }] + _OBJECT_TYPE = 'collection' + + +class DailymotionUserIE(DailymotionPlaylistBaseIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P[^/]+)' - _MORE_PAGES_INDICATOR = r'(?s)
    .*?[^/]+)' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', 'info_dict': { 'id': 'nqtv', - 'title': 'Rémi Gaillard', }, - 'playlist_mincount': 100, + 'playlist_mincount': 152, }, { 'url': 'http://www.dailymotion.com/user/UnderProject', 'info_dict': { 'id': 'UnderProject', - 'title': 'UnderProject', }, - 'playlist_mincount': 1800, - 'expected_warnings': [ - 'Stopped at duplicated page', - ], + 'playlist_mincount': 1000, 'skip': 'Takes too long time', + }, { + 'url': 'https://www.dailymotion.com/user/nqtv', + 'info_dict': { + 'id': 'nqtv', + }, + 'playlist_mincount': 148, + 'params': { + 'age_limit': 0, + }, }] - - def _extract_entries(self, id): - video_ids = set() - processed_urls = set() - for pagenum in itertools.count(1): - page_url = self._PAGE_TEMPLATE % (id, pagenum) - webpage, urlh = self._download_webpage_handle_no_ff( - page_url, id, 'Downloading page %s' % pagenum) - if urlh.geturl() in processed_urls: - self.report_warning('Stopped at duplicated page %s, which is the same as %s' % ( - page_url, urlh.geturl()), id) - break - - processed_urls.add(urlh.geturl()) - - for video_id in re.findall(r'data-xid="(.+?)"', webpage): - if video_id not in video_ids: - yield self.url_result( - 'http://www.dailymotion.com/video/%s' % video_id, - DailymotionIE.ie_key(), video_id) - video_ids.add(video_id) - - if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: - break - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - user = mobj.group('user') - webpage = self._download_webpage( - 'https://www.dailymotion.com/user/%s' % user, user) - full_user = unescapeHTML(self._html_search_regex( - r'' % re.escape(user), - webpage, 'user')) - - return { - '_type': 'playlist', - 'id': user, - 'title': full_user, - 'entries': self._extract_entries(user), - } + _OBJECT_TYPE = 'channel' diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 195875938..a5e4a3e67 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -216,8 +216,7 @@ class VKIE(VKBaseIE): 'id': 'k3lz2cmXyRuJQSjGHUv', 'ext': 'mp4', 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f', - # TODO: fix test by fixing dailymotion description extraction - 'description': 'md5:c651358f03c56f1150b555c26d90a0fd', + 'description': 'md5:424b8e88cc873217f520e582ba28bb36', 'uploader': 'AniLibria.Tv', 'upload_date': '20160914', 'uploader_id': 'x1p5vl5', From 6471d0d3b8086b282622c84a9eea968d4edfcf9b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 26 Nov 2019 23:57:37 +0100 Subject: [PATCH 0985/1201] [openload] remove OpenLoad related extractors(closes #11999)(closes #15406) --- youtube_dl/extractor/extractors.py | 5 - youtube_dl/extractor/generic.py | 16 -- youtube_dl/extractor/openload.py | 263 ----------------------------- youtube_dl/extractor/streamango.py | 128 -------------- 4 files changed, 412 deletions(-) delete mode 100644 youtube_dl/extractor/streamango.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cf4bb8f20..0e349b778 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -796,10 +796,6 @@ from .ooyala import ( OoyalaIE, OoyalaExternalIE, ) -from .openload import ( - OpenloadIE, - VerystreamIE, -) from .ora import OraTVIE from .orf import ( ORFTVthekIE, @@ -1060,7 +1056,6 @@ from .srmediathek import SRMediathekIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamable import StreamableIE -from .streamango import StreamangoIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3d919f656..743ef47db 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -88,10 +88,6 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE -from .openload import ( - OpenloadIE, - VerystreamIE, -) from .videopress import VideoPressIE from .rutube import RutubeIE from .limelight import LimelightBaseIE @@ -3048,18 +3044,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key()) - # Look for Openload embeds - openload_urls = OpenloadIE._extract_urls(webpage) - if openload_urls: - return self.playlist_from_matches( - openload_urls, video_id, video_title, ie=OpenloadIE.ie_key()) - - # Look for Verystream embeds - verystream_urls = VerystreamIE._extract_urls(webpage) - if verystream_urls: - return self.playlist_from_matches( - verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key()) - # Look for VideoPress embeds videopress_urls = VideoPressIE._extract_urls(webpage) if videopress_urls: diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 66e38cdb4..0c20d0177 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -3,21 +3,17 @@ from __future__ import unicode_literals import json import os -import re import subprocess import tempfile -from .common import InfoExtractor from ..compat import ( compat_urlparse, compat_kwargs, ) from ..utils import ( check_executable, - determine_ext, encodeArgument, ExtractorError, - get_element_by_id, get_exe_version, is_outdated_version, std_headers, @@ -240,262 +236,3 @@ class PhantomJSwrapper(object): self._load_cookies() return (html, encodeArgument(out)) - - -class OpenloadIE(InfoExtractor): - _DOMAINS = r''' - (?: - openload\.(?:co|io|link|pw)| - oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)| - oladblock\.(?:services|xyz|me)|openloed\.co - ) - ''' - _VALID_URL = r'''(?x) - https?:// - (?P - (?:www\.)? - %s - )/ - (?:f|embed)/ - (?P[a-zA-Z0-9-_]+) - ''' % _DOMAINS - _EMBED_WORD = 'embed' - _STREAM_WORD = 'f' - _REDIR_WORD = 'stream' - _URL_IDS = ('streamurl', 'streamuri', 'streamurj') - _TESTS = [{ - 'url': 'https://openload.co/f/kUEfGclsU9o', - 'md5': 'bf1c059b004ebc7a256f89408e65c36e', - 'info_dict': { - 'id': 'kUEfGclsU9o', - 'ext': 'mp4', - 'title': 'skyrim_no-audio_1080.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { - 'url': 'https://openload.co/embed/rjC09fkPLYs', - 'info_dict': { - 'id': 'rjC09fkPLYs', - 'ext': 'mp4', - 'title': 'movie.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'subtitles': { - 'en': [{ - 'ext': 'vtt', - }], - }, - }, - 'params': { - 'skip_download': True, # test subtitles only - }, - }, { - 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', - 'only_matching': True, - }, { - 'url': 'https://openload.io/f/ZAn6oz-VZGE/', - 'only_matching': True, - }, { - 'url': 'https://openload.co/f/_-ztPaZtMhM/', - 'only_matching': True, - }, { - # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout - # for title and ext - 'url': 'https://openload.co/embed/Sxz5sADo82g/', - 'only_matching': True, - }, { - # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available - # via https://openload.co/f/e-Ixz9ZR5L0/ - 'url': 'https://openload.co/f/e-Ixz9ZR5L0/', - 'only_matching': True, - }, { - 'url': 'https://oload.tv/embed/KnG-kKZdcfY/', - 'only_matching': True, - }, { - 'url': 'http://www.openload.link/f/KnG-kKZdcfY', - 'only_matching': True, - }, { - 'url': 'https://oload.stream/f/KnG-kKZdcfY', - 'only_matching': True, - }, { - 'url': 'https://oload.xyz/f/WwRBpzW8Wtk', - 'only_matching': True, - }, { - 'url': 'https://oload.win/f/kUEfGclsU9o', - 'only_matching': True, - }, { - 'url': 'https://oload.download/f/kUEfGclsU9o', - 'only_matching': True, - }, { - 'url': 'https://oload.cloud/f/4ZDnBXRWiB8', - 'only_matching': True, - }, { - # Its title has not got its extension but url has it - 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', - 'only_matching': True, - }, { - 'url': 'https://oload.cc/embed/5NEAbI2BDSk', - 'only_matching': True, - }, { - 'url': 'https://oload.icu/f/-_i4y_F_Hs8', - 'only_matching': True, - }, { - 'url': 'https://oload.fun/f/gb6G1H4sHXY', - 'only_matching': True, - }, { - 'url': 'https://oload.club/f/Nr1L-aZ2dbQ', - 'only_matching': True, - }, { - 'url': 'https://oload.info/f/5NEAbI2BDSk', - 'only_matching': True, - }, { - 'url': 'https://openload.pw/f/WyKgK8s94N0', - 'only_matching': True, - }, { - 'url': 'https://oload.pw/f/WyKgK8s94N0', - 'only_matching': True, - }, { - 'url': 'https://oload.live/f/-Z58UZ-GR4M', - 'only_matching': True, - }, { - 'url': 'https://oload.space/f/IY4eZSst3u8/', - 'only_matching': True, - }, { - 'url': 'https://oload.services/embed/bs1NWj1dCag/', - 'only_matching': True, - }, { - 'url': 'https://oload.online/f/W8o2UfN1vNY/', - 'only_matching': True, - }, { - 'url': 'https://oload.monster/f/W8o2UfN1vNY/', - 'only_matching': True, - }, { - 'url': 'https://oload.press/embed/drTBl1aOTvk/', - 'only_matching': True, - }, { - 'url': 'https://oload.website/embed/drTBl1aOTvk/', - 'only_matching': True, - }, { - 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', - 'only_matching': True, - }, { - 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', - 'only_matching': True, - }, { - 'url': 'https://oload.best/embed/kkz9JgVZeWc/', - 'only_matching': True, - }, { - 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://oladblock.me/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://openloed.co/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://oload.vip/f/kUEfGclsU9o', - 'only_matching': True, - }] - - @classmethod - def _extract_urls(cls, webpage): - return re.findall( - r'(?x)]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' - % (cls._DOMAINS, cls._EMBED_WORD), webpage) - - def _extract_decrypted_page(self, page_url, webpage, video_id): - phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id) - return webpage - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - video_id = mobj.group('id') - - url_pattern = 'https://%s/%%s/%s/' % (host, video_id) - - for path in (self._EMBED_WORD, self._STREAM_WORD): - page_url = url_pattern % path - last = path == self._STREAM_WORD - webpage = self._download_webpage( - page_url, video_id, 'Downloading %s webpage' % path, - fatal=last) - if not webpage: - continue - if 'File not found' in webpage or 'deleted by the owner' in webpage: - if not last: - continue - raise ExtractorError('File not found', expected=True, video_id=video_id) - break - - webpage = self._extract_decrypted_page(page_url, webpage, video_id) - for element_id in self._URL_IDS: - decoded_id = get_element_by_id(element_id, webpage) - if decoded_id: - break - if not decoded_id: - decoded_id = self._search_regex( - (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<', - r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)', - r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<', - r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<', - r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, - 'stream URL') - video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id) - - title = self._og_search_title(webpage, default=None) or self._search_regex( - r']+class=["\']title["\'][^>]*>([^<]+)', webpage, - 'title', default=None) or self._html_search_meta( - 'description', webpage, 'title', fatal=True) - - entries = self._parse_html5_media_entries(page_url, webpage, video_id) - entry = entries[0] if entries else {} - subtitles = entry.get('subtitles') - - return { - 'id': video_id, - 'title': title, - 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), - 'url': video_url, - 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'), - 'subtitles': subtitles, - } - - -class VerystreamIE(OpenloadIE): - IE_NAME = 'verystream' - - _DOMAINS = r'(?:verystream\.com|woof\.tube)' - _VALID_URL = r'''(?x) - https?:// - (?P - (?:www\.)? - %s - )/ - (?:stream|e)/ - (?P[a-zA-Z0-9-_]+) - ''' % _DOMAINS - _EMBED_WORD = 'e' - _STREAM_WORD = 'stream' - _REDIR_WORD = 'gettoken' - _URL_IDS = ('videolink', ) - _TESTS = [{ - 'url': 'https://verystream.com/stream/c1GWQ9ngBBx/', - 'md5': 'd3e8c5628ccb9970b65fd65269886795', - 'info_dict': { - 'id': 'c1GWQ9ngBBx', - 'ext': 'mp4', - 'title': 'Big Buck Bunny.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { - 'url': 'https://verystream.com/e/c1GWQ9ngBBx/', - 'only_matching': True, - }] - - def _extract_decrypted_page(self, page_url, webpage, video_id): - return webpage # for Verystream, the webpage is already decrypted diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py deleted file mode 100644 index f1e17dd88..000000000 --- a/youtube_dl/extractor/streamango.py +++ /dev/null @@ -1,128 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_chr -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - js_to_json, -) - - -class StreamangoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', - 'md5': 'e992787515a182f55e38fc97588d802a', - 'info_dict': { - 'id': 'clapasobsptpkdfe', - 'ext': 'mp4', - 'title': '20170315_150006.mp4', - } - }, { - # no og:title - 'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4', - 'info_dict': { - 'id': 'foqebrpftarclpob', - 'ext': 'mp4', - 'title': 'foqebrpftarclpob', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'gone', - }, { - 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4', - 'only_matching': True, - }, { - 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', - 'only_matching': True, - }, { - 'url': 'https://streamcherry.com/f/clapasobsptpkdfe/', - 'only_matching': True, - }] - - def _real_extract(self, url): - def decrypt_src(encoded, val): - ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA' - encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded) - decoded = '' - sm = [None] * 4 - i = 0 - str_len = len(encoded) - while i < str_len: - for j in range(4): - sm[j % 4] = ALPHABET.index(encoded[i]) - i += 1 - char_code = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val - decoded += compat_chr(char_code) - if sm[2] != 0x40: - char_code = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2) - decoded += compat_chr(char_code) - if sm[3] != 0x40: - char_code = ((sm[2] & 0x3) << 0x6) | sm[3] - decoded += compat_chr(char_code) - return decoded - - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title(webpage, default=video_id) - - formats = [] - for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage): - mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_) - if mobj is None: - continue - - format_ = format_.replace(mobj.group(0), '') - - video = self._parse_json( - format_, video_id, transform_source=js_to_json, - fatal=False) or {} - - mobj = re.search( - r'([\'"])(?P(?:(?!\1).)+)\1\s*,\s*(?P\d+)', - mobj.group(1)) - if mobj is None: - continue - - src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val'))) - if not src: - continue - - ext = determine_ext(src, default_ext=None) - if video.get('type') == 'application/dash+xml' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - src, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'url': src, - 'ext': ext or 'mp4', - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'tbr': int_or_none(video.get('bitrate')), - }) - - if not formats: - error = self._search_regex( - r']+\bclass=["\']lead[^>]+>(.+?)

    ', webpage, - 'error', default=None) - if not error and '>Sorry' in webpage: - error = 'Video %s is not available' % video_id - if error: - raise ExtractorError(error, expected=True) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'url': url, - 'title': title, - 'formats': formats, - } From 681ac7c92abbbd55be9796de86c2cc0d1d70a4c9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 27 Nov 2019 13:57:30 +0100 Subject: [PATCH 0986/1201] [vimeo] improve extraction - fix review extraction - fix ondemand extraction - make password protected player case as an expected error(closes #22896) - simplify channel based extractors code --- youtube_dl/extractor/vimeo.py | 177 +++++++++++++++++----------------- 1 file changed, 87 insertions(+), 90 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 9abd59d98..baa46d5f3 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -15,18 +15,20 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + clean_html, determine_ext, + dict_get, ExtractorError, js_to_json, int_or_none, merge_dicts, - NO_DEFAULT, OnDemandPagedList, parse_filesize, RegexNotFoundError, sanitized_Request, smuggle_url, std_headers, + str_or_none, try_get, unified_timestamp, unsmuggle_url, @@ -210,7 +212,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): video_uploader_url = owner.get('url') return { - 'id': video_id, + 'id': str_or_none(video_data.get('id')) or video_id, 'title': self._live_title(video_title) if is_live else video_title, 'uploader': owner.get('name'), 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, @@ -258,11 +260,11 @@ class VimeoIE(VimeoBaseInfoExtractor): (?: (?: www| - (?Pplayer) + player ) \. )? - vimeo(?Ppro)?\.com/ + vimeo(?:pro)?\.com/ (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:.*?/)? (?: @@ -284,7 +286,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'id': '56015672', 'ext': 'mp4', 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", - 'description': 'md5:509a9ad5c9bf97c60faee9203aca4479', + 'description': 'md5:2d3305bad981a06ff79f027f19865021', 'timestamp': 1355990239, 'upload_date': '20121220', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434', @@ -293,6 +295,9 @@ class VimeoIE(VimeoBaseInfoExtractor): 'duration': 10, 'license': 'by-sa', }, + 'params': { + 'format': 'best[protocol=https]', + }, }, { 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', @@ -305,8 +310,13 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'openstreetmapus', 'uploader': 'OpenStreetMap US', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', - 'description': 'md5:fd69a7b8d8c34a4e1d2ec2e4afd6ec30', + 'description': 'md5:2c362968038d4499f4d79f88458590c1', 'duration': 1595, + 'upload_date': '20130610', + 'timestamp': 1370893156, + }, + 'params': { + 'format': 'best[protocol=https]', }, }, { @@ -323,6 +333,10 @@ class VimeoIE(VimeoBaseInfoExtractor): 'duration': 3610, 'description': None, }, + 'params': { + 'format': 'best[protocol=https]', + }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 'http://vimeo.com/68375962', @@ -341,6 +355,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', }, 'params': { + 'format': 'best[protocol=https]', 'videopassword': 'youtube-dl', }, }, @@ -441,10 +456,14 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': '10Ft Films', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms', 'uploader_id': 'tenfootfilms', + 'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384', + 'upload_date': '20130830', + 'timestamp': 1377853339, }, 'params': { 'skip_download': True, }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 'http://player.vimeo.com/video/68375962', @@ -459,6 +478,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'duration': 10, }, 'params': { + 'format': 'best[protocol=https]', 'videopassword': 'youtube-dl', }, }, @@ -523,7 +543,7 @@ class VimeoIE(VimeoBaseInfoExtractor): def _verify_player_video_password(self, url, video_id, headers): password = self._downloader.params.get('videopassword') if password is None: - raise ExtractorError('This video is protected by a password, use the --video-password option') + raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) data = urlencode_postdata({ 'password': base64.b64encode(password.encode()), }) @@ -552,28 +572,26 @@ class VimeoIE(VimeoBaseInfoExtractor): r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) # Extract ID from URL - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) orig_url = url - if mobj.group('pro'): + is_pro = 'vimeopro.com/' in url + is_player = '://player.vimeo.com/video/' in url + if is_pro: # some videos require portfolio_id to be present in player url # https://github.com/ytdl-org/youtube-dl/issues/20070 url = self._extract_url(url, self._download_webpage(url, video_id)) - elif mobj.group('player'): + if not url: + url = 'https://vimeo.com/' + video_id + elif is_player: url = 'https://player.vimeo.com/video/' + video_id elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id - # Retrieve video webpage to extract further information - request = sanitized_Request(url, headers=headers) try: - webpage, urlh = self._download_webpage_handle(request, video_id) + # Retrieve video webpage to extract further information + webpage, urlh = self._download_webpage_handle( + url, video_id, headers=headers) redirect_url = compat_str(urlh.geturl()) - # Some URLs redirect to ondemand can't be extracted with - # this extractor right away thus should be passed through - # ondemand extractor (e.g. https://vimeo.com/73445910) - if VimeoOndemandIE.suitable(redirect_url): - return self.url_result(redirect_url, VimeoOndemandIE.ie_key()) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: errmsg = ee.cause.read() @@ -600,6 +618,7 @@ class VimeoIE(VimeoBaseInfoExtractor): cc_license = None timestamp = None + video_description = None # Extract the config JSON try: @@ -611,17 +630,17 @@ class VimeoIE(VimeoBaseInfoExtractor): # Sometimes new react-based page is served instead of old one that require # different config URL extraction approach (see # https://github.com/ytdl-org/youtube-dl/pull/7209) - vimeo_clip_page_config = self._search_regex( - r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage, - 'vimeo clip page config') - page_config = self._parse_json(vimeo_clip_page_config, video_id) + page_config = self._parse_json(self._search_regex( + r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});', + webpage, 'page config'), video_id) config_url = page_config['player']['config_url'] cc_license = page_config.get('cc_license') timestamp = try_get( page_config, lambda x: x['clip']['uploaded_on'], compat_str) - config_json = self._download_webpage(config_url, video_id) - config = json.loads(config_json) + video_description = clean_html(dict_get( + page_config, ('description', 'description_html_escaped'))) + config = self._download_json(config_url, video_id) except RegexNotFoundError: # For pro videos or player.vimeo.com urls # We try to find out to which variable is assigned the config dic @@ -675,14 +694,14 @@ class VimeoIE(VimeoBaseInfoExtractor): {'force_feature_id': True}), 'Vimeo') # Extract video description - - video_description = self._html_search_regex( - r'(?s)]*>(.*?)
    ', - webpage, 'description', default=None) + if not video_description: + video_description = self._html_search_regex( + r'(?s)]*>(.*?)
    ', + webpage, 'description', default=None) if not video_description: video_description = self._html_search_meta( 'description', webpage, default=None) - if not video_description and mobj.group('pro'): + if not video_description and is_pro: orig_webpage = self._download_webpage( orig_url, video_id, note='Downloading webpage for description', @@ -690,7 +709,7 @@ class VimeoIE(VimeoBaseInfoExtractor): if orig_webpage: video_description = self._html_search_meta( 'description', orig_webpage, default=None) - if not video_description and not mobj.group('player'): + if not video_description and not is_player: self._downloader.report_warning('Cannot find video description') # Extract upload date @@ -747,9 +766,9 @@ class VimeoIE(VimeoBaseInfoExtractor): return info_dict -class VimeoOndemandIE(VimeoBaseInfoExtractor): +class VimeoOndemandIE(VimeoIE): IE_NAME = 'vimeo:ondemand' - _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P[^/?#&]+)' _TESTS = [{ # ondemand video not available via https://vimeo.com/id 'url': 'https://vimeo.com/ondemand/20704', @@ -761,24 +780,32 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'uploader': 'גם סרטים', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', + 'description': 'md5:4c027c965e439de4baab621e48b60791', + 'upload_date': '20140906', + 'timestamp': 1410032453, }, 'params': { 'format': 'best[protocol=https]', }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { # requires Referer to be passed along with og:video:url 'url': 'https://vimeo.com/ondemand/36938/126682985', 'info_dict': { - 'id': '126682985', + 'id': '126584684', 'ext': 'mp4', 'title': 'Rävlock, rätt läte på rätt plats', 'uploader': 'Lindroth & Norin', - 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user14430847', - 'uploader_id': 'user14430847', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin', + 'uploader_id': 'lindrothnorin', + 'description': 'md5:c3c46a90529612c8279fb6af803fc0df', + 'upload_date': '20150502', + 'timestamp': 1430586422, }, 'params': { 'skip_download': True, }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 'https://vimeo.com/ondemand/nazmaalik', 'only_matching': True, @@ -790,16 +817,6 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - return self.url_result( - # Some videos require Referer to be passed along with og:video:url - # similarly to generic vimeo embeds (e.g. - # https://vimeo.com/ondemand/36938/126682985). - VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url), - VimeoIE.ie_key()) - class VimeoChannelIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:channel' @@ -815,6 +832,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): }, 'playlist_mincount': 25, }] + _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s' def _page_url(self, base_url, pagenum): return '%s/videos/page:%d/' % (base_url, pagenum) @@ -886,14 +904,13 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return self.playlist_result(title_and_entries, list_id, list_title) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id) + channel_id = self._match_id(url) + return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id) class VimeoUserIE(VimeoChannelIE): IE_NAME = 'vimeo:user' - _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' _TITLE_RE = r']+?class="user">([^<>]+?)' _TESTS = [{ 'url': 'https://vimeo.com/nkistudio/videos', @@ -903,11 +920,7 @@ class VimeoUserIE(VimeoChannelIE): }, 'playlist_mincount': 66, }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - return self._extract_videos(name, 'https://vimeo.com/%s' % name) + _BASE_URL_TEMPL = 'https://vimeo.com/%s' class VimeoAlbumIE(VimeoChannelIE): @@ -969,25 +982,18 @@ class VimeoAlbumIE(VimeoChannelIE): r'\s*(.+?)(?:\s+on Vimeo)?', webpage, 'title', fatal=False)) -class VimeoGroupsIE(VimeoAlbumIE): +class VimeoGroupsIE(VimeoChannelIE): IE_NAME = 'vimeo:group' - _VALID_URL = r'https://vimeo\.com/groups/(?P[^/]+)(?:/(?!videos?/\d+)|$)' + _VALID_URL = r'https://vimeo\.com/groups/(?P[^/]+)(?:/(?!videos?/\d+)|$)' _TESTS = [{ - 'url': 'https://vimeo.com/groups/rolexawards', + 'url': 'https://vimeo.com/groups/kattykay', 'info_dict': { - 'id': 'rolexawards', - 'title': 'Rolex Awards for Enterprise', + 'id': 'kattykay', + 'title': 'Katty Kay', }, - 'playlist_mincount': 73, + 'playlist_mincount': 27, }] - - def _extract_list_title(self, webpage): - return self._og_search_title(webpage, fatal=False) - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name) + _BASE_URL_TEMPL = 'https://vimeo.com/groups/%s' class VimeoReviewIE(VimeoBaseInfoExtractor): @@ -1003,7 +1009,9 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'title': "DICK HARDWICK 'Comedian'", 'uploader': 'Richard Hardwick', 'uploader_id': 'user21297594', - } + 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks", + }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'note': 'video player needs Referer', 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053', @@ -1016,7 +1024,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'duration': 2773, 'thumbnail': r're:^https?://.*\.jpg$', 'uploader_id': 'user22258446', - } + }, + 'skip': 'video gone', }, { 'note': 'Password protected', 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde', @@ -1036,32 +1045,20 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _real_initialize(self): self._login() - def _get_config_url(self, webpage_url, video_id, video_password_verified=False): - webpage = self._download_webpage(webpage_url, video_id) - config_url = self._html_search_regex( - r'data-config-url=(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'config URL', default=None, group='url') - if not config_url: - data = self._parse_json(self._search_regex( - r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', - default=NO_DEFAULT if video_password_verified else '{}'), video_id) - config = data.get('vimeo_esi', {}).get('config', {}) - config_url = config.get('configUrl') or try_get(config, lambda x: x['clipData']['configUrl']) - if config_url is None: - self._verify_video_password(webpage_url, video_id, webpage) - config_url = self._get_config_url( - webpage_url, video_id, video_password_verified=True) - return config_url - def _real_extract(self, url): page_url, video_id = re.match(self._VALID_URL, url).groups() - config_url = self._get_config_url(url, video_id) + clip_data = self._download_json( + page_url.replace('/review/', '/review/data/'), + video_id)['clipData'] + config_url = clip_data['configUrl'] config = self._download_json(config_url, video_id) info_dict = self._parse_config(config, video_id) - source_format = self._extract_original_format(page_url, video_id) + source_format = self._extract_original_format( + page_url + '/action', video_id) if source_format: info_dict['formats'].append(source_format) self._vimeo_sort_formats(info_dict['formats']) + info_dict['description'] = clean_html(clip_data.get('description')) return info_dict From e3f00f139fc227217325c8e84e0b340e12ee9bb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 28 Nov 2019 23:09:48 +0700 Subject: [PATCH 0987/1201] [ChangeLog] Actualize [ci skip] --- ChangeLog | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/ChangeLog b/ChangeLog index daaff3eef..d724d75ce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,36 @@ +version + +Core ++ [utils] Add generic caesar cipher and rot47 +* [utils] Handle rd-suffixed day parts in unified_strdate (#23199) + +Extractors +* [vimeo] Improve extraction + * Fix review extraction + * Fix ondemand extraction + * Make password protected player case as an expected error (#22896) + * Simplify channel based extractors code +- [openload] Remove extractor (#11999) +- [verystream] Remove extractor +- [streamango] Remove extractor (#15406) +* [dailymotion] Improve extraction + * Extract http formats included in m3u8 manifest + * Fix user extraction (#3553, #21415) + + Add suport for User Authentication (#11491) + * Fix password protected videos extraction (#23176) + * Respect age limit option and family filter cookie value (#18437) + * Handle video url playlist query param + * Report allowed countries for geo-restricted videos +* [corus] Improve extraction + + Add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com + and disneylachaine.ca (#20861) + + Add support for self hosted videos (#22075) + * Detect DRM protection (#14910, #9164) +* [vivo] Fix extraction (#22328, #22279) ++ [bitchute] Extract upload date (#22990, #23193) +* [soundcloud] Update client id (#23214) + + version 2019.11.22 Core From b568561eba6f4aceb87419e21aba11567c5de7da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 28 Nov 2019 23:25:25 +0700 Subject: [PATCH 0988/1201] release 2019.11.28 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 3 --- youtube_dl/version.py | 2 +- 8 files changed, 14 insertions(+), 17 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d3e11cdcf..3a94bd621 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.22 + [debug] youtube-dl version 2019.11.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 51bf4db3b..72bee12aa 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 19025ff25..ddf67e951 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index a381b6979..7122e2714 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.11.22 + [debug] youtube-dl version 2019.11.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 9c945d5ec..a93882b39 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.11.22** +- [ ] I've verified that I'm running youtube-dl version **2019.11.28** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index d724d75ce..d4f809fc6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.11.28 Core + [utils] Add generic caesar cipher and rot47 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3dcb026c5..2744dfca8 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -618,7 +618,6 @@ - **OnionStudios** - **Ooyala** - **OoyalaExternal** - - **Openload** - **OraTV** - **orf:fm4**: radio FM4 - **orf:fm4:story**: fm4.orf.at stories @@ -825,7 +824,6 @@ - **Steam** - **Stitcher** - **Streamable** - - **Streamango** - **streamcloud.eu** - **StreamCZ** - **StreetVoice** @@ -976,7 +974,6 @@ - **Vbox7** - **VeeHD** - **Veoh** - - **verystream** - **Vesti**: Вести.Ru - **Vevo** - **VevoPlaylist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 361809681..1227abc0a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.11.22' +__version__ = '2019.11.28' From 348c6bf1c1a00eec323d6e21ff7b9b12699afe04 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 29 Nov 2019 17:05:06 +0100 Subject: [PATCH 0989/1201] [utils] handle int values passed to str_to_int --- test/test_utils.py | 1 + youtube_dl/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index e83c8ea11..fed94a906 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -499,6 +499,7 @@ class TestUtil(unittest.TestCase): def test_str_to_int(self): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) + self.assertEqual(str_to_int(523), 523) def test_url_basename(self): self.assertEqual(url_basename('http://foo.de/'), '') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b14603d8a..328f037a8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3519,8 +3519,8 @@ def str_or_none(v, default=None): def str_to_int(int_str): """ A more relaxed version of int_or_none """ - if int_str is None: - return None + if not isinstance(int_str, compat_str): + return int_str int_str = re.sub(r'[,\.\+]', '', int_str) return int(int_str) From 7f641d2c7a68b70d6c1e273af108741e5779bc28 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 29 Nov 2019 17:06:34 +0100 Subject: [PATCH 0990/1201] [adobetv] improve extaction - use OnDemandPagedList for list extractors - reduce show extraction requests - extract original video format and subtitles - add support for adobe tv embeds --- youtube_dl/extractor/adobetv.py | 239 ++++++++++++++++++++--------- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 166 insertions(+), 74 deletions(-) diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py index 008c98e51..80060f037 100644 --- a/youtube_dl/extractor/adobetv.py +++ b/youtube_dl/extractor/adobetv.py @@ -1,25 +1,119 @@ from __future__ import unicode_literals +import functools import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - parse_duration, - unified_strdate, - str_to_int, - int_or_none, float_or_none, + int_or_none, ISO639Utils, - determine_ext, + OnDemandPagedList, + parse_duration, + str_or_none, + str_to_int, + unified_strdate, ) class AdobeTVBaseIE(InfoExtractor): - _API_BASE_URL = 'http://tv.adobe.com/api/v4/' + def _call_api(self, path, video_id, query, note=None): + return self._download_json( + 'http://tv.adobe.com/api/v4/' + path, + video_id, note, query=query)['data'] + + def _parse_subtitles(self, video_data, url_key): + subtitles = {} + for translation in video_data.get('translations', []): + vtt_path = translation.get(url_key) + if not vtt_path: + continue + lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium']) + subtitles.setdefault(lang, []).append({ + 'ext': 'vtt', + 'url': vtt_path, + }) + return subtitles + + def _parse_video_data(self, video_data): + video_id = compat_str(video_data['id']) + title = video_data['title'] + + s3_extracted = False + formats = [] + for source in video_data.get('videos', []): + source_url = source.get('url') + if not source_url: + continue + f = { + 'format_id': source.get('quality_level'), + 'fps': int_or_none(source.get('frame_rate')), + 'height': int_or_none(source.get('height')), + 'tbr': int_or_none(source.get('video_data_rate')), + 'width': int_or_none(source.get('width')), + 'url': source_url, + } + original_filename = source.get('original_filename') + if original_filename: + if not (f.get('height') and f.get('width')): + mobj = re.search(r'_(\d+)x(\d+)', original_filename) + if mobj: + f.update({ + 'height': int(mobj.group(2)), + 'width': int(mobj.group(1)), + }) + if original_filename.startswith('s3://') and not s3_extracted: + formats.append({ + 'format_id': 'original', + 'preference': 1, + 'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'), + }) + s3_extracted = True + formats.append(f) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnail'), + 'upload_date': unified_strdate(video_data.get('start_date')), + 'duration': parse_duration(video_data.get('duration')), + 'view_count': str_to_int(video_data.get('playcount')), + 'formats': formats, + 'subtitles': self._parse_subtitles(video_data, 'vtt'), + } + + +class AdobeTVEmbedIE(AdobeTVBaseIE): + IE_NAME = 'adobetv:embed' + _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P\d+)' + _TEST = { + 'url': 'https://tv.adobe.com/embed/22/4153', + 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a', + 'info_dict': { + 'id': '4153', + 'ext': 'flv', + 'title': 'Creating Graphics Optimized for BlackBerry', + 'description': 'md5:eac6e8dced38bdaae51cd94447927459', + 'thumbnail': r're:https?://.*\.jpg$', + 'upload_date': '20091109', + 'duration': 377, + 'view_count': int, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_data = self._call_api( + 'episode/' + video_id, video_id, {'disclosure': 'standard'})[0] + return self._parse_video_data(video_data) class AdobeTVIE(AdobeTVBaseIE): + IE_NAME = 'adobetv' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?watch/(?P[^/]+)/(?P[^/]+)' _TEST = { @@ -42,45 +136,33 @@ class AdobeTVIE(AdobeTVBaseIE): if not language: language = 'en' - video_data = self._download_json( - self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname), - urlname)['data'][0] - - formats = [{ - 'url': source['url'], - 'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None, - 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), - 'tbr': int_or_none(source.get('video_data_rate')), - } for source in video_data['videos']] - self._sort_formats(formats) - - return { - 'id': compat_str(video_data['id']), - 'title': video_data['title'], - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnail'), - 'upload_date': unified_strdate(video_data.get('start_date')), - 'duration': parse_duration(video_data.get('duration')), - 'view_count': str_to_int(video_data.get('playcount')), - 'formats': formats, - } + video_data = self._call_api( + 'episode/get', urlname, { + 'disclosure': 'standard', + 'language': language, + 'show_urlname': show_urlname, + 'urlname': urlname, + })[0] + return self._parse_video_data(video_data) class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): - def _parse_page_data(self, page_data): - return [self.url_result(self._get_element_url(element_data)) for element_data in page_data] + _PAGE_SIZE = 25 - def _extract_playlist_entries(self, url, display_id): - page = self._download_json(url, display_id) - entries = self._parse_page_data(page['data']) - for page_num in range(2, page['paging']['pages'] + 1): - entries.extend(self._parse_page_data( - self._download_json(url + '&page=%d' % page_num, display_id)['data'])) - return entries + def _fetch_page(self, display_id, query, page): + page += 1 + query['page'] = page + for element_data in self._call_api( + self._RESOURCE, display_id, query, 'Download Page %d' % page): + yield self._process_data(element_data) + + def _extract_playlist_entries(self, display_id, query): + return OnDemandPagedList(functools.partial( + self._fetch_page, display_id, query), self._PAGE_SIZE) class AdobeTVShowIE(AdobeTVPlaylistBaseIE): + IE_NAME = 'adobetv:show' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?show/(?P[^/]+)' _TEST = { @@ -92,26 +174,31 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE): }, 'playlist_mincount': 136, } - - def _get_element_url(self, element_data): - return element_data['urls'][0] + _RESOURCE = 'episode' + _process_data = AdobeTVBaseIE._parse_video_data def _real_extract(self, url): language, show_urlname = re.match(self._VALID_URL, url).groups() if not language: language = 'en' - query = 'language=%s&show_urlname=%s' % (language, show_urlname) + query = { + 'disclosure': 'standard', + 'language': language, + 'show_urlname': show_urlname, + } - show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0] + show_data = self._call_api( + 'show/get', show_urlname, query)[0] return self.playlist_result( - self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname), - compat_str(show_data['id']), - show_data['show_name'], - show_data['show_description']) + self._extract_playlist_entries(show_urlname, query), + str_or_none(show_data.get('id')), + show_data.get('show_name'), + show_data.get('show_description')) class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): + IE_NAME = 'adobetv:channel' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?channel/(?P[^/]+)(?:/(?P[^/]+))?' _TEST = { @@ -121,24 +208,30 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): }, 'playlist_mincount': 96, } + _RESOURCE = 'show' - def _get_element_url(self, element_data): - return element_data['url'] + def _process_data(self, show_data): + return self.url_result( + show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id'))) def _real_extract(self, url): language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups() if not language: language = 'en' - query = 'language=%s&channel_urlname=%s' % (language, channel_urlname) + query = { + 'channel_urlname': channel_urlname, + 'language': language, + } if category_urlname: - query += '&category_urlname=%s' % category_urlname + query['category_urlname'] = category_urlname return self.playlist_result( - self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname), + self._extract_playlist_entries(channel_urlname, query), channel_urlname) -class AdobeTVVideoIE(InfoExtractor): +class AdobeTVVideoIE(AdobeTVBaseIE): + IE_NAME = 'adobetv:video' _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P\d+)' _TEST = { @@ -160,38 +253,36 @@ class AdobeTVVideoIE(InfoExtractor): video_data = self._parse_json(self._search_regex( r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id) + title = video_data['title'] - formats = [{ - 'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')), - 'url': source['src'], - 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), - 'tbr': int_or_none(source.get('bitrate')), - } for source in video_data['sources']] + formats = [] + sources = video_data.get('sources') or [] + for source in sources: + source_src = source.get('src') + if not source_src: + continue + formats.append({ + 'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000), + 'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])), + 'height': int_or_none(source.get('height') or None), + 'tbr': int_or_none(source.get('bitrate') or None), + 'width': int_or_none(source.get('width') or None), + 'url': source_src, + }) self._sort_formats(formats) # For both metadata and downloaded files the duration varies among # formats. I just pick the max one duration = max(filter(None, [ float_or_none(source.get('duration'), scale=1000) - for source in video_data['sources']])) - - subtitles = {} - for translation in video_data.get('translations', []): - lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium']) - if lang_id not in subtitles: - subtitles[lang_id] = [] - subtitles[lang_id].append({ - 'url': translation['vttPath'], - 'ext': 'vtt', - }) + for source in sources])) return { 'id': video_id, 'formats': formats, - 'title': video_data['title'], + 'title': title, 'description': video_data.get('description'), - 'thumbnail': video_data['video'].get('poster'), + 'thumbnail': video_data.get('video', {}).get('poster'), 'duration': duration, - 'subtitles': subtitles, + 'subtitles': self._parse_subtitles(video_data, 'vttPath'), } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0e349b778..0f27c9678 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -21,6 +21,7 @@ from .acast import ( from .adn import ADNIE from .adobeconnect import AdobeConnectIE from .adobetv import ( + AdobeTVEmbedIE, AdobeTVIE, AdobeTVShowIE, AdobeTVChannelIE, From a15adbe461584e2e631d1be97805e81c17cfd3fe Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 29 Nov 2019 17:12:55 +0100 Subject: [PATCH 0991/1201] [channel9] reduce response size and update tests --- youtube_dl/extractor/channel9.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index 81108e704..09cacf6d3 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -32,7 +32,7 @@ class Channel9IE(InfoExtractor): 'upload_date': '20130828', 'session_code': 'KOS002', 'session_room': 'Arena 1A', - 'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'], + 'session_speakers': 'count:5', }, }, { 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', @@ -64,15 +64,15 @@ class Channel9IE(InfoExtractor): 'params': { 'skip_download': True, }, - }, { - 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', - 'info_dict': { - 'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b', - 'title': 'Channel 9', - }, - 'playlist_mincount': 100, }, { 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', + 'info_dict': { + 'id': 'Events/DEVintersection/DEVintersection-2016', + 'title': 'DEVintersection 2016 Orlando Sessions', + }, + 'playlist_mincount': 14, + }, { + 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', 'only_matching': True, }, { 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', @@ -112,11 +112,11 @@ class Channel9IE(InfoExtractor): episode_data), content_path) content_id = episode_data['contentId'] is_session = '/Sessions(' in episode_data['api'] - content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,' if is_session: - content_url += '?$expand=Speakers' + content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers' else: - content_url += '?$expand=Authors' + content_url += 'Authors,Body&$expand=Authors' content_data = self._download_json(content_url, content_id) title = content_data['Title'] @@ -210,7 +210,7 @@ class Channel9IE(InfoExtractor): 'id': content_id, 'title': title, 'description': clean_html(content_data.get('Description') or content_data.get('Body')), - 'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'), + 'thumbnail': content_data.get('VideoPlayerPreviewImage'), 'duration': int_or_none(content_data.get('MediaLengthInSeconds')), 'timestamp': parse_iso8601(content_data.get('PublishedDate')), 'avg_rating': int_or_none(content_data.get('Rating')), From 88a7a9089a0f3ccdd5e0e6f10b529652a24cbc7e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 29 Nov 2019 17:22:54 +0100 Subject: [PATCH 0992/1201] [abcotvs] relax _VALID_URL regex and improve metadata extraction(closes #18014) --- youtube_dl/extractor/abcotvs.py | 79 ++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/abcotvs.py b/youtube_dl/extractor/abcotvs.py index 03b92a39c..0bc69a64f 100644 --- a/youtube_dl/extractor/abcotvs.py +++ b/youtube_dl/extractor/abcotvs.py @@ -4,29 +4,30 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + dict_get, int_or_none, - parse_iso8601, + try_get, ) class ABCOTVSIE(InfoExtractor): IE_NAME = 'abcotvs' IE_DESC = 'ABC Owned Television Stations' - _VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P[^/]+))?/(?P\d+)' + _VALID_URL = r'https?://(?Pabc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P[^/]+))?/(?P\d+)' _TESTS = [ { 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/', 'info_dict': { - 'id': '472581', + 'id': '472548', 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 'ext': 'mp4', - 'title': 'East Bay museum celebrates vintage synthesizers', + 'title': 'East Bay museum celebrates synthesized music', 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3', 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1421123075, + 'timestamp': 1421118520, 'upload_date': '20150113', - 'uploader': 'Jonathan Bloom', }, 'params': { # m3u8 download @@ -37,39 +38,63 @@ class ABCOTVSIE(InfoExtractor): 'url': 'http://abc7news.com/472581', 'only_matching': True, }, + { + 'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/', + 'only_matching': True, + }, ] + _SITE_MAP = { + '6abc': 'wpvi', + 'abc11': 'wtvd', + 'abc13': 'ktrk', + 'abc30': 'kfsn', + 'abc7': 'kabc', + 'abc7chicago': 'wls', + 'abc7news': 'kgo', + 'abc7ny': 'wabc', + } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + site, display_id, video_id = re.match(self._VALID_URL, url).groups() + display_id = display_id or video_id + station = self._SITE_MAP[site] - webpage = self._download_webpage(url, display_id) + data = self._download_json( + 'https://api.abcotvs.com/v2/content', display_id, query={ + 'id': video_id, + 'key': 'otv.web.%s.story' % station, + 'station': station, + })['data'] + video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data + video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id)) + title = video.get('title') or video['linkText'] - m3u8 = self._html_search_meta( - 'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0] - - formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') + formats = [] + m3u8_url = video.get('m3u8') + if m3u8_url: + formats = self._extract_m3u8_formats( + video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False) + mp4_url = video.get('mp4') + if mp4_url: + formats.append({ + 'abr': 128, + 'format_id': 'https', + 'height': 360, + 'url': mp4_url, + 'width': 640, + }) self._sort_formats(formats) - title = self._og_search_title(webpage).strip() - description = self._og_search_description(webpage).strip() - thumbnail = self._og_search_thumbnail(webpage) - timestamp = parse_iso8601(self._search_regex( - r'
    \s*
    ', - webpage, 'description', default=None) or self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) - duration = parse_duration(self._search_regex( - r'([^<]+)<', - webpage, 'duration', fatal=False)) - categories = re.findall(r'([^<]+)', webpage) - - return { - '_type': 'url_transparent', - 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id), - 'id': film_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'categories': categories, - 'ie_key': 'ViewLiftEmbed', - } + domain, path, display_id = re.match(self._VALID_URL, url).groups() + site = domain.split('.')[-2] + if site in self._SITE_MAP: + site = self._SITE_MAP[site] + modules = self._call_api( + site, 'content/pages', display_id, { + 'includeContent': 'true', + 'moduleOffset': 1, + 'path': path, + 'site': site, + })['modules'] + film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule') + return { + '_type': 'url_transparent', + 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id), + 'id': film_id, + 'display_id': display_id, + 'ie_key': 'ViewLiftEmbed', + } From 51c7f40c83a12f9dc0fce0b9e5102a0c13467b6a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 27 Jan 2020 23:37:29 +0100 Subject: [PATCH 1090/1201] [vimeo] fix album extraction(closes #23864) --- youtube_dl/extractor/vimeo.py | 68 +++++++++++++++++------------------ 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index baa46d5f3..f378aa283 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -841,33 +841,6 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return self._TITLE or self._html_search_regex( self._TITLE_RE, webpage, 'list title', fatal=False) - def _login_list_password(self, page_url, list_id, webpage): - login_form = self._search_regex( - r'(?s)]+?id="pw_form"(.*?)', - webpage, 'login form', default=None) - if not login_form: - return webpage - - password = self._downloader.params.get('videopassword') - if password is None: - raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True) - fields = self._hidden_inputs(login_form) - token, vuid = self._extract_xsrft_and_vuid(webpage) - fields['token'] = token - fields['password'] = password - post = urlencode_postdata(fields) - password_path = self._search_regex( - r'action="([^"]+)"', login_form, 'password URL') - password_url = compat_urlparse.urljoin(page_url, password_path) - password_request = sanitized_Request(password_url, post) - password_request.add_header('Content-type', 'application/x-www-form-urlencoded') - self._set_vimeo_cookie('vuid', vuid) - self._set_vimeo_cookie('xsrft', token) - - return self._download_webpage( - password_request, list_id, - 'Verifying the password', 'Wrong password') - def _title_and_entries(self, list_id, base_url): for pagenum in itertools.count(1): page_url = self._page_url(base_url, pagenum) @@ -876,7 +849,6 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): 'Downloading page %s' % pagenum) if pagenum == 1: - webpage = self._login_list_password(page_url, list_id, webpage) yield self._extract_list_title(webpage) # Try extracting href first since not all videos are available via @@ -923,7 +895,7 @@ class VimeoUserIE(VimeoChannelIE): _BASE_URL_TEMPL = 'https://vimeo.com/%s' -class VimeoAlbumIE(VimeoChannelIE): +class VimeoAlbumIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:album' _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P\d+)(?:$|[?#]|/(?!video))' _TITLE_RE = r'