From 9ed06812ec8da6f1364acd00261935c334994e62 Mon Sep 17 00:00:00 2001 From: ealgase Date: Sat, 6 Apr 2019 23:59:41 -0400 Subject: [PATCH 01/10] [streamango] add support for streamcherry.com --- youtube_dl/extractor/streamango.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py index efb259f96..f1e17dd88 100644 --- a/youtube_dl/extractor/streamango.py +++ b/youtube_dl/extractor/streamango.py @@ -14,7 +14,7 @@ from ..utils import ( class StreamangoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', 'md5': 'e992787515a182f55e38fc97588d802a', @@ -41,6 +41,9 @@ class StreamangoIE(InfoExtractor): }, { 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', 'only_matching': True, + }, { + 'url': 'https://streamcherry.com/f/clapasobsptpkdfe/', + 'only_matching': True, }] def _real_extract(self, url): From 9c017253e83ac3dfd363566ccbb9fc63f4e2ac07 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 8 Apr 2019 16:34:03 +0100 Subject: [PATCH 02/10] [jwplatform] do not match manifest URLs(#20596) --- youtube_dl/extractor/jwplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index d19a6a774..647b905f1 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class JWPlatformIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' + _VALID_URL = 
r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' _TESTS = [{ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325', From 5ca3459828cc0d752f02dab3e9c02cca85185cbb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:20:26 +0100 Subject: [PATCH 03/10] [kaltura] sanitize embed URLs --- youtube_dl/extractor/kaltura.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index fdf7f5bbc..79162f665 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -145,6 +145,8 @@ class KalturaIE(InfoExtractor): ) if mobj: embed_info = mobj.groupdict() + for k, v in embed_info.items(): + embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info escaped_pid = re.escape(embed_info['partner_id']) service_url = re.search( From 4bc12b8f819cd0a393e5800d4dc2ecf24401e199 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:21:46 +0100 Subject: [PATCH 04/10] [dispeak] improve mp4 bitrate extraction --- youtube_dl/extractor/dispeak.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py index c05f601e2..c345e0274 100644 --- a/youtube_dl/extractor/dispeak.py +++ b/youtube_dl/extractor/dispeak.py @@ -58,10 +58,17 @@ class DigitallySpeakingIE(InfoExtractor): stream_name = xpath_text(a_format, 'streamName', fatal=True) video_path = re.match(r'mp4\:(?P.*)', stream_name).group('path') url = video_root + video_path - vbr = xpath_text(a_format, 'bitrate') + bitrate = xpath_text(a_format, 'bitrate') + tbr = int_or_none(bitrate) + vbr = int_or_none(self._search_regex( + r'-(\d+)\.mp4', video_path, 'vbr', default=None)) + abr = tbr - vbr if tbr and vbr else None video_formats.append({ + 'format_id': bitrate, 'url': url, 
- 'vbr': int_or_none(vbr), + 'tbr': tbr, + 'vbr': vbr, + 'abr': abr, }) return video_formats From 118f7add3b9690884edb4dc887995f3815243c78 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:23:47 +0100 Subject: [PATCH 05/10] [gdc] add support for kaltura embeds and update tests(closes #20575) --- youtube_dl/extractor/gdcvault.py | 96 ++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 8806dc48a..2f555c1d4 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,22 +3,24 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .kaltura import KalturaIE from ..utils import ( HEADRequest, sanitized_Request, + smuggle_url, urlencode_postdata, ) class GDCVaultIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)/(?P(\w|-)+)?' + _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)(?:/(?P[\w-]+))?' 
_NETRC_MACHINE = 'gdcvault' _TESTS = [ { 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', 'md5': '7ce8388f544c88b7ac11c7ab1b593704', 'info_dict': { - 'id': '1019721', + 'id': '201311826596_AWNY', 'display_id': 'Doki-Doki-Universe-Sweet-Simple', 'ext': 'mp4', 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' @@ -27,7 +29,7 @@ class GDCVaultIE(InfoExtractor): { 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', 'info_dict': { - 'id': '1015683', + 'id': '201203272_1330951438328RSXR', 'display_id': 'Embracing-the-Dark-Art-of', 'ext': 'flv', 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' @@ -56,7 +58,7 @@ class GDCVaultIE(InfoExtractor): 'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface', 'md5': 'a8efb6c31ed06ca8739294960b2dbabd', 'info_dict': { - 'id': '1023460', + 'id': '840376_BQRC', 'ext': 'mp4', 'display_id': 'Tenacious-Design-and-The-Interface', 'title': 'Tenacious Design and The Interface of \'Destiny\'', @@ -66,26 +68,38 @@ class GDCVaultIE(InfoExtractor): # Multiple audios 'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC', 'info_dict': { - 'id': '1014631', - 'ext': 'flv', + 'id': '12396_1299111843500GMPX', + 'ext': 'mp4', 'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man', }, - 'params': { - 'skip_download': True, # Requires rtmpdump - 'format': 'jp', # The japanese audio - } + # 'params': { + # 'skip_download': True, # Requires rtmpdump + # 'format': 'jp', # The japanese audio + # } }, { # gdc-player.html 'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo', 'info_dict': { - 'id': '1435', + 'id': '9350_1238021887562UHXB', 'display_id': 'An-American-engine-in-Tokyo', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT', }, + }, + { + # Kaltura Embed + 'url': 
'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling', + 'info_dict': { + 'id': '0_h1fg8j3p', + 'ext': 'mp4', + 'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)', + 'timestamp': 1554401811, + 'upload_date': '20190404', + 'uploader_id': 'joe@blazestreaming.com', + }, 'params': { - 'skip_download': True, # Requires rtmpdump + 'format': 'mp4-408', }, }, ] @@ -114,10 +128,8 @@ class GDCVaultIE(InfoExtractor): return start_page def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - display_id = mobj.group('name') or video_id + video_id, name = re.match(self._VALID_URL, url).groups() + display_id = name or video_id webpage_url = 'http://www.gdcvault.com/play/' + video_id start_page = self._download_webpage(webpage_url, display_id) @@ -127,12 +139,12 @@ class GDCVaultIE(InfoExtractor): start_page, 'url', default=None) if direct_url: title = self._html_search_regex( - r'Session Name\s*(.*?)', + r'Session Name:?\s*(.*?)', start_page, 'title') video_url = 'http://www.gdcvault.com' + direct_url # resolve the url so that we can detect the correct extension - head = self._request_webpage(HEADRequest(video_url), video_id) - video_url = head.geturl() + video_url = self._request_webpage( + HEADRequest(video_url), video_id).geturl() return { 'id': video_id, @@ -141,34 +153,36 @@ class GDCVaultIE(InfoExtractor): 'title': title, } - PLAYER_REGEX = r'' - xml_root = self._html_search_regex( - PLAYER_REGEX, start_page, 'xml root', default=None) - if xml_root is None: - # Probably need to authenticate - login_res = self._login(webpage_url, display_id) - if login_res is None: - self.report_warning('Could not login.') - else: - start_page = login_res - # Grab the url from the authenticated page - xml_root = self._html_search_regex( - PLAYER_REGEX, start_page, 'xml root') + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root', default=None) + if xml_root is None: + # Probably 
need to authenticate + login_res = self._login(webpage_url, display_id) + if login_res is None: + self.report_warning('Could not login.') + else: + start_page = login_res + # Grab the url from the authenticated page + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root') - xml_name = self._html_search_regex( - r'', + r'