From 47e0cef46e9a76e589a2b1cde1b2dfa8bcf01d8e Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sun, 16 Apr 2017 00:34:34 +0200 Subject: [PATCH 0001/2417] [openload] rewrite extractor --- youtube_dl/extractor/openload.py | 110 +++++++++++++++++++------------ 1 file changed, 67 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index d8036b54a..789bf997e 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,12 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import os import re +import subprocess +import tempfile from .common import InfoExtractor -from ..compat import compat_chr from ..utils import ( + check_executable, determine_ext, + encodeArgument, ExtractorError, ) @@ -58,6 +62,39 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] + _PHANTOMJS_SCRIPT = r''' + phantom.onError = function(msg, trace) { + var msgStack = ['PHANTOM ERROR: ' + msg]; + if(trace && trace.length) { + msgStack.push('TRACE:'); + trace.forEach(function(t) { + msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line + + (t.function ? ' (in function ' + t.function +')' : '')); + }); + } + console.error(msgStack.join('\n')); + phantom.exit(1); + }; + var page = require('webpage').create(); + page.settings.resourceTimeout = 10000; + page.onInitialized = function() { + page.evaluate(function() { + delete window._phantom; + delete window.callPhantom; + }); + }; + page.open('https://openload.co/embed/%s/', function(status) { + var info = page.evaluate(function() { + return { + decoded_id: document.getElementById('streamurl').innerHTML, + title: document.querySelector('meta[name="og:title"],' + + 'meta[name=description]').content + }; + }); + console.log(info.decoded_id + ' ' + info.title); + phantom.exit(); + });''' + @staticmethod def _extract_urls(webpage): return re.findall( @@ -65,61 +102,48 @@ class OpenloadIE(InfoExtractor): webpage) def _real_extract(self, url): + exe = check_executable('phantomjs', ['-v']) + if not exe: + raise ExtractorError('PhantomJS executable not found in PATH, ' + 'download it from http://phantomjs.org', + expected=True) + video_id = self._match_id(url) - webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) + url = 'https://openload.co/embed/%s/' % video_id + webpage = self._download_webpage(url, video_id) if 'File not found' in webpage or 'deleted by the owner' in webpage: - raise ExtractorError('File not found', expected=True) + raise ExtractorError('File not found', expected=True, video_id=video_id) - ol_id = self._search_regex( - ']+id="[^"]+"[^>]*>([0-9A-Za-z]+)', - webpage, 'openload ID') + script_file = tempfile.NamedTemporaryFile(mode='w', delete=False) - decoded = '' - a = ol_id[0:24] - b = [] - for i in range(0, len(a), 8): - b.append(int(a[i:i + 8] or '0', 16)) - ol_id = ol_id[24:] - j = 0 - k = 0 - while j < len(ol_id): - c = 128 - d = 0 - e = 0 - f = 0 - _more = True - while _more: - if j + 1 >= len(ol_id): - c = 143 - f = int(ol_id[j:j + 2] or '0', 16) - j += 2 - d += (f & 127) << e - e += 7 - _more = f >= c - g = d ^ b[k % 3] - for i in range(4): - char_dec = (g >> 8 * i) & (c + 127) - char = compat_chr(char_dec) - if char != '#': - decoded += char - k += 1 + # write JS script to file and close it + with script_file: + script_file.write(self._PHANTOMJS_SCRIPT % video_id) - video_url = 'https://openload.co/stream/%s?mime=true' - video_url = video_url % decoded + self.to_screen('%s: Decoding video ID with PhantomJS' % video_id) - title = self._og_search_title(webpage, default=None) or self._search_regex( - r']+class=["\']title["\'][^>]*>([^<]+)', webpage, - 'title', default=None) or self._html_search_meta( - 'description', webpage, 'title', fatal=True) + p = subprocess.Popen([exe, '--ssl-protocol=any', script_file.name], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, err = p.communicate() + if p.returncode != 0: + raise ExtractorError('Decoding failed\n:' + + encodeArgument(err)) + else: + decoded_id, title = encodeArgument(output).strip().split(' ', 1) + + os.remove(script_file.name) + + video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id entries = self._parse_html5_media_entries(url, webpage, video_id) - subtitles = entries[0]['subtitles'] if entries else None + entry = entries[0] if entries else {} + subtitles = entry.get('subtitles') info_dict = { 'id': video_id, 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), 'url': video_url, # Seems all videos have extensions in their titles 'ext': determine_ext(title, 'mp4'), From da57ebaf84225240b356530cdf02d12596f0dce8 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Tue, 25 Apr 2017 01:06:14 +0200 Subject: [PATCH 0002/2417] [openload] separate PhantomJS code from extractor --- youtube_dl/extractor/openload.py | 78 ++++------------- youtube_dl/utils.py | 141 +++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 789bf997e..ac5e0bb08 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,17 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import os import re -import subprocess -import tempfile from .common import InfoExtractor from ..utils import ( - check_executable, determine_ext, - encodeArgument, ExtractorError, + get_element_by_id, + PhantomJSwrapper, ) @@ -62,38 +59,7 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] - _PHANTOMJS_SCRIPT = r''' - phantom.onError = function(msg, trace) { - var msgStack = ['PHANTOM ERROR: ' + msg]; - if(trace && trace.length) { - msgStack.push('TRACE:'); - trace.forEach(function(t) { - msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line - + (t.function ? ' (in function ' + t.function +')' : '')); - }); - } - console.error(msgStack.join('\n')); - phantom.exit(1); - }; - var page = require('webpage').create(); - page.settings.resourceTimeout = 10000; - page.onInitialized = function() { - page.evaluate(function() { - delete window._phantom; - delete window.callPhantom; - }); - }; - page.open('https://openload.co/embed/%s/', function(status) { - var info = page.evaluate(function() { - return { - decoded_id: document.getElementById('streamurl').innerHTML, - title: document.querySelector('meta[name="og:title"],' - + 'meta[name=description]').content - }; - }); - console.log(info.decoded_id + ' ' + info.title); - phantom.exit(); - });''' + _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' @staticmethod def _extract_urls(webpage): @@ -102,40 +68,27 @@ class OpenloadIE(InfoExtractor): webpage) def _real_extract(self, url): - exe = check_executable('phantomjs', ['-v']) - if not exe: - raise ExtractorError('PhantomJS executable not found in PATH, ' - 'download it from http://phantomjs.org', - expected=True) - video_id = self._match_id(url) url = 'https://openload.co/embed/%s/' % video_id - webpage = self._download_webpage(url, video_id) + headers = { + 'User-Agent': self._USER_AGENT, + } + + phantom = PhantomJSwrapper(self) + webpage, _ = phantom.get(url, video_id=video_id, headers=headers) if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) - script_file = tempfile.NamedTemporaryFile(mode='w', delete=False) - - # write JS script to file and close it - with script_file: - script_file.write(self._PHANTOMJS_SCRIPT % video_id) - - self.to_screen('%s: Decoding video ID with PhantomJS' % video_id) - - p = subprocess.Popen([exe, '--ssl-protocol=any', script_file.name], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output, err = p.communicate() - if p.returncode != 0: - raise ExtractorError('Decoding failed\n:' - + encodeArgument(err)) - else: - decoded_id, title = encodeArgument(output).strip().split(' ', 1) - - os.remove(script_file.name) + decoded_id = get_element_by_id('streamurl', webpage) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id + title = self._og_search_title(webpage, default=None) or self._search_regex( + r']+class=["\']title["\'][^>]*>([^<]+)', webpage, + 'title', default=None) or self._html_search_meta( + 'description', webpage, 'title', fatal=True) + entries = self._parse_html5_media_entries(url, webpage, video_id) entry = entries[0] if entries else {} subtitles = entry.get('subtitles') @@ -148,5 +101,6 @@ class OpenloadIE(InfoExtractor): # Seems all videos have extensions in their titles 'ext': determine_ext(title, 'mp4'), 'subtitles': subtitles, + 'http_headers': headers, } return info_dict diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2340bc306..94e1b07a6 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3652,3 +3652,144 @@ def write_xattr(path, key, value): "Couldn't find a tool to set the xattrs. " "Install either the python 'xattr' module, " "or the 'xattr' binary.") + + +class PhantomJSwrapper(object): + """PhantomJS wrapper class""" + + _TEMPLATE = r''' + phantom.onError = function(msg, trace) {{ + var msgStack = ['PHANTOM ERROR: ' + msg]; + if(trace && trace.length) {{ + msgStack.push('TRACE:'); + trace.forEach(function(t) {{ + msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line + + (t.function ? ' (in function ' + t.function +')' : '')); + }}); + }} + console.error(msgStack.join('\n')); + phantom.exit(1); + }}; + var page = require('webpage').create(); + var fs = require('fs'); + var read = {{ mode: 'r', charset: 'utf-8' }}; + var write = {{ mode: 'w', charset: 'utf-8' }}; + page.settings.resourceTimeout = {timeout}; + page.settings.userAgent = "{ua}"; + page.onLoadStarted = function() {{ + page.evaluate(function() {{ + delete window._phantom; + delete window.callPhantom; + }}); + }}; + var saveAndExit = function() {{ + fs.write("{html}", page.content, write); + phantom.exit(); + }}; + page.onLoadFinished = function(status) {{ + if(page.url === "") {{ + page.setContent(fs.read("{html}", read), "{url}"); + }} + else {{ + {jscode} + }} + }}; + page.open(""); + ''' + + _TMP_FILE_NAMES = ['script', 'html'] + + def __init__(self, extractor, timeout=10000): + self.exe = check_executable('phantomjs', ['-v']) + if not self.exe: + raise ExtractorError('PhantomJS executable not found in PATH, ' + 'download it from http://phantomjs.org', + expected=True) + self.extractor = extractor + self.options = { + 'timeout': timeout, + } + self._TMP_FILES = {} + for name in self._TMP_FILE_NAMES: + tmp = tempfile.NamedTemporaryFile(delete=False) + tmp.close() + self._TMP_FILES[name] = tmp + + def __del__(self): + for name in self._TMP_FILE_NAMES: + try: + os.remove(self._TMP_FILES[name].name) + except: + pass + + def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): + """ + Downloads webpage (if needed) and executes JS + + Params: + url: website url + html: optional, html code of website + video_id: video id + note: optional, displayed when downloading webpage + note2: optional, displayed when executing JS + headers: custom http headers + jscode: code to be executed when page is loaded + + Returns tuple with: + * downloaded website (after JS execution) + * anything you print with `console.log` (but not inside `page.execute`!) + + In most cases you don't need to add any `jscode`. + It is executed in `page.onLoadFinished`. + `saveAndExit();` is mandatory, use it instead of `phantom.exit()` + It is possible to wait for some element on the webpage, for example: + var check = function() { + var elementFound = page.evaluate(function() { + return document.querySelector('#b.done') !== null; + }); + if(elementFound) + saveAndExit(); + else + window.setTimeout(check, 500); + } + + page.evaluate(function(){ + document.querySelector('#a').click(); + }); + check(); + """ + if 'saveAndExit();' not in jscode: + raise ExtractorError('`saveAndExit();` not found in `jscode`') + if not html: + html = self.extractor._download_webpage(url, video_id, note=note, headers=headers) + with open(self._TMP_FILES['html'].name, 'wb') as f: + f.write(html.encode('utf-8')) + + replaces = self.options + replaces['url'] = url + user_agent = headers.get('User-Agent') or std_headers['User-Agent'] + replaces['ua'] = user_agent.replace('"', '\\"') + replaces['jscode'] = jscode + + for x in self._TMP_FILE_NAMES: + replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + + with open(self._TMP_FILES['script'].name, 'wb') as f: + f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) + + if video_id is None: + self.extractor.to_screen('%s' % (note2,)) + else: + self.extractor.to_screen('%s: %s' % (video_id, note2)) + + p = subprocess.Popen([self.exe, '--ssl-protocol=any', + self._TMP_FILES['script'].name], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = p.communicate() + if p.returncode != 0: + raise ExtractorError('Executing JS failed\n:' + + encodeArgument(err)) + with open(self._TMP_FILES['html'].name, 'rb') as f: + html = f.read().decode('utf-8') + return (html, encodeArgument(out)) + From 40e41780f1d770a355f01e3c1e6fb09ff392f97e Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Tue, 25 Apr 2017 15:12:54 +0200 Subject: [PATCH 0003/2417] [phantomjs] add cookie support --- youtube_dl/extractor/common.py | 8 +++-- youtube_dl/utils.py | 62 +++++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dcc9d628a..e54adc9f0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2343,10 +2343,12 @@ class InfoExtractor(object): self._downloader.report_warning(msg) return res - def _set_cookie(self, domain, name, value, expire_time=None): + def _set_cookie(self, domain, name, value, expire_time=None, port=None, + path='/', secure=False, discard=False, rest={}, **kwargs): cookie = compat_cookiejar.Cookie( - 0, name, value, None, None, domain, None, - None, '/', True, False, expire_time, '', None, None, None) + 0, name, value, port, not port is None, domain, True, + domain.startswith('.'), path, True, secure, expire_time, + discard, None, None, rest) self._downloader.cookiejar.set_cookie(cookie) def _get_cookies(self, url): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 94e1b07a6..9c94b7ec9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3654,6 +3654,37 @@ def write_xattr(path, key, value): "or the 'xattr' binary.") +def cookie_to_dict(cookie): + cookie_dict = { + 'name': cookie.name, + 'value': cookie.value, + }; + if cookie.port_specified: + cookie_dict['port'] = cookie.port + if cookie.domain_specified: + cookie_dict['domain'] = cookie.domain + if cookie.path_specified: + cookie_dict['path'] = cookie.path + if not cookie.expires is None: + cookie_dict['expires'] = cookie.expires + if not cookie.secure is None: + cookie_dict['secure'] = cookie.secure + if not cookie.discard is None: + cookie_dict['discard'] = cookie.discard + try: + if (cookie.has_nonstandard_attr('httpOnly') or + cookie.has_nonstandard_attr('httponly') or + cookie.has_nonstandard_attr('HttpOnly')): + cookie_dict['httponly'] = True + except TypeError: + pass + return cookie_dict + + +def cookie_jar_to_list(cookie_jar): + return [cookie_to_dict(cookie) for cookie in cookie_jar] + + class PhantomJSwrapper(object): """PhantomJS wrapper class""" @@ -3674,6 +3705,9 @@ class PhantomJSwrapper(object): var fs = require('fs'); var read = {{ mode: 'r', charset: 'utf-8' }}; var write = {{ mode: 'w', charset: 'utf-8' }}; + JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{ + phantom.addCookie(x); + }}); page.settings.resourceTimeout = {timeout}; page.settings.userAgent = "{ua}"; page.onLoadStarted = function() {{ @@ -3684,6 +3718,7 @@ class PhantomJSwrapper(object): }}; var saveAndExit = function() {{ fs.write("{html}", page.content, write); + fs.write("{cookies}", JSON.stringify(phantom.cookies), write); phantom.exit(); }}; page.onLoadFinished = function(status) {{ @@ -3697,7 +3732,7 @@ class PhantomJSwrapper(object): page.open(""); ''' - _TMP_FILE_NAMES = ['script', 'html'] + _TMP_FILE_NAMES = ['script', 'html', 'cookies'] def __init__(self, extractor, timeout=10000): self.exe = check_executable('phantomjs', ['-v']) @@ -3722,6 +3757,26 @@ class PhantomJSwrapper(object): except: pass + def _save_cookies(self, url): + cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) + for cookie in cookies: + if 'path' not in cookie: + cookie['path'] = '/' + if 'domain' not in cookie: + cookie['domain'] = compat_urlparse.urlparse(url).netloc + with open(self._TMP_FILES['cookies'].name, 'wb') as f: + f.write(json.dumps(cookies).encode('utf-8')) + + def _load_cookies(self): + with open(self._TMP_FILES['cookies'].name, 'rb') as f: + cookies = json.loads(f.read().decode('utf-8')) + for cookie in cookies: + if cookie['httponly'] is True: + cookie['rest'] = { 'httpOnly': None } + if 'expiry' in cookie: + cookie['expire_time'] = cookie['expiry'] + self.extractor._set_cookie(**cookie) + def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): """ Downloads webpage (if needed) and executes JS @@ -3765,6 +3820,8 @@ class PhantomJSwrapper(object): with open(self._TMP_FILES['html'].name, 'wb') as f: f.write(html.encode('utf-8')) + self._save_cookies(url) + replaces = self.options replaces['url'] = url user_agent = headers.get('User-Agent') or std_headers['User-Agent'] @@ -3791,5 +3848,8 @@ class PhantomJSwrapper(object): + encodeArgument(err)) with open(self._TMP_FILES['html'].name, 'rb') as f: html = f.read().decode('utf-8') + + self._load_cookies() + return (html, encodeArgument(out)) From fcace2d1adac5d1f306b22219fde3a4542bcd719 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sat, 29 Apr 2017 10:30:45 +0200 Subject: [PATCH 0004/2417] [openload] raise `not found` before executing js --- youtube_dl/extractor/openload.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index ac5e0bb08..0adf17765 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -74,12 +74,14 @@ class OpenloadIE(InfoExtractor): 'User-Agent': self._USER_AGENT, } - phantom = PhantomJSwrapper(self) - webpage, _ = phantom.get(url, video_id=video_id, headers=headers) + webpage = self._download_webpage(url, video_id, headers=headers) if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) + phantom = PhantomJSwrapper(self) + webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers) + decoded_id = get_element_by_id('streamurl', webpage) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id From 98f9d873814da2a8584cc30c0e197c15ed249db3 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sat, 29 Apr 2017 12:41:42 +0200 Subject: [PATCH 0005/2417] [phantomjs] Add required version checking --- youtube_dl/utils.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9c94b7ec9..84aaac664 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3734,13 +3734,22 @@ class PhantomJSwrapper(object): _TMP_FILE_NAMES = ['script', 'html', 'cookies'] - def __init__(self, extractor, timeout=10000): + def __init__(self, extractor, required_version=None, timeout=10000): self.exe = check_executable('phantomjs', ['-v']) if not self.exe: raise ExtractorError('PhantomJS executable not found in PATH, ' 'download it from http://phantomjs.org', expected=True) + self.extractor = extractor + + if required_version: + version = get_exe_version(self.exe, version_re=r'([0-9.]+)') + if is_outdated_version(version, required_version): + self.extractor._downloader.report_warning( + 'Your copy of PhantomJS is outdated, update it to version ' + '%s or newer if you encounter any errors.' % required_version) + self.options = { 'timeout': timeout, } From 7552f96352f35cd877e52fd0770b77ba1856fc62 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sat, 29 Apr 2017 12:41:57 +0200 Subject: [PATCH 0006/2417] [openload] Add required version --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 0adf17765..292476ef8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -79,7 +79,7 @@ class OpenloadIE(InfoExtractor): if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) - phantom = PhantomJSwrapper(self) + phantom = PhantomJSwrapper(self, required_version='2.0') webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers) decoded_id = get_element_by_id('streamurl', webpage) From 5ff1bc0cc10bf3006834c4b49fc36d733c83ce5c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 28 Apr 2017 22:25:20 +0100 Subject: [PATCH 0007/2417] [YoutubeDL] write raw subtitle files --- youtube_dl/YoutubeDL.py | 43 +++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index eb465c425..c7100bb91 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1696,29 +1696,30 @@ class YoutubeDL(object): ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] - if sub_info.get('data') is not None: - sub_data = sub_info['data'] + sub_filename = subtitles_filename(filename, sub_lang, sub_format) + if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): + self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) else: - try: - sub_data = ie._download_webpage( - sub_info['url'], info_dict['id'], note=False) - except ExtractorError as err: - self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, error_to_compat_str(err.cause))) - continue - try: - sub_filename = subtitles_filename(filename, sub_lang, sub_format) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): - self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format)) + self.to_screen('[info] Writing video subtitles to: ' + sub_filename) + if sub_info.get('data') is not None: + try: + # Use newline='' to prevent conversion of newline characters + # See https://github.com/rg3/youtube-dl/issues/10268 + with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: + subfile.write(sub_info['data']) + except (OSError, IOError): + self.report_error('Cannot write subtitles file ' + sub_filename) + return else: - self.to_screen('[info] Writing video subtitles to: ' + sub_filename) - # Use newline='' to prevent conversion of newline characters - # See https://github.com/rg3/youtube-dl/issues/10268 - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: - subfile.write(sub_data) - except (OSError, IOError): - self.report_error('Cannot write subtitles file ' + sub_filename) - return + try: + sub_data = ie._request_webpage( + sub_info['url'], info_dict['id'], note=False).read() + with io.open(encodeFilename(sub_filename), 'wb') as subfile: + subfile.write(sub_data) + except (ExtractorError, IOError, OSError, ValueError) as err: + self.report_warning('Unable to download subtitle for "%s": %s' % + (sub_lang, error_to_compat_str(err))) + continue if self.params.get('writeinfojson', False): infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) From a49804816c0246d81b9d34d9f89f99fae06da887 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Jul 2017 18:12:15 +0700 Subject: [PATCH 0008/2417] [dailymotion] Add support for new layout (close #13580) --- youtube_dl/extractor/dailymotion.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index f8db76c18..74e991331 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -147,7 +147,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): view_count_str = self._search_regex( (r']+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"', r'video_views_count[^>]+>\s+([\s\d\,.]+)'), - webpage, 'view count', fatal=False) + webpage, 'view count', default=None) if view_count_str: view_count_str = re.sub(r'\s', '', view_count_str) view_count = str_to_int(view_count_str) @@ -159,7 +159,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', r'buildPlayer\(({.+?})\);', - r'var\s+config\s*=\s*({.+?});'], + r'var\s+config\s*=\s*({.+?});', + # New layout regex (see https://github.com/rg3/youtube-dl/issues/13580) + r'__PLAYER_CONFIG__\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) From 8b347a389eaa6d545ada901c2e236a5eb2272960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 00:26:13 +0700 Subject: [PATCH 0009/2417] [googledrive] Fix height extraction (closes #13603) --- youtube_dl/extractor/googledrive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 9705cfadd..c40da85c5 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -92,7 +92,7 @@ class GoogleDriveIE(InfoExtractor): if resolution: f.update({ 'width': resolution[0], - 'height': resolution[0], + 'height': resolution[1], }) formats.append(f) self._sort_formats(formats) From 7a5773090789bec38a3f58dfb09039155919a540 Mon Sep 17 00:00:00 2001 From: rrooij Date: Sun, 9 Jul 2017 09:21:40 +0200 Subject: [PATCH 0010/2417] [npo:live] Fix live stream id extraction (closes #13568) --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 5f8b6def1..516b1e941 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -341,7 +341,7 @@ class NPOLiveIE(NPOBaseIE): webpage = self._download_webpage(url, display_id) live_id = self._search_regex( - r'data-prid="([^"]+)"', webpage, 'live id') + [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id') return { '_type': 'url_transparent', From 15237fcd51dca192103f08a910660616e3b241b8 Mon Sep 17 00:00:00 2001 From: mlindner Date: Sun, 9 Jul 2017 00:54:52 -0700 Subject: [PATCH 0011/2417] [veoh] Extend _VALID_URL --- youtube_dl/extractor/veoh.py | 73 ++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index 0f5d68738..b20dddc5c 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -12,47 +12,46 @@ from ..utils import ( class VeohIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P(?:v|yapi-)[\da-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P(?:v|e|yapi-)[\da-zA-Z]+)' - _TESTS = [ - { - 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', - 'md5': '620e68e6a3cff80086df3348426c9ca3', - 'info_dict': { - 'id': '56314296', - 'ext': 'mp4', - 'title': 'Straight Backs Are Stronger', - 'uploader': 'LUMOback', - 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ', - }, + _TESTS = [{ + 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', + 'md5': '620e68e6a3cff80086df3348426c9ca3', + 'info_dict': { + 'id': '56314296', + 'ext': 'mp4', + 'title': 'Straight Backs Are Stronger', + 'uploader': 'LUMOback', + 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ', }, - { - 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage', - 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa', - 'info_dict': { - 'id': '27701988', - 'ext': 'mp4', - 'title': 'Chile workers cover up to avoid skin damage', - 'description': 'md5:2bd151625a60a32822873efc246ba20d', - 'uploader': 'afp-news', - 'duration': 123, - }, - 'skip': 'This video has been deleted.', + }, { + 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage', + 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa', + 'info_dict': { + 'id': '27701988', + 'ext': 'mp4', + 'title': 'Chile workers cover up to avoid skin damage', + 'description': 'md5:2bd151625a60a32822873efc246ba20d', + 'uploader': 'afp-news', + 'duration': 123, }, - { - 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', - 'md5': '4fde7b9e33577bab2f2f8f260e30e979', - 'note': 'Embedded ooyala video', - 'info_dict': { - 'id': '69525809', - 'ext': 'mp4', - 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery', - 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', - 'uploader': 'newsy-videos', - }, - 'skip': 'This video has been deleted.', + 'skip': 'This video has been deleted.', + }, { + 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', + 'md5': '4fde7b9e33577bab2f2f8f260e30e979', + 'note': 'Embedded ooyala video', + 'info_dict': { + 'id': '69525809', + 'ext': 'mp4', + 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery', + 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', + 'uploader': 'newsy-videos', }, - ] + 'skip': 'This video has been deleted.', + }, { + 'url': 'http://www.veoh.com/watch/e152215AJxZktGS', + 'only_matching': True, + }] def _extract_formats(self, source): formats = [] From 5af2fd7fa02734c2a23f917fb60f1c14da149d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 15:55:04 +0700 Subject: [PATCH 0012/2417] [eagleplatform] Add support for another embed pattern (#13557) --- youtube_dl/extractor/eagleplatform.py | 36 ++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 76d39adac..5e1de04a1 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -60,16 +60,40 @@ class EaglePlatformIE(InfoExtractor): webpage) if mobj is not None: return mobj.group('url') - # Basic usage embedding (see http://dultonmedia.github.io/eplayer/) + PLAYER_JS_RE = r''' + ]+ + src=(?P["\'])(?:https?:)?//(?P(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) + .+? + ''' + # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) mobj = re.search( r'''(?xs) - ]+ - src=(?P["\'])(?:https?:)?//(?P.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1) - .+? + %s ]+ - class=(?P["\'])eagleplayer(?P=q2)[^>]+ + class=(?P["\'])eagleplayer(?P=qclass)[^>]+ data-id=["\'](?P\d+) - ''', webpage) + ''' % PLAYER_JS_RE, webpage) + if mobj is not None: + return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() + # Generalization of "Javascript code usage", "Combined usage" and + # "Usage without attaching to DOM" embeddings (see + # http://dultonmedia.github.io/eplayer/) + mobj = re.search( + r'''(?xs) + %s + + ''' % PLAYER_JS_RE, webpage) if mobj is not None: return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() From 665e9452461abaff7127653265c78bd585acea6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 15:57:33 +0700 Subject: [PATCH 0013/2417] [eagleplatform] Add support for referrer protected videos (closes #13557) --- youtube_dl/extractor/eagleplatform.py | 25 ++++++++++++++++++++++--- youtube_dl/extractor/generic.py | 10 +++++----- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 5e1de04a1..34891a362 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -11,6 +11,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + unsmuggle_url, ) @@ -50,6 +51,10 @@ class EaglePlatformIE(InfoExtractor): 'view_count': int, }, 'skip': 'Georestricted', + }, { + # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) + 'url': 'tvrainru.media.eagleplatform.com:582306', + 'only_matching': True, }] @staticmethod @@ -103,9 +108,10 @@ class EaglePlatformIE(InfoExtractor): if status != 200: raise ExtractorError(' '.join(response['errors']), expected=True) - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs): + def _download_json(self, url_or_request, video_id, *args, **kwargs): try: - response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) + response = super(EaglePlatformIE, self)._download_json( + url_or_request, video_id, *args, **kwargs) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError): response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) @@ -117,11 +123,24 @@ class EaglePlatformIE(InfoExtractor): return self._download_json(url_or_request, video_id, note)['data'][0] def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + mobj = re.match(self._VALID_URL, url) host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') + headers = {} + query = { + 'id': video_id, + } + + referrer = smuggled_data.get('referrer') + if referrer: + headers['Referer'] = referrer + query['referrer'] = referrer + player_data = self._download_json( - 'http://%s/api/player_data?id=%s' % (host, video_id), video_id) + 'http://%s/api/player_data' % host, video_id, + headers=headers, query=query) media = player_data['data']['playlist']['viewports'][0]['medialist'][0] diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f9bff433c..7232f39db 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1185,7 +1185,7 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - # Eagle.Platform embed (generic URL) + # EaglePlatform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used @@ -1200,7 +1200,7 @@ class GenericIE(InfoExtractor): 'age_limit': 0, }, }, - # ClipYou (Eagle.Platform) embed (custom URL) + # ClipYou (EaglePlatform) embed (custom URL) { 'url': 'http://muz-tv.ru/play/7129/', # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used @@ -2443,12 +2443,12 @@ class GenericIE(InfoExtractor): if kaltura_url: return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) - # Look for Eagle.Platform embeds + # Look for EaglePlatform embeds eagleplatform_url = EaglePlatformIE._extract_url(webpage) if eagleplatform_url: - return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key()) + return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key()) - # Look for ClipYou (uses Eagle.Platform) embeds + # Look for ClipYou (uses EaglePlatform) embeds mobj = re.search( r']+src="https?://(?Pmedia\.clipyou\.ru)/index/player\?.*\brecord_id=(?P\d+).*"', webpage) if mobj is not None: From 250b042c7e71a6e8bbff534aa41c2b92dae1acf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 16:02:38 +0700 Subject: [PATCH 0014/2417] [generic] Add tests for #13557 --- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7232f39db..95c38698d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1199,6 +1199,24 @@ class GenericIE(InfoExtractor): 'view_count': int, 'age_limit': 0, }, + 'params': { + 'skip_download': True, + }, + }, + # referrer protected EaglePlatform embed + { + 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', + 'info_dict': { + 'id': '582306', + 'ext': 'mp4', + 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 3382, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, }, # ClipYou (EaglePlatform) embed (custom URL) { @@ -1212,6 +1230,9 @@ class GenericIE(InfoExtractor): 'duration': 216, 'view_count': int, }, + 'params': { + 'skip_download': True, + }, }, # Pladform embed { From 4328ddf82b812420ffc120b4150251f751bff08c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Jul 2017 16:29:52 +0700 Subject: [PATCH 0015/2417] [extractor/common] Add support for AMP tags in _parse_html5_media_entries --- youtube_dl/extractor/common.py | 7 +++++-- youtube_dl/extractor/generic.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index afeb4c5da..daa10885f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2132,15 +2132,18 @@ class InfoExtractor(object): return is_plain_url, formats entries = [] + # amp-video and amp-audio are very similar to their HTML5 counterparts + # so we wll include them right here (see + # https://www.ampproject.org/docs/reference/components/amp-video) media_tags = [(media_tag, media_type, '') for media_tag, media_type - in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] + in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)] media_tags.extend(re.findall( # We only allow video|audio followed by a whitespace or '>'. # Allowing more characters may end up in significant slow down (see # https://github.com/rg3/youtube-dl/issues/11979, example URL: # http://www.porntrex.com/maps/videositemap.xml). - r'(?s)(<(?Pvideo|audio)(?:\s+[^>]*)?>)(.*?)', webpage)) + r'(?s)(<(?P(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)', webpage)) for media_tag, media_type, media_content in media_tags: media_info = { 'formats': [], diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 95c38698d..919f4f987 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1770,6 +1770,16 @@ class GenericIE(InfoExtractor): }, 'add_ie': [MediasetIE.ie_key()], }, + { + # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video) + 'url': 'https://tvrain.ru/amp/418921/', + 'md5': 'cc00413936695987e8de148b67d14f1d', + 'info_dict': { + 'id': '418921', + 'ext': 'mp4', + 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject From d2b9f362fabad8f9490825456d8ed679d7159271 Mon Sep 17 00:00:00 2001 From: Christopher Smith Date: Thu, 29 Jun 2017 13:10:45 -0600 Subject: [PATCH 0016/2417] [cjsw] Add extractor --- youtube_dl/extractor/cjsw.py | 41 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 42 insertions(+) create mode 100644 youtube_dl/extractor/cjsw.py diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py new file mode 100644 index 000000000..087cac9bc --- /dev/null +++ b/youtube_dl/extractor/cjsw.py @@ -0,0 +1,41 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class CJSWIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/\S+/(?P[0-9]+)' + IE_NAME = 'cjsw' + _TEST = { + 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', + 'md5': 'cee14d40f1e9433632c56e3d14977120', + 'info_dict': { + 'id': '20170620', + 'ext': 'mp3', + 'title': 'Freshly Squeezed', + 'description': 'Sled Island artists featured // Live session with Phi Pho, followed by a live session with Sinzere & The Late Nights! // Stay Fresh Y\'all!!', + } + } + + def _real_extract(self, url): + episode_id = self._match_id(url) + + webpage = self._download_webpage(url, episode_id) + + title = self._search_regex( + r']+data-showname=(["\'])(?P(?!\1).+?)\1[^>]*>', webpage, 'title', group='title') + description = self._html_search_regex( + r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False) + formats = [{ + 'url': self._search_regex( + r'<button[^>]+data-audio-src=(["\'])(?P<audio_url>(?!\1).+?)\1[^>]*>', webpage, 'audio_url', group='audio_url'), + 'ext': 'mp3', + 'vcodec': 'none', + }] + return { + 'id': episode_id, + 'title': title, + 'description': description, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b83c3aba5..4524fa687 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -185,6 +185,7 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE +from .cjsw import CJSWIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .cliprs import ClipRsIE From c319d1c4833f89df818fe39f4c99cdc5c9a8bf01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:00:45 +0700 Subject: [PATCH 0017/2417] [csjw] Fix issues and improve extraction (closes #13525) --- youtube_dl/extractor/cjsw.py | 57 ++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py index 087cac9bc..aab6ea535 100644 --- a/youtube_dl/extractor/cjsw.py +++ b/youtube_dl/extractor/cjsw.py @@ -1,41 +1,66 @@ -# coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..utils import ( + determine_ext, + unescapeHTML, +) class CJSWIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/\S+/(?P<id>[0-9]+)' - IE_NAME = 'cjsw' + _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)' _TEST = { 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', 'md5': 'cee14d40f1e9433632c56e3d14977120', 'info_dict': { - 'id': '20170620', + 'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41', 'ext': 'mp3', - 'title': 'Freshly Squeezed', - 'description': 'Sled Island artists featured // Live session with Phi Pho, followed by a live session with Sinzere & The Late Nights! // Stay Fresh Y\'all!!', - } + 'title': 'Freshly Squeezed – Episode June 20, 2017', + 'description': 'md5:c967d63366c3898a80d0c7b0ff337202', + 'series': 'Freshly Squeezed', + 'episode_id': '20170620', + }, } def _real_extract(self, url): - episode_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + program, episode_id = mobj.group('program', 'id') + audio_id = '%s/%s' % (program, episode_id) webpage = self._download_webpage(url, episode_id) - title = self._search_regex( - r'<button[^>]+data-showname=(["\'])(?P<title>(?!\1).+?)\1[^>]*>', webpage, 'title', group='title') - description = self._html_search_regex( - r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False) + title = unescapeHTML(self._search_regex( + (r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)', + r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'), + webpage, 'title', group='title')) + + audio_url = self._search_regex( + r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'audio url', group='url') + + audio_id = self._search_regex( + r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3', + audio_url, 'audio id', default=audio_id) + formats = [{ - 'url': self._search_regex( - r'<button[^>]+data-audio-src=(["\'])(?P<audio_url>(?!\1).+?)\1[^>]*>', webpage, 'audio_url', group='audio_url'), - 'ext': 'mp3', + 'url': audio_url, + 'ext': determine_ext(audio_url, 'mp3'), 'vcodec': 'none', }] + + description = self._html_search_regex( + r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False) + series = self._search_regex( + r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage, + 'series', default=program, group='name') + return { - 'id': episode_id, + 'id': audio_id, 'title': title, 'description': description, 'formats': formats, + 'series': series, + 'episode_id': episode_id, } From 0d2f0b0357325823782884327a158aeccf4f9b49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:05:11 +0700 Subject: [PATCH 0018/2417] [csjw] Make description optional --- youtube_dl/extractor/cjsw.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py index aab6ea535..dd271586f 100644 --- a/youtube_dl/extractor/cjsw.py +++ b/youtube_dl/extractor/cjsw.py @@ -11,7 +11,7 @@ from ..utils import ( class CJSWIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', 'md5': 'cee14d40f1e9433632c56e3d14977120', 'info_dict': { @@ -22,7 +22,11 @@ class CJSWIE(InfoExtractor): 'series': 'Freshly Squeezed', 'episode_id': '20170620', }, - } + }, { + # no description + 'url': 'http://cjsw.com/program/road-pops/episode/20170707/', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -51,7 +55,8 @@ class CJSWIE(InfoExtractor): }] description = self._html_search_regex( - r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False) + r'<p>(?P<description>.+?)</p>', webpage, 'description', + default=None) series = self._search_regex( r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage, 'series', default=program, group='name') From a02682fd13ce5ba88d2508c90559eaa7f43b65d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:09:44 +0700 Subject: [PATCH 0019/2417] Keep in sync with ffmpeg's current malformed AAC bitstream wording (closes #13587) --- youtube_dl/YoutubeDL.py | 4 ++-- youtube_dl/postprocessor/ffmpeg.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b3a6d4d3b..60ee4b7d8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1890,7 +1890,7 @@ class YoutubeDL(object): info_dict.get('protocol') == 'm3u8' and self.params.get('hls_prefer_native')): if fixup_policy == 'warn': - self.report_warning('%s: malformated aac bitstream.' % ( + self.report_warning('%s: malformed AAC bitstream detected.' % ( info_dict['id'])) elif fixup_policy == 'detect_or_warn': fixup_pp = FFmpegFixupM3u8PP(self) @@ -1899,7 +1899,7 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(fixup_pp) else: self.report_warning( - '%s: malformated aac bitstream. %s' + '%s: malformed AAC bitstream detected. %s' % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index f021ea8fd..51256a3fb 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -542,7 +542,7 @@ class FFmpegFixupM3u8PP(FFmpegPostProcessor): temp_filename = prepend_extension(filename, 'temp') options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] - self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) + self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename) self.run_ffmpeg(filename, temp_filename, options) os.remove(encodeFilename(filename)) From ed84454d358f3cbfdc43dab31328b165f9c72c68 Mon Sep 17 00:00:00 2001 From: Santiago Calcagno <santicalcagno@gmail.com> Date: Tue, 13 Jun 2017 12:32:04 -0300 Subject: [PATCH 0020/2417] [egghead:course] Fix extraction --- youtube_dl/extractor/egghead.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index db921465e..01fcdb6cf 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -22,18 +20,18 @@ class EggheadCourseIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) + api_url = 'https://egghead.io/api/v1/series/' + playlist_id + course = self._download_json(api_url, playlist_id) + title = course.get('title') + description = course.get('description') - title = self._html_search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'title') - ul = self._search_regex(r'(?s)<ul class="series-lessons-list">(.*?)</ul>', webpage, 'session list') - - found = re.findall(r'(?s)<a class="[^"]*"\s*href="([^"]+)">\s*<li class="item', ul) - entries = [self.url_result(m) for m in found] + lessons = course.get('lessons') + entries = [{'_type': 'url', 'ie_key': 'Wistia', 'url': 'wistia:' + l.get('wistia_id')} for l in lessons] return { '_type': 'playlist', 'id': playlist_id, 'title': title, - 'description': self._og_search_description(webpage), + 'description': description, 'entries': entries, } From 485cb375766df8f2ef79b7fe2915ead4ef61a01e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:28:42 +0700 Subject: [PATCH 0021/2417] [egghead:course] Improve (closes #13370) --- youtube_dl/extractor/egghead.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index 01fcdb6cf..c86f52319 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class EggheadCourseIE(InfoExtractor): IE_DESC = 'egghead.io course' IE_NAME = 'egghead:course' - _VALID_URL = r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)' + _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)' _TEST = { 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', 'playlist_count': 29, @@ -20,18 +20,16 @@ class EggheadCourseIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - api_url = 'https://egghead.io/api/v1/series/' + playlist_id - course = self._download_json(api_url, playlist_id) - title = course.get('title') - description = course.get('description') - lessons = course.get('lessons') - entries = [{'_type': 'url', 'ie_key': 'Wistia', 'url': 'wistia:' + l.get('wistia_id')} for l in lessons] + course = self._download_json( + 'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id) - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': title, - 'description': description, - 'entries': entries, - } + entries = [ + self.url_result( + 'wistia:%s' % lesson['wistia_id'], ie='Wistia', + video_id=lesson['wistia_id'], video_title=lesson.get('title')) + for lesson in course['lessons'] if lesson.get('wistia_id')] + + return self.playlist_result( + entries, playlist_id, course.get('title'), + course.get('description')) From 58179eb7d96ebef26a0083e80a2022fab4ca1558 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 17:55:40 +0700 Subject: [PATCH 0022/2417] [abc.net.au:iview] Extract more formats (closes #13492, closes #13489) --- youtube_dl/extractor/abc.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 0247cabf9..60f753b95 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -3,11 +3,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, js_to_json, int_or_none, parse_iso8601, + try_get, ) @@ -124,7 +126,20 @@ class ABCIViewIE(InfoExtractor): title = video_params.get('title') or video_params['seriesTitle'] stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') - formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) + format_urls = [ + try_get(stream, lambda x: x['hds-unmetered'], compat_str)] + + # May have higher quality video + sd_url = try_get( + stream, lambda x: x['streams']['hds']['sd'], compat_str) + if sd_url: + format_urls.append(sd_url.replace('metered', 'um')) + + formats = [] + for format_url in format_urls: + if format_url: + formats.extend( + self._extract_akamai_formats(format_url, video_id)) self._sort_formats(formats) subtitles = {} From 256a746d21634eccad07a1e6dcafedcdf8b6181b Mon Sep 17 00:00:00 2001 From: luboss <lubos.katrinec@gmail.com> Date: Fri, 2 Jun 2017 22:44:39 +0200 Subject: [PATCH 0023/2417] [joj] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/joj.py | 56 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100755 youtube_dl/extractor/joj.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4524fa687..9ee080895 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -470,6 +470,7 @@ from .jamendo import ( ) from .jeuxvideo import JeuxVideoIE from .jove import JoveIE +from .joj import JojIE from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE from .kaltura import KalturaIE diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py new file mode 100755 index 000000000..2ebfec902 --- /dev/null +++ b/youtube_dl/extractor/joj.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +import re + + +class JojIE(InfoExtractor): + _VALID_URL = r'https?://[a-z0-9]+\.joj\.sk/([^/]+/)*(?P<title_query>(?P<release_date>[0-9]{4}(-[0-9]{2}){2}).*)' # noqa + _TESTS = [{ + 'url': 'https://www.joj.sk/nove-byvanie/archiv/2017-05-28-nove-byvanie', # noqa + 'info_dict': { + 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932', + 'ext': 'mp4', + 'title': 'Nové Bývanie', + 'release_date': '20170528' + } + }, { + 'url': 'http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia', + 'info_dict': { + 'id': 'f18b2c5f-9ea8-4941-a164-a814c53306ad', + 'ext': 'mp4', + 'title': 'Starí Rodičia', + 'release_date': '20160906' + } + }] + + media_src_url = 'http://n16.joj.sk/storage/' + xml_source_url = 'https://media.joj.sk/services/Video.php?clip=' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + release_date = mobj.group('release_date').replace('-', '') + webpage = self._download_webpage(url, 'id') + video_id = self._html_search_regex( + r'https?://([a-z0-9]+\.)joj\.sk/embed/(?P<video_id>[a-f0-9\-]+)', + webpage, 'id', group='video_id') + xml_playlist_url = self.xml_source_url + video_id + xml_playlist_et = self._download_xml(xml_playlist_url, 'XML playlist') + formats = [] + for file_el in xml_playlist_et.findall('files/file'): + try: + height = int(file_el.attrib['id'].replace('p', '')) + except ValueError: + height = 0 + formats.append({'height': height, + 'url': self.media_src_url + file_el.attrib['path'].replace( # noqa + 'dat/', '', 1)}) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage).title(), + 'formats': formats, + 'release_date': release_date + } From 73cf76a93fe48240bf82b1685b1403f05b793ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 19:05:18 +0700 Subject: [PATCH 0024/2417] [joj] Rewrite and add support for generic embeds (closes #13268) --- youtube_dl/extractor/generic.py | 17 +++++ youtube_dl/extractor/joj.py | 108 ++++++++++++++++++++++---------- 2 files changed, 93 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 919f4f987..f2c577f98 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -91,6 +91,7 @@ from .anvato import AnvatoIE from .washingtonpost import WashingtonPostIE from .wistia import WistiaIE from .mediaset import MediasetIE +from .joj import JojIE class GenericIE(InfoExtractor): @@ -1770,6 +1771,16 @@ class GenericIE(InfoExtractor): }, 'add_ie': [MediasetIE.ie_key()], }, + { + # JOJ.sk embeds + 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok', + 'info_dict': { + 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok', + 'title': 'Slovenskom sa prehnala vlna silných búrok', + }, + 'playlist_mincount': 5, + 'add_ie': [JojIE.ie_key()], + }, { # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video) 'url': 'https://tvrain.ru/amp/418921/', @@ -2722,6 +2733,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) + # Look for JOJ.sk embeds + joj_urls = JojIE._extract_urls(webpage) + if joj_urls: + return self.playlist_from_matches( + joj_urls, video_id, video_title, ie=JojIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py index 2ebfec902..a764023e9 100755 --- a/youtube_dl/extractor/joj.py +++ b/youtube_dl/extractor/joj.py @@ -1,56 +1,100 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor import re +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + js_to_json, + try_get, +) + class JojIE(InfoExtractor): - _VALID_URL = r'https?://[a-z0-9]+\.joj\.sk/([^/]+/)*(?P<title_query>(?P<release_date>[0-9]{4}(-[0-9]{2}){2}).*)' # noqa + _VALID_URL = r'''(?x) + (?: + joj:| + https?://media\.joj\.sk/embed/ + ) + (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}) + ''' _TESTS = [{ - 'url': 'https://www.joj.sk/nove-byvanie/archiv/2017-05-28-nove-byvanie', # noqa + 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932', 'info_dict': { 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932', 'ext': 'mp4', - 'title': 'Nové Bývanie', - 'release_date': '20170528' + 'title': 'NOVÉ BÝVANIE', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 3118, } }, { - 'url': 'http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia', - 'info_dict': { - 'id': 'f18b2c5f-9ea8-4941-a164-a814c53306ad', - 'ext': 'mp4', - 'title': 'Starí Rodičia', - 'release_date': '20160906' - } + 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932', + 'only_matching': True, }] - media_src_url = 'http://n16.joj.sk/storage/' - xml_source_url = 'https://media.joj.sk/services/Video.php?clip=' + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', + webpage) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - release_date = mobj.group('release_date').replace('-', '') - webpage = self._download_webpage(url, 'id') - video_id = self._html_search_regex( - r'https?://([a-z0-9]+\.)joj\.sk/embed/(?P<video_id>[a-f0-9\-]+)', - webpage, 'id', group='video_id') - xml_playlist_url = self.xml_source_url + video_id - xml_playlist_et = self._download_xml(xml_playlist_url, 'XML playlist') + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://media.joj.sk/embed/%s' % video_id, video_id) + + title = self._search_regex( + (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<title>(?P<title>[^<]+)'), webpage, 'title', + default=None, group='title') or self._og_search_title(webpage) + + bitrates = self._parse_json( + self._search_regex( + r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates', + default='{}'), + video_id, transform_source=js_to_json, fatal=False) + formats = [] - for file_el in xml_playlist_et.findall('files/file'): - try: - height = int(file_el.attrib['id'].replace('p', '')) - except ValueError: - height = 0 - formats.append({'height': height, - 'url': self.media_src_url + file_el.attrib['path'].replace( # noqa - 'dat/', '', 1)}) + for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []: + if isinstance(format_url, compat_str): + height = self._search_regex( + r'(\d+)[pP]\.', format_url, 'height', default=None) + formats.append({ + 'url': format_url, + 'format_id': '%sp' % height if height else None, + 'height': int(height), + }) + if not formats: + playlist = self._download_xml( + 'https://media.joj.sk/services/Video.php?clip=%s' % video_id, + video_id) + for file_el in playlist.findall('./files/file'): + path = file_el.get('path') + if not path: + continue + format_id = file_el.get('id') or file_el.get('label') + formats.append({ + 'url': 'http://n16.joj.sk/storage/%s' % path.replace( + 'dat/', '', 1), + 'format_id': format_id, + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', format_id or path, 'height', + default=None)), + }) self._sort_formats(formats) + thumbnail = self._og_search_thumbnail(webpage) + + duration = int_or_none(self._search_regex( + r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) + return { 'id': video_id, - 'title': self._og_search_title(webpage).title(), + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, 'formats': formats, - 'release_date': release_date } From 6e925598d68f5d5216aa3e9abed5c7706a68c891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 19:15:48 +0700 Subject: [PATCH 0025/2417] [csjw] Add coding cookie --- youtube_dl/extractor/cjsw.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py index dd271586f..505bdbe16 100644 --- a/youtube_dl/extractor/cjsw.py +++ b/youtube_dl/extractor/cjsw.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re From 71a1db89198100a0e9bc5099aeed622264690203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 20:06:24 +0700 Subject: [PATCH 0026/2417] [dailymail] Add support for embeds --- youtube_dl/extractor/dailymail.py | 17 ++++++++++++++--- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py index 538565c66..af3978035 100644 --- a/youtube_dl/extractor/dailymail.py +++ b/youtube_dl/extractor/dailymail.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -12,8 +14,8 @@ from ..utils import ( class DailyMailIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html', 'md5': 'f6129624562251f628296c3a9ffde124', 'info_dict': { @@ -22,7 +24,16 @@ class DailyMailIE(InfoExtractor): 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', } - } + }, { + 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)', + webpage) def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f2c577f98..5e8890d41 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -57,6 +57,7 @@ from .dailymotion import ( DailymotionIE, DailymotionCloudIE, ) +from .dailymail import DailyMailIE from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE from .mtv import MTVServicesEmbeddedIE @@ -760,6 +761,20 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Dailymotion'], }, + # DailyMail embed + { + 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot', + 'info_dict': { + 'id': '1495629', + 'ext': 'mp4', + 'title': 'Care worker punches elderly dementia patient in head 11 times', + 'description': 'md5:3a743dee84e57e48ec68bf67113199a5', + }, + 'add_ie': ['DailyMail'], + 'params': { + 'skip_download': True, + }, + }, # YouTube embed { 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html', @@ -2190,6 +2205,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) + # Look for DailyMail embeds + dailymail_urls = DailyMailIE._extract_urls(webpage) + if dailymail_urls: + return self.playlist_from_matches( + dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key()) + # Look for embedded Wistia player wistia_url = WistiaIE._extract_url(webpage) if wistia_url: From 207acd8465b51d9d00d2bdda22f10858eb7f1bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 20:15:15 +0700 Subject: [PATCH 0027/2417] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5d07c12cb..edfde8b6f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,27 @@ version <unreleased> +Core ++ [extractor/common] Add support for AMP tags in _parse_html5_media_entries ++ [utils] Support attributes with no values in get_elements_by_attribute + Extractors ++ [dailymail] Add support for embeds ++ [joj] Add support for joj.sk (#13268) +* [abc.net.au:iview] Extract more formats (#13492, #13489) +* [egghead:course] Fix extraction (#6635, #13370) ++ [cjsw] Add support for cjsw.com (#13525) ++ [eagleplatform] Add support for referrer protected videos (#13557) ++ [eagleplatform] Add support for another embed pattern (#13557) +* [veoh] Extend URL regular expression (#13601) +* [npo:live] Fix live stream id extraction (#13568, #13605) +* [googledrive] Fix height extraction (#13603) ++ [dailymotion] Add support for new layout (#13580) - [yam] Remove extractor +* [xhamster] Extract all formats and fix duration extraction (#13593) ++ [xhamster] Add support for new URL schema (#13593) +* [espn] Extend URL regular expression (#13244, #13549) +* [kaltura] Fix typo in subtitles extraction (#13569) +* [vier] Adapt extraction to redesign (#13575) version 2017.07.02 From 65c416dda896f8a0023f01547e6b707dd57ed30a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Jul 2017 20:16:38 +0700 Subject: [PATCH 0028/2417] release 2017.07.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 974603507..c4314855d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.02 +[debug] youtube-dl version 2017.07.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index edfde8b6f..c379cae71 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.07.09 Core + [extractor/common] Add support for AMP tags in _parse_html5_media_entries diff --git a/docs/supportedsites.md b/docs/supportedsites.md index db2e2bac9..b6a147faf 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -154,6 +154,7 @@ - **chirbit** - **chirbit:profile** - **Cinchcast** + - **CJSW** - **Clipfish** - **cliphunter** - **ClipRs** @@ -369,6 +370,7 @@ - **Jamendo** - **JamendoAlbum** - **JeuxVideo** + - **Joj** - **Jove** - **jpopsuki.tv** - **JWPlatform** @@ -996,7 +998,6 @@ - **XVideos** - **XXXYMovies** - **Yahoo**: Yahoo screen and movies - - **Yam**: 蕃薯藤yam天空部落 - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0db974f97..14358a74c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.02' +__version__ = '2017.07.09' From 7bf539edcc3dc44481d5196fd01637698653ffc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 10 Jul 2017 00:14:41 +0700 Subject: [PATCH 0029/2417] [eagleplatform] Fix test --- youtube_dl/extractor/eagleplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 34891a362..42789278e 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -53,7 +53,7 @@ class EaglePlatformIE(InfoExtractor): 'skip': 'Georestricted', }, { # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) - 'url': 'tvrainru.media.eagleplatform.com:582306', + 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', 'only_matching': True, }] From b71c18b4343d54ce8373e9a11df882aca1ae82a0 Mon Sep 17 00:00:00 2001 From: coreynicholson <coreynicholson@users.noreply.github.com> Date: Sun, 9 Jul 2017 22:24:04 +0100 Subject: [PATCH 0030/2417] [vlive:playlist] Add extractor --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/vlive.py | 56 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9ee080895..eb1541729 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1206,7 +1206,8 @@ from .vk import ( ) from .vlive import ( VLiveIE, - VLiveChannelIE + VLiveChannelIE, + VLivePlaylistIE ) from .vodlocker import VodlockerIE from .vodpl import VODPlIE diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index e58940607..f3825db5c 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -49,6 +49,10 @@ class VLiveIE(InfoExtractor): }, }] + @classmethod + def suitable(cls, url): + return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url) + def _real_extract(self, url): video_id = self._match_id(url) @@ -261,3 +265,55 @@ class VLiveChannelIE(InfoExtractor): return self.playlist_result( entries, channel_code, channel_name) + + +class VLivePlaylistIE(InfoExtractor): + IE_NAME = 'vlive:playlist' + _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.vlive.tv/video/22867/playlist/22912', + 'info_dict': { + 'id': '22912', + 'title': 'Valentine Day Message from TWICE' + }, + 'playlist_mincount': 9 + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + video_id_match = re.match(self._VALID_URL, url) + assert video_id_match + video_id = compat_str(video_id_match.group('video_id')) + + VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s' + if self._downloader.params.get('noplaylist'): + self.to_screen( + 'Downloading just video %s because of --no-playlist' % video_id) + return self.url_result( + VIDEO_URL_TEMPLATE % video_id, + ie=VLiveIE.ie_key(), video_id=video_id) + + self.to_screen( + 'Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + + webpage = self._download_webpage( + 'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id) + + playlist_name = self._html_search_regex( + r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)', + webpage, 'playlist name', fatal=False) + + item_ids = self._search_regex( + r'\bvar\s+playlistVideoSeqs\s*=\s*(\[[^]]+\])', + webpage, 'playlist item ids') + + entries = [] + for item_id in self._parse_json(item_ids, playlist_id): + item_id = compat_str(item_id) + entries.append( + self.url_result( + VIDEO_URL_TEMPLATE % item_id, + ie=VLiveIE.ie_key(), video_id=item_id)) + + return self.playlist_result( + entries, playlist_id, playlist_name) From e3cd1fcdd177613acae4198cafbff51fbbb912c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 10 Jul 2017 04:32:24 +0700 Subject: [PATCH 0031/2417] [vlive:playlist] Relax and simplify --- youtube_dl/extractor/vlive.py | 41 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index f3825db5c..77c120a57 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -280,10 +280,8 @@ class VLivePlaylistIE(InfoExtractor): } def _real_extract(self, url): - playlist_id = self._match_id(url) - video_id_match = re.match(self._VALID_URL, url) - assert video_id_match - video_id = compat_str(video_id_match.group('video_id')) + mobj = re.match(self._VALID_URL, url) + video_id, playlist_id = mobj.group('video_id', 'id') VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s' if self._downloader.params.get('noplaylist'): @@ -294,26 +292,27 @@ class VLivePlaylistIE(InfoExtractor): ie=VLiveIE.ie_key(), video_id=video_id) self.to_screen( - 'Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + 'Downloading playlist %s - add --no-playlist to just download video' + % playlist_id) webpage = self._download_webpage( - 'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id) + 'http://www.vlive.tv/video/%s/playlist/%s' + % (video_id, playlist_id), playlist_id) + + item_ids = self._parse_json( + self._search_regex( + r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage, + 'playlist video seqs'), + playlist_id) + + entries = [ + self.url_result( + VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(), + video_id=compat_str(item_id)) + for item_id in item_ids] playlist_name = self._html_search_regex( r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)', - webpage, 'playlist name', fatal=False) + webpage, 'playlist title', fatal=False) - item_ids = self._search_regex( - r'\bvar\s+playlistVideoSeqs\s*=\s*(\[[^]]+\])', - webpage, 'playlist item ids') - - entries = [] - for item_id in self._parse_json(item_ids, playlist_id): - item_id = compat_str(item_id) - entries.append( - self.url_result( - VIDEO_URL_TEMPLATE % item_id, - ie=VLiveIE.ie_key(), video_id=item_id)) - - return self.playlist_result( - entries, playlist_id, playlist_name) + return self.playlist_result(entries, playlist_id, playlist_name) From c3c94ca4a40504147fce387ffb7eb9cb43233550 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 10 Jul 2017 21:34:27 +0800 Subject: [PATCH 0032/2417] [giantbomb] Extract m3u8 formats (closes #13626) --- ChangeLog | 6 ++++++ youtube_dl/extractor/giantbomb.py | 14 ++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index c379cae71..a37d621a9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [giantbomb] Extract m3u8 formats (#13626) + + version 2017.07.09 Core diff --git a/youtube_dl/extractor/giantbomb.py b/youtube_dl/extractor/giantbomb.py index 29b684d35..6a1b1e96e 100644 --- a/youtube_dl/extractor/giantbomb.py +++ b/youtube_dl/extractor/giantbomb.py @@ -5,9 +5,10 @@ import json from .common import InfoExtractor from ..utils import ( - unescapeHTML, - qualities, + determine_ext, int_or_none, + qualities, + unescapeHTML, ) @@ -15,7 +16,7 @@ class GiantBombIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)' _TEST = { 'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/', - 'md5': '57badeface303ecf6b98b812de1b9018', + 'md5': 'c8ea694254a59246a42831155dec57ac', 'info_dict': { 'id': '2300-9782', 'display_id': 'quick-look-destiny-the-dark-below', @@ -51,11 +52,16 @@ class GiantBombIE(InfoExtractor): for format_id, video_url in video['videoStreams'].items(): if format_id == 'f4m_stream': continue - if video_url.endswith('.f4m'): + ext = determine_ext(video_url) + if ext == 'f4m': f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id) if f4m_formats: f4m_formats[0]['quality'] = quality(format_id) formats.extend(f4m_formats) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, display_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) else: formats.append({ 'url': video_url, From bb13949197458fc6bd888bbe9255c391927a997b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 7 Jun 2017 14:47:25 +0800 Subject: [PATCH 0033/2417] [niconico] Check login errors (#12486) --- youtube_dl/extractor/niconico.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index f268a72d5..695e32e59 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -1,23 +1,22 @@ # coding: utf-8 from __future__ import unicode_literals -import re import json import datetime from .common import InfoExtractor from ..compat import ( + compat_parse_qs, compat_urlparse, ) from ..utils import ( + determine_ext, ExtractorError, int_or_none, parse_duration, parse_iso8601, - sanitized_Request, - xpath_text, - determine_ext, urlencode_postdata, + xpath_text, ) @@ -101,19 +100,24 @@ class NiconicoIE(InfoExtractor): return True # Log in + login_ok = True login_form_strs = { - 'mail': username, + 'mail_tel': username, 'password': password, } - login_data = urlencode_postdata(login_form_strs) - request = sanitized_Request( - 'https://secure.nicovideo.jp/secure/login', login_data) - login_results = self._download_webpage( - request, None, note='Logging in', errnote='Unable to log in') - if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None: + urlh = self._request_webpage( + 'https://account.nicovideo.jp/api/v1/login', None, + note='Logging in', errnote='Unable to log in', + data=urlencode_postdata(login_form_strs)) + if urlh is False: + login_ok = False + else: + parts = compat_urlparse.urlparse(urlh.geturl()) + if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login': + login_ok = False + if not login_ok: self._downloader.report_warning('unable to log in: bad username or password') - return False - return True + return login_ok def _real_extract(self, url): video_id = self._match_id(url) From 708f6f511e1d03363c9dbac7ed42cbdebfbc7718 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 11 Jul 2017 15:04:45 +0800 Subject: [PATCH 0034/2417] [niconico] Fix authentication error handling (closes #12486) --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index a37d621a9..db1776dce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [niconico] Fix authentication error handling (#12486) * [giantbomb] Extract m3u8 formats (#13626) From 2edfd745df4b7a764d7456e7536e47ef140de24d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 4 Jun 2017 00:41:55 +0800 Subject: [PATCH 0035/2417] [twitter] Extract mp4 urls via mobile API (closes #12726) --- youtube_dl/extractor/twitter.py | 123 +++++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 37e3bc412..2041b4199 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -7,11 +7,13 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( determine_ext, - float_or_none, - xpath_text, - remove_end, - int_or_none, + dict_get, ExtractorError, + float_or_none, + int_or_none, + remove_end, + try_get, + xpath_text, ) from .periscope import PeriscopeIE @@ -22,6 +24,15 @@ class TwitterBaseIE(InfoExtractor): vmap_data = self._download_xml(vmap_url, video_id) return xpath_text(vmap_data, './/MediaFile').strip() + @staticmethod + def _search_dimensions_in_video_url(a_format, video_url): + m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) + if m: + a_format.update({ + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + class TwitterCardIE(TwitterBaseIE): IE_NAME = 'twitter:card' @@ -90,6 +101,59 @@ class TwitterCardIE(TwitterBaseIE): }, ] + def _parse_media_info(self, media_info, video_id): + formats = [] + for media_variant in media_info.get('variants', []): + media_url = media_variant['url'] + if media_url.endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls')) + elif media_url.endswith('.mpd'): + formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash')) + else: + vbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000) + a_format = { + 'url': media_url, + 'format_id': 'http-%d' % vbr if vbr else 'http', + 'vbr': vbr, + } + # Reported bitRate may be zero + if not a_format['vbr']: + del a_format['vbr'] + + self._search_dimensions_in_video_url(a_format, media_url) + + formats.append(a_format) + return formats + + def _extract_mobile_formats(self, username, video_id): + webpage = self._download_webpage( + 'https://mobile.twitter.com/%s/status/%s' % (username, video_id), + video_id, 'Downloading mobile webpage', + headers={ + # A recent mobile UA is necessary for `gt` cookie + 'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0', + }) + main_script_url = self._html_search_regex( + r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL') + main_script = self._download_webpage( + main_script_url, video_id, 'Downloading main script') + bearer_token = self._search_regex( + r'BEARER_TOKEN\s*:\s*"([^"]+)"', + main_script, 'bearer token') + guest_token = self._search_regex( + r'document\.cookie\s*=\s*decodeURIComponent\("gt=(\d+)', + webpage, 'guest token') + api_data = self._download_json( + 'https://api.twitter.com/2/timeline/conversation/%s.json' % video_id, + video_id, 'Downloading mobile API data', + headers={ + 'Authorization': 'Bearer ' + bearer_token, + 'x-guest-token': guest_token, + }) + media_info = try_get(api_data, lambda o: o['globalObjects']['tweets'][video_id] + ['extended_entities']['media'][0]['video_info']) or {} + return self._parse_media_info(media_info, video_id) + def _real_extract(self, url): video_id = self._match_id(url) @@ -117,14 +181,6 @@ class TwitterCardIE(TwitterBaseIE): if periscope_url: return self.url_result(periscope_url, PeriscopeIE.ie_key()) - def _search_dimensions_in_video_url(a_format, video_url): - m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) - if m: - a_format.update({ - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') if video_url: @@ -135,7 +191,7 @@ class TwitterCardIE(TwitterBaseIE): 'url': video_url, } - _search_dimensions_in_video_url(f, video_url) + self._search_dimensions_in_video_url(f, video_url) formats.append(f) @@ -152,29 +208,14 @@ class TwitterCardIE(TwitterBaseIE): media_info = entity['mediaInfo'] if media_info: - for media_variant in media_info['variants']: - media_url = media_variant['url'] - if media_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls')) - elif media_url.endswith('.mpd'): - formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash')) - else: - vbr = int_or_none(media_variant.get('bitRate'), scale=1000) - a_format = { - 'url': media_url, - 'format_id': 'http-%d' % vbr if vbr else 'http', - 'vbr': vbr, - } - # Reported bitRate may be zero - if not a_format['vbr']: - del a_format['vbr'] - - _search_dimensions_in_video_url(a_format, media_url) - - formats.append(a_format) - + formats.extend(self._parse_media_info(media_info, video_id)) duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9) + username = config.get('user', {}).get('screen_name') + if username: + formats.extend(self._extract_mobile_formats(username, video_id)) + + self._remove_duplicate_formats(formats) self._sort_formats(formats) title = self._search_regex(r'<title>([^<]+)', webpage, 'title') @@ -301,6 +342,20 @@ class TwitterIE(InfoExtractor): 'timestamp': 1474613214, }, 'add_ie': ['Periscope'], + }, { + # has mp4 formats via mobile API + 'url': 'https://twitter.com/news_al3alm/status/852138619213144067', + 'info_dict': { + 'id': '852138619213144067', + 'ext': 'mp4', + 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', + 'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"', + 'uploader': 'عالم الأخبار', + 'uploader_id': 'news_al3alm', + }, + 'params': { + 'format': 'best[format_id^=http-]', + }, }] def _real_extract(self, url): From 7f176ac4775aed5e5a72d2f0c4b579d1b886d419 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Jul 2017 15:35:19 +0800 Subject: [PATCH 0036/2417] [periscope] Support pscp.tv URLs in embedded frames And fix a relevant twitter test --- ChangeLog | 1 + youtube_dl/extractor/periscope.py | 2 +- youtube_dl/extractor/twitter.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index db1776dce..ffb4b6994 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [periscope] Support pscp.tv URLs in embedded frames * [niconico] Fix authentication error handling (#12486) * [giantbomb] Extract m3u8 formats (#13626) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 1add6b840..bfa12edc9 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -49,7 +49,7 @@ class PeriscopeIE(PeriscopeBaseIE): @staticmethod def _extract_url(webpage): mobj = re.search( - r']+src=([\'"])(?P(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage) + r']+src=([\'"])(?P(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage) if mobj: return mobj.group('url') diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 2041b4199..e4bc7e012 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -335,10 +335,11 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '1zqKVVlkqLaKB', 'ext': 'mp4', - 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence', + 'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence', + 'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"', 'upload_date': '20160923', 'uploader_id': 'OPP_HSD', - 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police', + 'uploader': 'Sgt Kerry Schmidt', 'timestamp': 1474613214, }, 'add_ie': ['Periscope'], From 9be31e771cd9481ea690c01eed398645deadc1de Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Jul 2017 15:48:34 +0800 Subject: [PATCH 0037/2417] [twitter] Support HLS streams in vmap URLs --- ChangeLog | 1 + youtube_dl/extractor/twitter.py | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index ffb4b6994..8e6451144 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [twitter] Support HLS streams in vmap URLs + [periscope] Support pscp.tv URLs in embedded frames * [niconico] Fix authentication error handling (#12486) * [giantbomb] Extract m3u8 formats (#13626) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index e4bc7e012..89eabe77e 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -20,9 +20,16 @@ from .periscope import PeriscopeIE class TwitterBaseIE(InfoExtractor): - def _get_vmap_video_url(self, vmap_url, video_id): + def _extract_formats_from_vmap_url(self, vmap_url, video_id): vmap_data = self._download_xml(vmap_url, video_id) - return xpath_text(vmap_data, './/MediaFile').strip() + video_url = xpath_text(vmap_data, './/MediaFile').strip() + if determine_ext(video_url) == 'm3u8': + return self._extract_m3u8_formats( + video_url, video_id, ext='mp4', m3u8_id='hls', + entry_protocol='m3u8_native') + return [{ + 'url': video_url, + }] @staticmethod def _search_dimensions_in_video_url(a_format, video_url): @@ -197,9 +204,8 @@ class TwitterCardIE(TwitterBaseIE): vmap_url = config.get('vmapUrl') or config.get('vmap_url') if vmap_url: - formats.append({ - 'url': self._get_vmap_video_url(vmap_url, video_id), - }) + formats.extend( + self._extract_formats_from_vmap_url(vmap_url, video_id)) media_info = None @@ -449,7 +455,7 @@ class TwitterAmplifyIE(TwitterBaseIE): vmap_url = self._html_search_meta( 'twitter:amplify:vmap', webpage, 'vmap url') - video_url = self._get_vmap_video_url(vmap_url, video_id) + formats = self._extract_formats_from_vmap_url(vmap_url, video_id) thumbnails = [] thumbnail = self._html_search_meta( @@ -471,11 +477,10 @@ class TwitterAmplifyIE(TwitterBaseIE): }) video_w, video_h = _find_dimension('player') - formats = [{ - 'url': video_url, + formats[0].update({ 'width': video_w, 'height': video_h, - }] + }) return { 'id': video_id, From e8f20ffa032a791548a66bb7b694c424673537e6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Jul 2017 16:05:15 +0800 Subject: [PATCH 0038/2417] [vine] Make sure the title won't be empty And fix a relevant TwitterCard test case --- ChangeLog | 1 + youtube_dl/extractor/twitter.py | 1 + youtube_dl/extractor/vine.py | 6 ++++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8e6451144..a5de3c232 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [vine] Make sure the title won't be empty + [twitter] Support HLS streams in vmap URLs + [periscope] Support pscp.tv URLs in embedded frames * [niconico] Fix authentication error handling (#12486) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 89eabe77e..2ff55412b 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -91,6 +91,7 @@ class TwitterCardIE(TwitterBaseIE): 'uploader_id': '1189339351084113920', 'uploader': 'ArsenalTerje', 'title': 'Vine by ArsenalTerje', + 'timestamp': 1447451307, }, 'add_ie': ['Vine'], }, { diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 4957a07f7..46950d3a1 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -92,10 +92,12 @@ class VineIE(InfoExtractor): username = data.get('username') + alt_title = 'Vine by %s' % username if username else None + return { 'id': video_id, - 'title': data.get('description'), - 'alt_title': 'Vine by %s' % username if username else None, + 'title': data.get('description') or alt_title or 'Vine video', + 'alt_title': alt_title, 'thumbnail': data.get('thumbnailUrl'), 'timestamp': unified_timestamp(data.get('created')), 'uploader': username, From 3615bfe1b4b97cb35ddd63c455160a50c2a10961 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Jul 2017 16:46:37 +0800 Subject: [PATCH 0039/2417] [twitter] Fix remaining tests --- youtube_dl/extractor/twitter.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 2ff55412b..6eaf360a6 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -54,7 +54,8 @@ class TwitterCardIE(TwitterBaseIE): 'title': 'Twitter Card', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 30.033, - } + }, + 'skip': 'Video gone', }, { 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768', @@ -66,6 +67,7 @@ class TwitterCardIE(TwitterBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'duration': 80.155, }, + 'skip': 'Video gone', }, { 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', @@ -83,7 +85,7 @@ class TwitterCardIE(TwitterBaseIE): }, { 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568', - 'md5': 'ab2745d0b0ce53319a534fccaa986439', + 'md5': '6dabeaca9e68cbb71c99c322a4b42a11', 'info_dict': { 'id': 'iBb2x00UVlv', 'ext': 'mp4', @@ -96,12 +98,12 @@ class TwitterCardIE(TwitterBaseIE): 'add_ie': ['Vine'], }, { 'url': 'https://twitter.com/i/videos/tweet/705235433198714880', - 'md5': '3846d0a07109b5ab622425449b59049d', + 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88', 'info_dict': { 'id': '705235433198714880', 'ext': 'mp4', 'title': 'Twitter web player', - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*', }, }, { 'url': 'https://twitter.com/i/videos/752274308186120192', @@ -303,10 +305,10 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'Donte - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'Donte on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'JG', + 'uploader': 'Donte', 'uploader_id': 'jaydingeer', }, 'params': { @@ -318,9 +320,11 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': 'MIOxnrUteUd', 'ext': 'mp4', - 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン', - 'uploader': 'TAKUMA', - 'uploader_id': '1004126642786242560', + 'title': 'FilmDrunk - Vine of the day', + 'description': 'FilmDrunk on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"', + 'uploader': 'FilmDrunk', + 'uploader_id': 'Filmdrunk', + 'timestamp': 1402826626, 'upload_date': '20140615', }, 'add_ie': ['Vine'], From f2bb33a9868e499f4582fee24a7b67d559d33575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Jul 2017 21:36:45 +0700 Subject: [PATCH 0040/2417] [ted] Fix subtitles extraction (closes #13628, closes #13629) --- youtube_dl/extractor/ted.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index f27d0e313..06a27fd04 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -271,20 +271,22 @@ class TEDIE(InfoExtractor): } def _get_subtitles(self, video_id, talk_info): - languages = [lang['languageCode'] for lang in talk_info.get('languages', [])] - if languages: - sub_lang_list = {} - for l in languages: - sub_lang_list[l] = [ - { - 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext), - 'ext': ext, - } - for ext in ['ted', 'srt'] - ] - return sub_lang_list - else: - return {} + sub_lang_list = {} + for language in try_get( + talk_info, + (lambda x: x['downloads']['languages'], + lambda x: x['languages']), list): + lang_code = language.get('languageCode') or language.get('ianaCode') + if not lang_code: + continue + sub_lang_list[lang_code] = [ + { + 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, lang_code, ext), + 'ext': ext, + } + for ext in ['ted', 'srt'] + ] + return sub_lang_list def _watch_info(self, url, name): webpage = self._download_webpage(url, name) From 9a0942ad55bba714d6eaeb9ee4f66a138ec85e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Jul 2017 22:59:36 +0700 Subject: [PATCH 0041/2417] [drtv] Make HLS and HDS extraction non fatal --- youtube_dl/extractor/drtv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index c84624f1e..69effba58 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -118,7 +118,7 @@ class DRTVIE(InfoExtractor): if target == 'HDS': f4m_formats = self._extract_f4m_formats( uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', - video_id, preference, f4m_id=format_id) + video_id, preference, f4m_id=format_id, fatal=False) if kind == 'AudioResource': for f in f4m_formats: f['vcodec'] = 'none' @@ -126,7 +126,8 @@ class DRTVIE(InfoExtractor): elif target == 'HLS': formats.extend(self._extract_m3u8_formats( uri, video_id, 'mp4', entry_protocol='m3u8_native', - preference=preference, m3u8_id=format_id)) + preference=preference, m3u8_id=format_id, + fatal=False)) else: bitrate = link.get('Bitrate') if bitrate: From 15da37c7dc8cf14ba5ce880aa1805fceaa71fc44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jul 2017 00:40:54 +0700 Subject: [PATCH 0042/2417] [YoutubeDL] Don't expand env variables in meta fields (closes #13637) --- test/test_YoutubeDL.py | 6 ++++++ youtube_dl/YoutubeDL.py | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 75945e38f..70989e232 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -527,6 +527,8 @@ class TestYoutubeDL(unittest.TestCase): 'ext': 'mp4', 'width': None, 'height': 1080, + 'title1': '$PATH', + 'title2': '%PATH%', } def fname(templ): @@ -545,10 +547,14 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%%'), '%') + self.assertEqual(fname('%%%%'), '%%') self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4') self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4') self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s') self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') + self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH') + self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%') def test_format_note(self): ydl = YoutubeDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 60ee4b7d8..8730d32ef 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -20,6 +20,7 @@ import re import shutil import subprocess import socket +import string import sys import time import tokenize @@ -674,7 +675,19 @@ class YoutubeDL(object): FORMAT_RE.format(numeric_field), r'%({0})s'.format(numeric_field), outtmpl) - filename = expand_path(outtmpl % template_dict) + # expand_path translates '%%' into '%' and '$$' into '$' + # correspondingly that is not what we want since we need to keep + # '%%' intact for template dict substitution step. Working around + # with boundary-alike separator hack. + sep = ''.join([random.choice(string.ascii_letters) for _ in range(32)]) + outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) + + # outtmpl should be expand_path'ed before template dict substitution + # because meta fields may contain env variables we don't want to + # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and + # title "Hello $PATH", we don't want `$PATH` to be expanded. + filename = expand_path(outtmpl).replace(sep, '') % template_dict + # Temporary fix for #4787 # 'Treat' all problem characters by passing filename through preferredencoding # to workaround encoding issues with subprocess on python2 @ Windows From f354d8480700c5e6f288bfce497a363b4c6f0859 Mon Sep 17 00:00:00 2001 From: rrooij Date: Fri, 14 Jul 2017 17:10:17 +0200 Subject: [PATCH 0043/2417] [5tv] Add another video URL pattern (closes #13354) --- youtube_dl/extractor/fivetv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py index 15736c9fe..9f9863746 100644 --- a/youtube_dl/extractor/fivetv.py +++ b/youtube_dl/extractor/fivetv.py @@ -43,7 +43,7 @@ class FiveTVIE(InfoExtractor): 'info_dict': { 'id': 'glavnoe', 'ext': 'mp4', - 'title': 'Итоги недели с 8 по 14 июня 2015 года', + 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$', 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -70,7 +70,8 @@ class FiveTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r']+?href="([^"]+)"[^>]+?class="videoplayer"', + [r']+?class="flowplayer[^>]+?data-href="([^"]+)"', + r']+?href="([^"]+)"[^>]+?class="videoplayer"'], webpage, 'video url') title = self._og_search_title(webpage, default=None) or self._search_regex( From 00dbdfc1f741b919a0add36394065ce1aeccfda8 Mon Sep 17 00:00:00 2001 From: satunnainen Date: Fri, 14 Jul 2017 18:11:07 +0300 Subject: [PATCH 0044/2417] [slideshare] Fix extraction --- youtube_dl/extractor/slideshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 74a1dc672..e89ebebe7 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -31,7 +31,7 @@ class SlideshareIE(InfoExtractor): page_title = mobj.group('title') webpage = self._download_webpage(url, page_title) slideshare_obj = self._search_regex( - r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);', + r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);', webpage, 'slideshare object') info = json.loads(slideshare_obj) if info['slideshow']['type'] != 'video': From 7d02dcfaa2589453ee3cc6c88ee27f04c252f8a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jul 2017 22:37:04 +0700 Subject: [PATCH 0045/2417] [youtube] Don't capture YouTube Red ad for creator meta field (closes #13621) --- youtube_dl/extractor/youtube.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 77cd271ef..4597ccb3a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -673,6 +673,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) + # YouTube Red ad is not captured for creator { 'url': '__2ABJjxzNo', 'info_dict': { @@ -1649,7 +1650,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_webpage, 'license', default=None) m_music = re.search( - r']+class="title"[^>]*>\s*Music\s*\s*]*>\s*
  • (?P.+?) by (?P<creator>.+?)(?:\(.+?\))?</li', + r'''(?x) + <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s* + <ul[^>]*>\s* + <li>(?P<title>.+?) + by (?P<creator>.+?) + (?: + \(.+?\)| + <a[^>]* + (?: + \bhref=["\']/red[^>]*>| # drop possible + >\s*Listen ad-free with YouTube Red # YouTube Red ad + ) + .*? + )?</li + ''', video_webpage) if m_music: video_alt_title = remove_quotes(unescapeHTML(m_music.group('title'))) From 2583c0b54e56f6dbce85a079d91a05e9b13c2dce Mon Sep 17 00:00:00 2001 From: Robin Neatherway <robin.neatherway@gmail.com> Date: Fri, 14 Jul 2017 17:08:32 +0100 Subject: [PATCH 0046/2417] Fix bugs caused by typos --- youtube_dl/downloader/ism.py | 3 +-- youtube_dl/extractor/audioboom.py | 2 +- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/karrierevideos.py | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 5f6f9faef..9b001ecff 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -98,7 +98,7 @@ def write_piff_header(stream, params): if is_audio: smhd_payload = s88.pack(0) # balance - smhd_payload = u16.pack(0) # reserved + smhd_payload += u16.pack(0) # reserved media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header else: vmhd_payload = u16.pack(0) # graphics mode @@ -126,7 +126,6 @@ def write_piff_header(stream, params): if fourcc == 'AACL': sample_entry_box = box(b'mp4a', sample_entry_payload) else: - sample_entry_payload = sample_entry_payload sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u32.pack(0) * 3 # pre defined diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index e48bb8972..393f381c6 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -43,7 +43,7 @@ class AudioBoomIE(InfoExtractor): def from_clip(field): if clip: - clip.get(field) + return clip.get(field) audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property( 'audio', webpage, 'audio url') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5e8890d41..8c2ff39d5 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2740,7 +2740,7 @@ class GenericIE(InfoExtractor): rutube_urls = RutubeIE._extract_urls(webpage) if rutube_urls: return self.playlist_from_matches( - rutube_urls, ie=RutubeIE.ie_key()) + rutube_urls, video_id, video_title, ie=RutubeIE.ie_key()) # Look for WashingtonPost embeds wapo_urls = WashingtonPostIE._extract_urls(webpage) diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index 4e9eb67bf..f236a2f78 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -48,7 +48,7 @@ class KarriereVideosIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = (self._html_search_meta('title', webpage, default=None) or - self._search_regex(r'<h1 class="title">([^<]+)</h1>')) + self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title')) video_id = self._search_regex( r'/config/video/(.+?)\.xml', webpage, 'video id') From 4e826cd9aec383768a7b25aa3161efd4672f9310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 06:48:12 +0700 Subject: [PATCH 0047/2417] [nexx] Add extractor (closes #10807, closes #13465) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 22 +++ youtube_dl/extractor/nexx.py | 221 +++++++++++++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 youtube_dl/extractor/nexx.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index eb1541729..9d34447a9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -653,6 +653,7 @@ from .nextmedia import ( AppleDailyIE, NextTVIE, ) +from .nexx import NexxIE from .nfb import NFBIE from .nfl import NFLIE from .nhk import NhkVodIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8c2ff39d5..123a21296 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -36,6 +36,7 @@ from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) +from .nexx import NexxIE from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE @@ -1549,6 +1550,22 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['BrightcoveLegacy'], }, + # Nexx embed + { + 'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503', + 'info_dict': { + 'id': '247746', + 'ext': 'mp4', + 'title': "Yesterday's Jam (OV)", + 'description': 'md5:09bc0984723fed34e2581624a84e05f0', + 'timestamp': 1492594816, + 'upload_date': '20170419', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, # Facebook <iframe> embed { 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', @@ -2133,6 +2150,11 @@ class GenericIE(InfoExtractor): if bc_urls: return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew') + # Look for Nexx embeds + nexx_urls = NexxIE._extract_urls(webpage) + if nexx_urls: + return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key()) + # Look for ThePlatform embeds tp_urls = ThePlatformIE._extract_urls(webpage) if tp_urls: diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py new file mode 100644 index 000000000..60b42cb7d --- /dev/null +++ b/youtube_dl/extractor/nexx.py @@ -0,0 +1,221 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import hashlib +import random +import re +import time + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + parse_duration, + try_get, + urlencode_postdata, +) + + +class NexxIE(InfoExtractor): + _VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)' + _TESTS = [{ + # movie + 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907', + 'md5': '16746bfc28c42049492385c989b26c4a', + 'info_dict': { + 'id': '128907', + 'ext': 'mp4', + 'title': 'Stiftung Warentest', + 'alt_title': 'Wie ein Test abläuft', + 'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2', + 'release_year': 2013, + 'creator': 'SPIEGEL TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2509, + 'timestamp': 1384264416, + 'upload_date': '20131112', + }, + 'params': { + 'format': 'bestvideo', + }, + }, { + # episode + 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858', + 'info_dict': { + 'id': '247858', + 'ext': 'mp4', + 'title': 'Return of the Golden Child (OV)', + 'description': 'md5:5d969537509a92b733de21bae249dc63', + 'release_year': 2017, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1397, + 'timestamp': 1495033267, + 'upload_date': '20170517', + 'episode_number': 2, + 'season_number': 2, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, { + 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + # Reference: + # 1. https://nx-s.akamaized.net/files/201510/44.pdf + + entries = [] + + # JavaScript Integration + for domain_id, video_id in re.findall( + r'''(?isx) + <script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(\d+).+? + onPLAYReady.+? + _play\.init\s*\(.+?\s*,\s*(\d+)\s*,\s*.+?\) + ''', webpage): + entries.append('https://api.nexx.cloud/v3/%s/videos/byid/%s' % (domain_id, video_id)) + + # TODO: support more embed formats + + return entries + + def _handle_error(self, response): + status = int_or_none(try_get( + response, lambda x: x['metadata']['status']) or 200) + if 200 <= status < 300: + return + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']), + expected=True) + + def _call_api(self, domain_id, path, video_id, data=None, headers={}): + headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8' + result = self._download_json( + 'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id, + 'Downloading %s JSON' % path, data=urlencode_postdata(data), + headers=headers) + self._handle_error(result) + return result['result'] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + domain_id, video_id = mobj.group('domain_id', 'id') + + # Reverse engineered from JS code (see getDeviceID function) + device_id = '%d:%d:%d%d' % ( + random.randint(1, 4), int(time.time()), + random.randint(1e4, 99999), random.randint(1, 9)) + + result = self._call_api(domain_id, 'session/init', video_id, data={ + 'nxp_devh': device_id, + 'nxp_userh': '', + 'precid': '0', + 'playlicense': '0', + 'screenx': '1920', + 'screeny': '1080', + 'playerversion': '6.0.00', + 'gateway': 'html5', + 'adGateway': '', + 'explicitlanguage': 'en-US', + 'addTextTemplates': '1', + 'addDomainData': '1', + 'addAdModel': '1', + }, headers={ + 'X-Request-Enable-Auth-Fallback': '1', + }) + + cid = result['general']['cid'] + + # As described in [1] X-Request-Token generation algorithm is + # as follows: + # md5( operation + domain_id + domain_secret ) + # where domain_secret is a static value that will be given by nexx.tv + # as per [1]. Here is how this "secret" is generated (reversed + # from _play.api.init function, search for clienttoken). So it's + # actually not static and not that much of a secret. + # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf + secret = result['device']['clienttoken'][int(device_id[0]):] + secret = secret[0:len(secret) - int(device_id[-1])] + + op = 'byid' + + # Reversed from JS code for _play.api.call function (search for + # X-Request-Token) + request_token = hashlib.md5( + ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest() + + video = self._call_api( + domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={ + 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description', + 'addInteractionOptions': '1', + 'addStatusDetails': '1', + 'addStreamDetails': '1', + 'addCaptions': '1', + 'addScenes': '1', + 'addHotSpots': '1', + 'addBumpers': '1', + 'captionFormat': 'data', + }, headers={ + 'X-Request-CID': cid, + 'X-Request-Token': request_token, + }) + + general = video['general'] + title = general['title'] + + stream_data = video['streamdata'] + language = general.get('language_raw') or '' + + # TODO: reverse more cdns and formats + + cdn = stream_data['cdnType'] + assert cdn == 'azure' + + azure_locator = stream_data['azureLocator'] + + AZURE_URL = 'http://nx-p%02d.akamaized.net/' + + for secure in ('s', ''): + cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper()) + if cdn_shield: + azure_base = 'http%s://%s' % (secure, cdn_shield) + break + else: + azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', '')) + + is_ml = ',' in language + azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % ( + azure_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + + protection_token = try_get( + video, lambda x: x['protectiondata']['token'], compat_str) + if protection_token: + azure_m3u8_url += '?hdnts=%s' % protection_token + + formats = self._extract_m3u8_formats( + azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='%s-hls' % cdn) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'alt_title': general.get('subtitle'), + 'description': general.get('description'), + 'release_year': int_or_none(general.get('year')), + 'creator': general.get('studio') or general.get('studio_adref'), + 'thumbnail': try_get( + video, lambda x: x['imagedata']['thumb'], compat_str), + 'duration': parse_duration(general.get('runtime')), + 'timestamp': int_or_none(general.get('uploaded')), + 'episode_number': int_or_none(try_get( + video, lambda x: x['episodedata']['episode'])), + 'season_number': int_or_none(try_get( + video, lambda x: x['episodedata']['season'])), + 'formats': formats, + } From c7604d79e9993e76845141a61fdf3c308af917e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 06:52:23 +0700 Subject: [PATCH 0048/2417] [spiegeltv] Delegate extraction to nexx (closes #13159) --- youtube_dl/extractor/spiegeltv.py | 113 +++--------------------------- 1 file changed, 8 insertions(+), 105 deletions(-) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index e1cfb8698..6ccf4c342 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -1,114 +1,17 @@ -# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse -from ..utils import ( - determine_ext, - float_or_none, -) +from .nexx import NexxIE class SpiegeltvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)' - _TESTS = [{ - 'url': 'http://www.spiegel.tv/filme/flug-mh370/', - 'info_dict': { - 'id': 'flug-mh370', - 'ext': 'm4v', - 'title': 'Flug MH370', - 'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines', - 'thumbnail': r're:http://.*\.jpg$', - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/', + _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)' + _TEST = { + 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/', 'only_matching': True, - }] + } def _real_extract(self, url): - if '/#/' in url: - url = url.replace('/#/', '/') - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title') - - apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com' - version_json = self._download_json( - '%s/version.json' % apihost, video_id, - note='Downloading version information') - version_name = version_json['version_name'] - - slug_json = self._download_json( - '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id), - video_id, - note='Downloading object information') - oid = slug_json['object_id'] - - media_json = self._download_json( - '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid), - video_id, note='Downloading media information') - uuid = media_json['uuid'] - is_wide = media_json['is_wide'] - - server_json = self._download_json( - 'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json', - video_id, note='Downloading server information') - - format = '16x9' if is_wide else '4x3' - - formats = [] - for streamingserver in server_json['streamingserver']: - endpoint = streamingserver.get('endpoint') - if not endpoint: - continue - play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, format) - if endpoint.startswith('rtmp'): - formats.append({ - 'url': endpoint, - 'format_id': 'rtmp', - 'app': compat_urllib_parse_urlparse(endpoint).path[1:], - 'play_path': play_path, - 'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf', - 'ext': 'flv', - 'rtmp_live': True, - }) - elif determine_ext(endpoint) == 'm3u8': - formats.append({ - 'url': endpoint.replace('[video]', play_path), - 'ext': 'm4v', - 'format_id': 'hls', # Prefer hls since it allows to workaround georestriction - 'protocol': 'm3u8', - 'preference': 1, - 'http_headers': { - 'Accept-Encoding': 'deflate', # gzip causes trouble on the server side - }, - }) - else: - formats.append({ - 'url': endpoint, - }) - self._check_formats(formats, video_id) - - thumbnails = [] - for image in media_json['images']: - thumbnails.append({ - 'url': image['url'], - 'width': image['width'], - 'height': image['height'], - }) - - description = media_json['subtitle'] - duration = float_or_none(media_json.get('duration_in_ms'), scale=1000) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'thumbnails': thumbnails, - 'formats': formats, - } + return self.url_result( + 'https://api.nexx.cloud/v3/748/videos/byid/%s' + % self._match_id(url), ie=NexxIE.ie_key()) From ea3f20494f64c18123c61f722a7864e3dbdde566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 07:02:05 +0700 Subject: [PATCH 0049/2417] [youtube] PEP 8 --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4597ccb3a..2e71795e7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1660,7 +1660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): <a[^>]* (?: \bhref=["\']/red[^>]*>| # drop possible - >\s*Listen ad-free with YouTube Red # YouTube Red ad + >\s*Listen ad-free with YouTube Red # YouTube Red ad ) .*? )?</li From 961ea474b6c6965d49a58d0400d0368fa0300b19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 07:02:14 +0700 Subject: [PATCH 0050/2417] [YoutubeDL] PEP 8 --- youtube_dl/YoutubeDL.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8730d32ef..89c07be29 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -20,13 +20,14 @@ import re import shutil import subprocess import socket -import string import sys import time import tokenize import traceback import random +from string import ascii_letters + from .compat import ( compat_basestring, compat_cookiejar, @@ -679,7 +680,7 @@ class YoutubeDL(object): # correspondingly that is not what we want since we need to keep # '%%' intact for template dict substitution step. Working around # with boundary-alike separator hack. - sep = ''.join([random.choice(string.ascii_letters) for _ in range(32)]) + sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) # outtmpl should be expand_path'ed before template dict substitution From ef78563e9c148d8a24382c282d74a960e244569e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 07:33:26 +0700 Subject: [PATCH 0051/2417] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index a5de3c232..5f3ea6b5f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,25 @@ version <unreleased> +Core +* [YoutubeDL] Don't expand environment variables in meta fields (#13637) + Extractors +* [spiegeltv] Delegate extraction to nexx extractor (#13159) ++ [nexx] Add support for nexx.cloud (#10807, #13465) +* [generic] Fix rutube embeds extraction (#13641) +* [karrierevideos] Fix title extraction (#13641) +* [youtube] Don't capture YouTube Red ad for creator meta field (#13621) +* [slideshare] Fix extraction (#13617) ++ [5tv] Add another video URL pattern (#13354, #13606) +* [drtv] Make HLS and HDS extraction non fatal +* [ted] Fix subtitles extraction (#13628, #13629) * [vine] Make sure the title won't be empty + [twitter] Support HLS streams in vmap URLs + [periscope] Support pscp.tv URLs in embedded frames +* [twitter] Extract mp4 urls via mobile API (#12726) * [niconico] Fix authentication error handling (#12486) * [giantbomb] Extract m3u8 formats (#13626) ++ [vlive:playlist] Add support for playlists (#13613) version 2017.07.09 From cea931a9e5cf682eafe5b7cdacbbe22630b7a9e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Jul 2017 07:36:05 +0700 Subject: [PATCH 0052/2417] release 2017.07.15 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c4314855d..0f20d0485 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.09 +[debug] youtube-dl version 2017.07.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5f3ea6b5f..7d71fc5e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.07.15 Core * [YoutubeDL] Don't expand environment variables in meta fields (#13637) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b6a147faf..d7304ba06 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -521,6 +521,7 @@ - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextTV**: 壹電視 + - **Nexx** - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** @@ -942,6 +943,7 @@ - **vk:wallpost** - **vlive** - **vlive:channel** + - **vlive:playlist** - **Vodlocker** - **VODPl** - **VODPlatform** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 14358a74c..82e166fef 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.09' +__version__ = '2017.07.15' From 94b817edebb63c3d8485e1ae27cc394dd9e21f9d Mon Sep 17 00:00:00 2001 From: troywith77 <ruitang307@gmail.com> Date: Tue, 9 May 2017 13:10:18 +0800 Subject: [PATCH 0053/2417] [pearvideo] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pear.py | 34 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 youtube_dl/extractor/pear.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9d34447a9..75c1a3d0e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -762,6 +762,7 @@ from .pandoratv import PandoraTVIE from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE +from .pear import PearIE from .people import PeopleIE from .periscope import ( PeriscopeIE, diff --git a/youtube_dl/extractor/pear.py b/youtube_dl/extractor/pear.py new file mode 100644 index 000000000..77fd46852 --- /dev/null +++ b/youtube_dl/extractor/pear.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class PearIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.pearvideo.com/video_1076290', + 'info_dict': { + 'id': '1076290', + 'ext': 'mp4', + 'title': '小浣熊在主人家玻璃上滚石头:没砸', + 'description': '小浣熊找到一个小石头,仿佛发现了一个宝贝。它不停地用石头按在玻璃上,滚来滚去,吸引主人注意。', + 'url': 'http://video.pearvideo.com/mp4/short/20170508/cont-1076290-10438018-hd.mp4' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<h1[^>]+class="video-tt">(.+)</h1>', webpage, 'title', fatal=False) + description = self._html_search_regex(r'<div[^>]+class="summary"[^>]*>([^<]+)<', webpage, 'description', fatal=False) + url = self._html_search_regex(r'hdUrl="(.*?)"', webpage, 'url', fatal=False) + + return { + 'id': video_id, + 'ext': 'mp4', + 'title': title, + 'description': description, + 'url': url + } From decf86044d17a8ec04e43a4805a0092622d976ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 03:06:04 +0700 Subject: [PATCH 0054/2417] [pearvideo] Improve (closes #13031) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/pear.py | 34 ---------------- youtube_dl/extractor/pearvideo.py | 63 ++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 35 deletions(-) delete mode 100644 youtube_dl/extractor/pear.py create mode 100644 youtube_dl/extractor/pearvideo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 75c1a3d0e..28f0d3f0d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -762,7 +762,7 @@ from .pandoratv import PandoraTVIE from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE -from .pear import PearIE +from .pearvideo import PearVideoIE from .people import PeopleIE from .periscope import ( PeriscopeIE, diff --git a/youtube_dl/extractor/pear.py b/youtube_dl/extractor/pear.py deleted file mode 100644 index 77fd46852..000000000 --- a/youtube_dl/extractor/pear.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class PearIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.pearvideo.com/video_1076290', - 'info_dict': { - 'id': '1076290', - 'ext': 'mp4', - 'title': '小浣熊在主人家玻璃上滚石头:没砸', - 'description': '小浣熊找到一个小石头,仿佛发现了一个宝贝。它不停地用石头按在玻璃上,滚来滚去,吸引主人注意。', - 'url': 'http://video.pearvideo.com/mp4/short/20170508/cont-1076290-10438018-hd.mp4' - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_regex(r'<h1[^>]+class="video-tt">(.+)</h1>', webpage, 'title', fatal=False) - description = self._html_search_regex(r'<div[^>]+class="summary"[^>]*>([^<]+)<', webpage, 'description', fatal=False) - url = self._html_search_regex(r'hdUrl="(.*?)"', webpage, 'url', fatal=False) - - return { - 'id': video_id, - 'ext': 'mp4', - 'title': title, - 'description': description, - 'url': url - } diff --git a/youtube_dl/extractor/pearvideo.py b/youtube_dl/extractor/pearvideo.py new file mode 100644 index 000000000..1d777221c --- /dev/null +++ b/youtube_dl/extractor/pearvideo.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + qualities, + unified_timestamp, +) + + +class PearVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)' + _TEST = { + 'url': 'http://www.pearvideo.com/video_1076290', + 'info_dict': { + 'id': '1076290', + 'ext': 'mp4', + 'title': '小浣熊在主人家玻璃上滚石头:没砸', + 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d', + 'timestamp': 1494275280, + 'upload_date': '20170508', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + quality = qualities( + ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src')) + + formats = [{ + 'url': mobj.group('url'), + 'format_id': mobj.group('id'), + 'quality': quality(mobj.group('id')), + } for mobj in re.finditer( + r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2', + webpage)] + self._sort_formats(formats) + + title = self._search_regex( + (r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)', + r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1'), + webpage, 'title', group='value') + description = self._search_regex( + (r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)', + r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1'), + webpage, 'description', default=None, + group='value') or self._html_search_meta('Description', webpage) + timestamp = unified_timestamp(self._search_regex( + r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)', + webpage, 'timestamp', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + } From 089b97cfee8553886d33cd52b7ede178cebd7034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 04:30:48 +0700 Subject: [PATCH 0055/2417] [nexx] Improve JS embed extraction --- youtube_dl/extractor/nexx.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 60b42cb7d..12450d4c5 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -72,13 +72,17 @@ class NexxIE(InfoExtractor): entries = [] # JavaScript Integration - for domain_id, video_id in re.findall( - r'''(?isx) - <script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(\d+).+? - onPLAYReady.+? - _play\.init\s*\(.+?\s*,\s*(\d+)\s*,\s*.+?\) - ''', webpage): - entries.append('https://api.nexx.cloud/v3/%s/videos/byid/%s' % (domain_id, video_id)) + mobj = re.search( + r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)', + webpage) + if mobj: + domain_id = mobj.group('id') + for video_id in re.findall( + r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)', + webpage): + entries.append( + 'https://api.nexx.cloud/v3/%s/videos/byid/%s' + % (domain_id, video_id)) # TODO: support more embed formats From 3f59b0154a8b6dc85425edfbb3dfdc64f41a6ecb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 04:32:37 +0700 Subject: [PATCH 0056/2417] [nexx:embed] Add extractor for iframe embeds --- youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/generic.py | 31 +++++++++++++++++++- youtube_dl/extractor/nexx.py | 46 ++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 28f0d3f0d..e8a066b83 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -653,7 +653,10 @@ from .nextmedia import ( AppleDailyIE, NextTVIE, ) -from .nexx import NexxIE +from .nexx import ( + NexxIE, + NexxEmbedIE, +) from .nfb import NFBIE from .nfl import NFLIE from .nhk import NhkVodIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 123a21296..0ab2ef2d6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -36,7 +36,10 @@ from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) -from .nexx import NexxIE +from .nexx import ( + NexxIE, + NexxEmbedIE, +) from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE @@ -1566,6 +1569,27 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # Nexx iFrame embed + { + 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html', + 'info_dict': { + 'id': '161464', + 'ext': 'mp4', + 'title': 'Nervenkitzel Achterbahn', + 'alt_title': 'Karussellbauer in Deutschland', + 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', + 'release_year': 2005, + 'creator': 'SPIEGEL TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2761, + 'timestamp': 1394021479, + 'upload_date': '20140305', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, # Facebook <iframe> embed { 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', @@ -2155,6 +2179,11 @@ class GenericIE(InfoExtractor): if nexx_urls: return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key()) + # Look for Nexx iFrame embeds + nexx_embed_urls = NexxEmbedIE._extract_urls(webpage) + if nexx_embed_urls: + return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key()) + # Look for ThePlatform embeds tp_urls = ThePlatformIE._extract_urls(webpage) if tp_urls: diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 12450d4c5..e2960273e 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -88,6 +88,10 @@ class NexxIE(InfoExtractor): return entries + @staticmethod + def _extract_url(webpage): + return NexxIE._extract_urls(webpage)[0] + def _handle_error(self, response): status = int_or_none(try_get( response, lambda x: x['metadata']['status']) or 200) @@ -223,3 +227,45 @@ class NexxIE(InfoExtractor): video, lambda x: x['episodedata']['season'])), 'formats': formats, } + + +class NexxEmbedIE(InfoExtractor): + _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1', + 'md5': '16746bfc28c42049492385c989b26c4a', + 'info_dict': { + 'id': '161464', + 'ext': 'mp4', + 'title': 'Nervenkitzel Achterbahn', + 'alt_title': 'Karussellbauer in Deutschland', + 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', + 'release_year': 2005, + 'creator': 'SPIEGEL TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2761, + 'timestamp': 1394021479, + 'upload_date': '20140305', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + } + + @staticmethod + def _extract_urls(webpage): + # Reference: + # 1. https://nx-s.akamaized.net/files/201510/44.pdf + + # iFrame Embed Integration + return [mobj.group('url') for mobj in re.finditer( + r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1', + webpage)] + + def _real_extract(self, url): + embed_id = self._match_id(url) + + webpage = self._download_webpage(url, embed_id) + + return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key()) From 749ca5eced0b9a8ed1ef79f4ee80908e0ac242c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 04:33:14 +0700 Subject: [PATCH 0057/2417] [extractor/common] Fix playlist_from_matches --- youtube_dl/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index daa10885f..748b4d59f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -730,12 +730,12 @@ class InfoExtractor(object): video_info['title'] = video_title return video_info - def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None): - urlrs = orderedSet( + def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None): + urls = orderedSet( self.url_result(self._proto_relative_url(getter(m) if getter else m), ie) for m in matches) return self.playlist_result( - urlrs, playlist_id=video_id, playlist_title=video_title) + urls, playlist_id=playlist_id, playlist_title=playlist_title) @staticmethod def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None): From 00d06e3cfcb7bfffa3b585694525f05a6ce36af9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 04:38:20 +0700 Subject: [PATCH 0058/2417] [spiegel:article] Add support for nexx iframe embeds (closes #13029) --- youtube_dl/extractor/spiegel.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index ec1b60388..8598377b0 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .nexx import NexxEmbedIE from .spiegeltv import SpiegeltvIE from ..compat import compat_urlparse from ..utils import ( @@ -143,6 +144,9 @@ class SpiegelArticleIE(InfoExtractor): entries = [ self.url_result(compat_urlparse.urljoin( self.http_scheme() + '//spiegel.de/', embed_path)) - for embed_path in embeds - ] - return self.playlist_result(entries) + for embed_path in embeds] + if embeds: + return self.playlist_result(entries) + + return self.playlist_from_matches( + NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key()) From 13eb526f111dc9b98421f3e05287980b88766409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 05:23:19 +0700 Subject: [PATCH 0059/2417] [nexx:embed] PEP 8 --- youtube_dl/extractor/nexx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index e2960273e..d0235fdfe 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -260,7 +260,7 @@ class NexxEmbedIE(InfoExtractor): # iFrame Embed Integration return [mobj.group('url') for mobj in re.finditer( - r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1', + r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1', webpage)] def _real_extract(self, url): From 7abed4e06c8935026873a9c01821d5a5b2d80d4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 12:40:45 +0700 Subject: [PATCH 0060/2417] [crunchyroll] Relax series and season regex (closes #13659) --- youtube_dl/extractor/crunchyroll.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 2ffa4a7f8..8bdaf0c2c 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -510,7 +510,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text # webpage provide more accurate data than series_title from XML series = self._html_search_regex( - r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)', + r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d', webpage, 'series', fatal=False) season = xpath_text(metadata, 'series_title') @@ -518,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text episode_number = int_or_none(xpath_text(metadata, 'episode_number')) season_number = int_or_none(self._search_regex( - r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)', + r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', webpage, 'season number', default=None)) return { From 83d00044c1c9aef487f1c30bb50246e9ad039636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Jul 2017 20:48:09 +0700 Subject: [PATCH 0061/2417] [adn] Improve error reporting (#13663) --- youtube_dl/extractor/adn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 39f80b23f..cffdab6ca 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -107,11 +107,13 @@ class ADNIE(InfoExtractor): metas = options.get('metas') or {} title = metas.get('title') or video_info['title'] links = player_config.get('links') or {} + error = None if not links: links_url = player_config['linksurl'] links_data = self._download_json(urljoin( self._BASE_URL, links_url), video_id) links = links_data.get('links') or {} + error = links_data.get('error') formats = [] for format_id, qualities in links.items(): @@ -130,7 +132,8 @@ class ADNIE(InfoExtractor): for f in m3u8_formats: f['language'] = 'fr' formats.extend(m3u8_formats) - error = options.get('error') + if not error: + error = options.get('error') if not formats and error: raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) self._sort_formats(formats) From bb176df3bbef62dcf958dcc542f778686e44db63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 17 Jul 2017 22:19:40 +0700 Subject: [PATCH 0062/2417] [spiegel:article] Move test --- youtube_dl/extractor/generic.py | 21 --------------------- youtube_dl/extractor/spiegel.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0ab2ef2d6..36c81eda9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1569,27 +1569,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, - # Nexx iFrame embed - { - 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html', - 'info_dict': { - 'id': '161464', - 'ext': 'mp4', - 'title': 'Nervenkitzel Achterbahn', - 'alt_title': 'Karussellbauer in Deutschland', - 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', - 'release_year': 2005, - 'creator': 'SPIEGEL TV', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 2761, - 'timestamp': 1394021479, - 'upload_date': '20140305', - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, # Facebook <iframe> embed { 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 8598377b0..84298fee4 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -122,6 +122,26 @@ class SpiegelArticleIE(InfoExtractor): }, 'playlist_count': 6, + }, { + # Nexx iFrame embed + 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html', + 'info_dict': { + 'id': '161464', + 'ext': 'mp4', + 'title': 'Nervenkitzel Achterbahn', + 'alt_title': 'Karussellbauer in Deutschland', + 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', + 'release_year': 2005, + 'creator': 'SPIEGEL TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2761, + 'timestamp': 1394021479, + 'upload_date': '20140305', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, }] def _real_extract(self, url): From d20b1c6725fce956b44413cced449b7d09b12de2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Jul 2017 18:14:14 +0800 Subject: [PATCH 0063/2417] [dispeak] Recognize sevt subdomain (closes #13276) --- ChangeLog | 6 ++++++ youtube_dl/extractor/dispeak.py | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 7d71fc5e1..8e442d159 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [dispeak] Recognize sevt subdomain (#13276) + + version 2017.07.15 Core diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py index a78cb8a2a..c05f601e2 100644 --- a/youtube_dl/extractor/dispeak.py +++ b/youtube_dl/extractor/dispeak.py @@ -13,7 +13,7 @@ from ..utils import ( class DigitallySpeakingIE(InfoExtractor): - _VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml' + _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml' _TESTS = [{ # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface @@ -28,6 +28,10 @@ class DigitallySpeakingIE(InfoExtractor): # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC 'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml', 'only_matching': True, + }, { + # From http://www.gdcvault.com/play/1013700/Advanced-Material + 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', + 'only_matching': True, }] def _parse_mp4(self, metadata): From 85f5a74b6cf44b0c8b612c264c36eaabd958f501 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Jul 2017 21:19:09 +0800 Subject: [PATCH 0064/2417] [tbs] Mark as broken and skip invalid tests --- youtube_dl/extractor/tbs.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index bf93eb868..e9474533f 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -8,6 +8,9 @@ from ..utils import extract_attributes class TBSIE(TurnerBaseIE): + # https://github.com/rg3/youtube-dl/issues/13658 + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html' _TESTS = [{ 'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html', @@ -17,7 +20,8 @@ class TBSIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'Theatrical Trailer', 'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.', - } + }, + 'skip': 'TBS videos are deleted after a while', }, { 'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html', 'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56', @@ -26,7 +30,8 @@ class TBSIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'You Better Run', 'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.', - } + }, + 'skip': 'TBS videos are deleted after a while', }] def _real_extract(self, url): From fa63cf6c2301972b7d0ae76fb7a11c7d1a2786a9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Jul 2017 22:57:51 +0800 Subject: [PATCH 0065/2417] [youku:show] Fix playlist extraction (closes #13248) --- ChangeLog | 1 + youtube_dl/extractor/youku.py | 66 ++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8e442d159..a83523cb9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [youku:show] Fix playlist extraction (#13248) + [dispeak] Recognize sevt subdomain (#13276) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index dcce15d77..4ae9adb51 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import itertools import random import re import string @@ -222,50 +221,61 @@ class YoukuShowIE(InfoExtractor): _VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html' IE_NAME = 'youku:show' - _TEST = { + _TESTS = [{ 'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html', 'info_dict': { 'id': 'zc7c670be07ff11e48b3f', - 'title': '花千骨 未删减版', + 'title': '花千骨 DVD版', 'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558', }, 'playlist_count': 50, - } + }, { + # Episode number not starting from 1 + 'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html', + 'info_dict': { + 'id': 'zefbfbd70efbfbd780bef', + 'title': '超级飞侠3', + 'description': 'md5:275715156abebe5ccc2a1992e9d56b98', + }, + 'playlist_count': 24, + }] - _PAGE_SIZE = 40 + def _extract_entries(self, playlist_data_url, show_id, idx, query, url): + query['callback'] = 'cb' + playlist_data = self._download_json( + playlist_data_url, show_id, query=query, + note='Downloading playlist data page %d' % (idx + 1), + transform_source=lambda s: js_to_json(strip_jsonp(s)))['html'] + drama_list = (get_element_by_class('p-drama-grid', playlist_data) or + get_element_by_class('p-drama-half-row', playlist_data)) + if drama_list is None: + raise ExtractorError('No episodes found') + video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list) + return playlist_data, [ + self.url_result(urljoin(url, video_url), YoukuIE.ie_key()) + for video_url in video_urls] def _real_extract(self, url): show_id = self._match_id(url) webpage = self._download_webpage(url, show_id) - entries = [] page_config = self._parse_json(self._search_regex( r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'), show_id, transform_source=js_to_json) - for idx in itertools.count(0): - if idx == 0: - playlist_data_url = 'http://list.youku.com/show/module' - query = {'id': page_config['showid'], 'tab': 'point'} - else: - playlist_data_url = 'http://list.youku.com/show/point' - query = { + first_page, entries = self._extract_entries( + 'http://list.youku.com/show/module', show_id, 0, { + 'id': page_config['showid'], + 'tab': 'showInfo', + }, url) + # The first reload_id has the same items as first_page + reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)[1:] + for idx, reload_id in enumerate(reload_ids): + _, new_entries = self._extract_entries( + 'http://list.youku.com/show/episode', show_id, idx + 1, { 'id': page_config['showid'], - 'stage': 'reload_%d' % (self._PAGE_SIZE * idx + 1), - } - query['callback'] = 'cb' - playlist_data = self._download_json( - playlist_data_url, show_id, query=query, - note='Downloading playlist data page %d' % (idx + 1), - transform_source=lambda s: js_to_json(strip_jsonp(s)))['html'] - video_urls = re.findall( - r'<div[^>]+class="p-thumb"[^<]+<a[^>]+href="([^"]+)"', - playlist_data) - new_entries = [ - self.url_result(urljoin(url, video_url), YoukuIE.ie_key()) - for video_url in video_urls] + 'stage': reload_id, + }, url) entries.extend(new_entries) - if len(new_entries) < self._PAGE_SIZE: - break desc = self._html_search_meta('description', webpage, fatal=False) playlist_title = desc.split(',')[0] if desc else None From 3fcf346ac16e6fe1963a3eab861d6bd9c32ce6db Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Jul 2017 23:20:46 +0800 Subject: [PATCH 0066/2417] [youku:show] Refine playlist extraction Handle playlists that the initial page is not the first page --- youtube_dl/extractor/youku.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 4ae9adb51..0c4bc2eda 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -13,7 +13,6 @@ from ..utils import ( js_to_json, str_or_none, strip_jsonp, - urljoin, ) @@ -238,13 +237,16 @@ class YoukuShowIE(InfoExtractor): 'description': 'md5:275715156abebe5ccc2a1992e9d56b98', }, 'playlist_count': 24, + }, { + # Ongoing playlist. The initial page is the last one + 'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html', + 'only_matchine': True, }] - def _extract_entries(self, playlist_data_url, show_id, idx, query, url): + def _extract_entries(self, playlist_data_url, show_id, note, query): query['callback'] = 'cb' playlist_data = self._download_json( - playlist_data_url, show_id, query=query, - note='Downloading playlist data page %d' % (idx + 1), + playlist_data_url, show_id, query=query, note=note, transform_source=lambda s: js_to_json(strip_jsonp(s)))['html'] drama_list = (get_element_by_class('p-drama-grid', playlist_data) or get_element_by_class('p-drama-half-row', playlist_data)) @@ -252,29 +254,39 @@ class YoukuShowIE(InfoExtractor): raise ExtractorError('No episodes found') video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list) return playlist_data, [ - self.url_result(urljoin(url, video_url), YoukuIE.ie_key()) + self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key()) for video_url in video_urls] def _real_extract(self, url): show_id = self._match_id(url) webpage = self._download_webpage(url, show_id) + entries = [] page_config = self._parse_json(self._search_regex( r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'), show_id, transform_source=js_to_json) - first_page, entries = self._extract_entries( - 'http://list.youku.com/show/module', show_id, 0, { + first_page, initial_entries = self._extract_entries( + 'http://list.youku.com/show/module', show_id, + note='Downloading initial playlist data page', + query={ 'id': page_config['showid'], 'tab': 'showInfo', - }, url) + }) + first_page_reload_id = self._html_search_regex( + r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id') # The first reload_id has the same items as first_page - reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)[1:] + reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page) for idx, reload_id in enumerate(reload_ids): + if reload_id == first_page_reload_id: + entries.extend(initial_entries) + continue _, new_entries = self._extract_entries( - 'http://list.youku.com/show/episode', show_id, idx + 1, { + 'http://list.youku.com/show/episode', show_id, + note='Downloading playlist data page %d' % (idx + 1), + query={ 'id': page_config['showid'], 'stage': reload_id, - }, url) + }) entries.extend(new_entries) desc = self._html_search_meta('description', webpage, fatal=False) From c653326a1425f4c271f387fde7a706bf4b52a7a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 20 Jul 2017 22:49:52 +0700 Subject: [PATCH 0067/2417] [funnyordie] Extract more metadata (closes #13677) --- youtube_dl/extractor/funnyordie.py | 64 ++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 49409369c..f85e7de14 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -1,10 +1,14 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + unified_timestamp, +) class FunnyOrDieIE(InfoExtractor): @@ -18,6 +22,10 @@ class FunnyOrDieIE(InfoExtractor): 'title': 'Heart-Shaped Box: Literal Video Version', 'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', 'thumbnail': r're:^http:.*\.jpg$', + 'uploader': 'DASjr', + 'timestamp': 1317904928, + 'upload_date': '20111006', + 'duration': 318.3, }, }, { 'url': 'http://www.funnyordie.com/embed/e402820827', @@ -27,6 +35,8 @@ class FunnyOrDieIE(InfoExtractor): 'title': 'Please Use This Song (Jon Lajoie)', 'description': 'Please use this to sell something. www.jonlajoie.com', 'thumbnail': r're:^http:.*\.jpg$', + 'timestamp': 1398988800, + 'upload_date': '20140502', }, 'params': { 'skip_download': True, @@ -100,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor): 'url': 'http://www.funnyordie.com%s' % src, }] - post_json = self._search_regex( - r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') - post = json.loads(post_json) + timestamp = unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp', default=None)) + + uploader = self._html_search_regex( + r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h', + webpage, 'uploader', default=None) + + title, description, thumbnail, duration = [None] * 4 + + medium = self._parse_json( + self._search_regex( + r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium', + default='{}'), + video_id, fatal=False) + if medium: + title = medium.get('title') + duration = float_or_none(medium.get('duration')) + if not timestamp: + timestamp = unified_timestamp(medium.get('publishDate')) + + post = self._parse_json( + self._search_regex( + r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details', + default='{}'), + video_id, fatal=False) + if post: + if not title: + title = post.get('name') + description = post.get('description') + thumbnail = post.get('picture') + + if not title: + title = self._og_search_title(webpage) + if not description: + description = self._og_search_description(webpage) + if not duration: + duration = int_or_none(self._html_search_meta( + ('video:duration', 'duration'), webpage, 'duration', default=False)) return { 'id': video_id, - 'title': post['name'], - 'description': post.get('description'), - 'thumbnail': post.get('picture'), + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'timestamp': timestamp, + 'duration': duration, 'formats': formats, 'subtitles': subtitles, } From dc6520aa3d1fe7afc52613e392f15dde90af4844 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 20 Jul 2017 23:22:36 +0700 Subject: [PATCH 0068/2417] [egghead:lesson] Add extractor (#6635) --- youtube_dl/extractor/egghead.py | 49 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 5 ++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index c86f52319..e4a3046af 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -2,6 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + unified_timestamp, +) class EggheadCourseIE(InfoExtractor): @@ -33,3 +38,47 @@ class EggheadCourseIE(InfoExtractor): return self.playlist_result( entries, playlist_id, course.get('title'), course.get('description')) + + +class EggheadLessonIE(InfoExtractor): + IE_DESC = 'egghead.io lesson' + IE_NAME = 'egghead:lesson' + _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', + 'info_dict': { + 'id': 'fv5yotjxcg', + 'ext': 'mp4', + 'title': 'Create linear data flow with container style types (Box)', + 'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e', + 'thumbnail': r're:^https?:.*\.jpg$', + 'timestamp': 1481296768, + 'upload_date': '20161209', + 'duration': 304, + 'view_count': 0, + 'tags': ['javascript', 'free'], + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + lesson_id = self._match_id(url) + + lesson = self._download_json( + 'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id) + + return { + '_type': 'url_transparent', + 'ie_key': 'Wistia', + 'url': 'wistia:%s' % lesson['wistia_id'], + 'id': lesson['wistia_id'], + 'title': lesson.get('title'), + 'description': lesson.get('summary'), + 'thumbnail': lesson.get('thumb_nail'), + 'timestamp': unified_timestamp(lesson.get('published_at')), + 'duration': int_or_none(lesson.get('duration')), + 'view_count': int_or_none(lesson.get('plays_count')), + 'tags': try_get(lesson, lambda x: x['tag_list'], list), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e8a066b83..db7616caa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -298,7 +298,10 @@ from .dw import ( from .eagleplatform import EaglePlatformIE from .ebaumsworld import EbaumsWorldIE from .echomsk import EchoMskIE -from .egghead import EggheadCourseIE +from .egghead import ( + EggheadCourseIE, + EggheadLessonIE, +) from .ehow import EHowIE from .eighttracks import EightTracksIE from .einthusan import EinthusanIE From 0396806f671e5828c2abdeb8048acf8b654507b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 21 Jul 2017 00:13:32 +0700 Subject: [PATCH 0069/2417] [YoutubeDL] Do not override id, extractor and extractor_key in url_transparent All these meta fields must be borrowed from final extractor that actually performs extraction. This commit fixes extractor id in download archives for url_transparent downloads. Previously, 'transparent' extractor was erroneously used for extractor archive id, e.g. 'eggheadlesson 4n8ugwwj5t' instead of 'wistia 4n8ugwwj5t'. --- test/test_YoutubeDL.py | 7 ++++++- youtube_dl/YoutubeDL.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 70989e232..e0decb81c 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -41,6 +41,7 @@ def _make_result(formats, **kwargs): 'id': 'testid', 'title': 'testttitle', 'extractor': 'testex', + 'extractor_key': 'TestEx', } res.update(**kwargs) return res @@ -761,7 +762,8 @@ class TestYoutubeDL(unittest.TestCase): '_type': 'url_transparent', 'url': 'foo2:', 'ie_key': 'Foo2', - 'title': 'foo1 title' + 'title': 'foo1 title', + 'id': 'foo1_id', } class Foo2IE(InfoExtractor): @@ -787,6 +789,9 @@ class TestYoutubeDL(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['url'], TEST_URL) self.assertEqual(downloaded['title'], 'foo1 title') + self.assertEqual(downloaded['id'], 'testid') + self.assertEqual(downloaded['extractor'], 'testex') + self.assertEqual(downloaded['extractor_key'], 'TestEx') if __name__ == '__main__': diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 89c07be29..f94836d06 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -860,7 +860,7 @@ class YoutubeDL(object): force_properties = dict( (k, v) for k, v in ie_result.items() if v is not None) - for f in ('_type', 'url', 'ie_key'): + for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'): if f in force_properties: del force_properties[f] new_result = info.copy() From 7d9a1db1110b13e8e6b65613ebb3daf7f0ff3c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 22 Jul 2017 11:40:46 +0700 Subject: [PATCH 0070/2417] [dramafever] Remove video id from title (closes #13699) --- youtube_dl/extractor/dramafever.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index e7abc8889..03fa3aabc 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, clean_html, int_or_none, + remove_end, sanitized_Request, urlencode_postdata ) @@ -73,7 +74,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'info_dict': { 'id': '4512.1', 'ext': 'mp4', - 'title': 'Cooking with Shin 4512.1', + 'title': 'Cooking with Shin', 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', 'episode': 'Episode 1', 'episode_number': 1, @@ -91,7 +92,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'info_dict': { 'id': '4826.4', 'ext': 'mp4', - 'title': 'Mnet Asian Music Awards 2015 4826.4', + 'title': 'Mnet Asian Music Awards 2015', 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', 'episode': 'Mnet Asian Music Awards 2015 - Part 3', 'episode_number': 4, @@ -122,6 +123,10 @@ class DramaFeverIE(DramaFeverBaseIE): countries=self._GEO_COUNTRIES) raise + # title is postfixed with video id for some reason, removing + if info.get('title'): + info['title'] = remove_end(info['title'], video_id).strip() + series_id, episode_number = video_id.split('.') episode_info = self._download_json( # We only need a single episode info, so restricting page size to one episode From f76c02c87b479310b0e090216895879257b1062a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 22 Jul 2017 11:41:40 +0700 Subject: [PATCH 0071/2417] [dramafever] Fix tests --- youtube_dl/extractor/dramafever.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 03fa3aabc..9a498d72a 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -73,7 +73,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { 'id': '4512.1', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Cooking with Shin', 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', 'episode': 'Episode 1', @@ -81,7 +81,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1404336058, 'upload_date': '20140702', - 'duration': 343, + 'duration': 344, }, 'params': { # m3u8 download @@ -91,7 +91,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1', 'info_dict': { 'id': '4826.4', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Mnet Asian Music Awards 2015', 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', 'episode': 'Mnet Asian Music Awards 2015 - Part 3', @@ -99,7 +99,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1450213200, 'upload_date': '20151215', - 'duration': 5602, + 'duration': 5359, }, 'params': { # m3u8 download From 359aa2fdd145d11a29a04f620fed95acbf142f66 Mon Sep 17 00:00:00 2001 From: dubber0 <rexa.mose@gmail.com> Date: Sat, 22 Jul 2017 14:15:55 +0200 Subject: [PATCH 0072/2417] [npo] Add support for npo3.nl URLs --- youtube_dl/extractor/npo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 516b1e941..fa4ef20c5 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -28,7 +28,7 @@ class NPOBaseIE(InfoExtractor): class NPOIE(NPOBaseIE): IE_NAME = 'npo' - IE_DESC = 'npo.nl and ntr.nl' + IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl' _VALID_URL = r'''(?x) (?: npo:| @@ -38,7 +38,7 @@ class NPOIE(NPOBaseIE): npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__| - zapp\.nl/[^/]+/[^/]+/ + (?:zapp|npo3)\.nl/(?:[^/]+/){2} ) ) (?P<id>[^/?#]+) @@ -146,6 +146,9 @@ class NPOIE(NPOBaseIE): }, { 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', 'only_matching': True, + }, { + 'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870', + 'only_matching': True, }, { # live stream 'url': 'npo:LI_NL1_4188102', From 327c8364f11f23dd919e8009c6adb021c34054fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 22 Jul 2017 21:35:14 +0700 Subject: [PATCH 0073/2417] [sportbox:embed] Fix extraction --- youtube_dl/extractor/sportbox.py | 61 ++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index e7bd5bf91..54497c880 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -4,7 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + determine_ext, + int_or_none, + js_to_json, +) class SportBoxEmbedIE(InfoExtractor): @@ -14,8 +18,10 @@ class SportBoxEmbedIE(InfoExtractor): 'info_dict': { 'id': '211355', 'ext': 'mp4', - 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'title': '211355', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 292, + 'view_count': int, }, 'params': { # m3u8 download @@ -24,6 +30,9 @@ class SportBoxEmbedIE(InfoExtractor): }, { 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', 'only_matching': True, + }, { + 'url': 'https://news.sportbox.ru/vdl/player/media/193095', + 'only_matching': True, }] @staticmethod @@ -37,36 +46,34 @@ class SportBoxEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + wjplayer_data = self._parse_json( + self._search_regex( + r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'), + video_id, transform_source=js_to_json) + formats = [] - - def cleanup_js(code): - # desktop_advert_config contains complex Javascripts and we don't need it - return js_to_json(re.sub(r'desktop_advert_config.*', '', code)) - - jwplayer_data = self._parse_json(self._search_regex( - r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id, - transform_source=cleanup_js) - - hls_url = jwplayer_data.get('hls_url') - if hls_url: - formats.extend(self._extract_m3u8_formats( - hls_url, video_id, ext='mp4', m3u8_id='hls')) - - rtsp_url = jwplayer_data.get('rtsp_url') - if rtsp_url: - formats.append({ - 'url': rtsp_url, - 'format_id': 'rtsp', - }) - + for source in wjplayer_data['sources']: + src = source.get('src') + if not src: + continue + if determine_ext(src) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + }) self._sort_formats(formats) - title = jwplayer_data['node_title'] - thumbnail = jwplayer_data.get('image_url') + view_count = int_or_none(self._search_regex( + r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) return { 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, + 'title': video_id, + 'thumbnail': wjplayer_data.get('poster'), + 'duration': int_or_none(wjplayer_data.get('duration')), + 'view_count': view_count, 'formats': formats, } From 0017d9ad6de831384e74db14a821e4c94020c9ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 00:12:01 +0700 Subject: [PATCH 0074/2417] [YoutubeDL] Improve default format specification (closes #13704) --- test/test_YoutubeDL.py | 11 +++++++++++ youtube_dl/YoutubeDL.py | 31 +++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e0decb81c..4af14f9db 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -449,6 +449,17 @@ class TestFormatSelection(unittest.TestCase): pass self.assertEqual(ydl.downloaded_info_dicts, []) + def test_default_format_spec(self): + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + + ydl = YDL({'outtmpl': '-'}) + self.assertEqual(ydl._default_format_spec({}), 'best') + + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best') + class TestYoutubeDL(unittest.TestCase): def test_subtitles(self): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f94836d06..367ae3533 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1064,6 +1064,25 @@ class YoutubeDL(object): return op(actual_value, comparison_value) return _filter + def _default_format_spec(self, info_dict, download=True): + req_format_list = [] + + def can_have_partial_formats(): + if self.params.get('simulate', False): + return True + if not download: + return True + if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': + return False + if info_dict.get('is_live'): + return False + merger = FFmpegMergerPP(self) + return merger.available and merger.can_merge() + if can_have_partial_formats(): + req_format_list.append('bestvideo+bestaudio') + req_format_list.append('best') + return '/'.join(req_format_list) + def build_format_selector(self, format_spec): def syntax_error(note, start): message = ( @@ -1534,14 +1553,10 @@ class YoutubeDL(object): req_format = self.params.get('format') if req_format is None: - req_format_list = [] - if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and - not info_dict.get('is_live')): - merger = FFmpegMergerPP(self) - if merger.available and merger.can_merge(): - req_format_list.append('bestvideo+bestaudio') - req_format_list.append('best') - req_format = '/'.join(req_format_list) + req_format = self._default_format_spec(info_dict, download=download) + if self.params.get('verbose'): + self.to_stdout('[debug] Default format spec: %s' % req_format) + format_selector = self.build_format_selector(req_format) # While in format selection we may need to have an access to the original From e0f1fb0a27612c2398df59dd85194edfdf8cbc2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 00:25:23 +0700 Subject: [PATCH 0075/2417] [mtv] Skip missing video parts (closes #13690) --- youtube_dl/extractor/mtv.py | 28 ++++++++++++++++++++++------ youtube_dl/extractor/vh1.py | 12 ++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 8acea1461..fc098cd13 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -83,7 +83,7 @@ class MTVServicesInfoExtractor(InfoExtractor): hls_url = rendition.find('./src').text formats.extend(self._extract_m3u8_formats( hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id='hls')) + m3u8_id='hls', fatal=False)) else: # fms try: @@ -106,7 +106,8 @@ class MTVServicesInfoExtractor(InfoExtractor): }]) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') - self._sort_formats(formats) + if formats: + self._sort_formats(formats) return formats def _extract_subtitles(self, mdoc, mtvn_id): @@ -133,8 +134,11 @@ class MTVServicesInfoExtractor(InfoExtractor): mediagen_url += 'acceptMethods=' mediagen_url += 'hls' if use_hls else 'fms' - mediagen_doc = self._download_xml(mediagen_url, video_id, - 'Downloading video urls') + mediagen_doc = self._download_xml( + mediagen_url, video_id, 'Downloading video urls', fatal=False) + + if mediagen_doc is False: + return None item = mediagen_doc.find('./video/item') if item is not None and item.get('type') == 'text': @@ -174,6 +178,13 @@ class MTVServicesInfoExtractor(InfoExtractor): formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id) + # Some parts of complete video may be missing (e.g. missing Act 3 in + # http://www.southpark.de/alle-episoden/s14e01-sexual-healing) + if not formats: + return None + + self._sort_formats(formats) + return { 'title': title, 'formats': formats, @@ -205,9 +216,14 @@ class MTVServicesInfoExtractor(InfoExtractor): title = xpath_text(idoc, './channel/title') description = xpath_text(idoc, './channel/description') + entries = [] + for item in idoc.findall('.//item'): + info = self._get_video_info(item, use_hls) + if info: + entries.append(info) + return self.playlist_result( - [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], - playlist_title=title, playlist_description=description) + entries, playlist_title=title, playlist_description=description) def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): triforce_feed = self._parse_json(self._search_regex( diff --git a/youtube_dl/extractor/vh1.py b/youtube_dl/extractor/vh1.py index 6be3774b7..570fa45ea 100644 --- a/youtube_dl/extractor/vh1.py +++ b/youtube_dl/extractor/vh1.py @@ -121,7 +121,11 @@ class VH1IE(MTVIE): idoc = self._download_xml( doc_url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) - return self.playlist_result( - [self._get_video_info(item) for item in idoc.findall('.//item')], - playlist_id=video_id, - ) + + entries = [] + for item in idoc.findall('.//item'): + info = self._get_video_info(item) + if info: + entries.append(info) + + return self.playlist_result(entries, playlist_id=video_id) From 935d6c20c00536cf39cf2844295266e64492bb10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 00:44:50 +0700 Subject: [PATCH 0076/2417] [vidio] Make duration non fatal and fix typo --- youtube_dl/extractor/vidio.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index 701bb1d01..01da32f1c 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -56,7 +56,8 @@ class VidioIE(InfoExtractor): self._sort_formats(formats) duration = int_or_none(duration or self._search_regex( - r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration')) + r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage, + 'duration', fatal=False, group='duration')) thumbnail = thumbnail or self._og_search_thumbnail(webpage) like_count = int_or_none(self._search_regex( From 71dde5eecf07ae3a8871e5d4a05a944097e17fb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 00:59:07 +0700 Subject: [PATCH 0077/2417] [itv] Fix production id extraction (closes #13671) --- youtube_dl/extractor/itv.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index f3156804d..26c48e4b8 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -59,12 +59,18 @@ class ITVIE(InfoExtractor): def _add_sub_element(element, name): return etree.SubElement(element, _add_ns(name)) + production_id = ( + params.get('data-video-autoplay-id') or + '%s#001' % ( + params.get('data-video-episode-id') or + video_id.replace('a', '/'))) + req_env = etree.Element(_add_ns('soapenv:Envelope')) _add_sub_element(req_env, 'soapenv:Header') body = _add_sub_element(req_env, 'soapenv:Body') get_playlist = _add_sub_element(body, ('tem:GetPlaylist')) request = _add_sub_element(get_playlist, 'tem:request') - _add_sub_element(request, 'itv:ProductionId').text = params['data-video-id'] + _add_sub_element(request, 'itv:ProductionId').text = production_id _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper() vodcrid = _add_sub_element(request, 'itv:Vodcrid') _add_sub_element(vodcrid, 'com:Id') From 425f41319aec6940195818e980005cef4946eb75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 01:06:08 +0700 Subject: [PATCH 0078/2417] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index a83523cb9..cd8a34a86 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,28 @@ version <unreleased> +Core +* [YoutubeDL] Improve default format specification (#13704) +* [YoutubeDL] Do not override id, extractor and extractor_key for + url_transparent entities +* [extractor/common] Fix playlist_from_matches + Extractors +* [itv] Fix production id extraction (#13671, #13703) +* [vidio] Make duration non fatal and fix typo +* [mtv] Skip missing video parts (#13690) +* [sportbox:embed] Fix extraction ++ [npo] Add support for npo3.nl URLs (#13695) +* [dramafever] Remove video id from title (#13699) ++ [egghead:lesson] Add support for lessons (#6635) +* [funnyordie] Extract more metadata (#13677) * [youku:show] Fix playlist extraction (#13248) + [dispeak] Recognize sevt subdomain (#13276) +* [adn] Improve error reporting (#13663) +* [crunchyroll] Relax series and season regex (#13659) ++ [spiegel:article] Add support for nexx iframe embeds (#13029) ++ [nexx:embed] Add support for iframe embeds +* [nexx] Improve JS embed extraction ++ [pearvideo] Add support for pearvideo.com (#13031) version 2017.07.15 From 0db492c02a60dbfb44514833445bf267f5319ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 01:09:09 +0700 Subject: [PATCH 0079/2417] release 2017.07.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 13 ++++++++----- youtube_dl/version.py | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0f20d0485..37d09d796 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.15 +[debug] youtube-dl version 2017.07.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index cd8a34a86..302d32aab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.07.23 Core * [YoutubeDL] Improve default format specification (#13704) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d7304ba06..eb09c470c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -42,7 +42,7 @@ - **Allocine** - **AlphaPorno** - **AMCNetworks** - - **anderetijden**: npo.nl and ntr.nl + - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **AnimeOnDemand** - **anitube.se** - **Anvato** @@ -238,6 +238,7 @@ - **EbaumsWorld** - **EchoMsk** - **egghead:course**: egghead.io course + - **egghead:lesson**: egghead.io lesson - **eHow** - **Einthusan** - **eitb.tv** @@ -522,6 +523,7 @@ - **NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextTV**: 壹電視 - **Nexx** + - **NexxEmbed** - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** @@ -552,7 +554,7 @@ - **NowTVList** - **nowvideo**: NowVideo - **Noz** - - **npo**: npo.nl and ntr.nl + - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **npo.nl:live** - **npo.nl:radio** - **npo.nl:radio:fragment** @@ -596,6 +598,7 @@ - **Patreon** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **pcmag** + - **PearVideo** - **People** - **periscope**: Periscope - **periscope:user**: Periscope user videos @@ -772,7 +775,7 @@ - **tagesschau:player** - **Tass** - **TastyTrade** - - **TBS** + - **TBS** (Currently broken) - **TDSLifeway** - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos @@ -950,7 +953,7 @@ - **VoiceRepublic** - **VoxMedia** - **Vporn** - - **vpro**: npo.nl and ntr.nl + - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be - **vrv** @@ -976,7 +979,7 @@ - **wholecloud**: WholeCloud - **Wimp** - **Wistia** - - **wnl**: npo.nl and ntr.nl + - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **WorldStarHipHop** - **wrzuta.pl** - **wrzuta.pl:playlist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 82e166fef..a8dbb93e3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.15' +__version__ = '2017.07.23' From 905d18a7aa42263c66f311ac0cdf46b2caa2f4d6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 23 Jul 2017 16:21:35 +0800 Subject: [PATCH 0080/2417] [options] Correctly hide login info from debug outputs (#13696) Iterate over opts instead of PRIVATE_OPTS for both performance and correctness --- ChangeLog | 6 ++++++ test/test_options.py | 26 ++++++++++++++++++++++++++ youtube_dl/options.py | 38 ++++++++++++++++++-------------------- 3 files changed, 50 insertions(+), 20 deletions(-) create mode 100644 test/test_options.py diff --git a/ChangeLog b/ChangeLog index 302d32aab..8e63b5c11 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Core +* [options] Correctly hide login info from debug outputs (#13696) + + version 2017.07.23 Core diff --git a/test/test_options.py b/test/test_options.py new file mode 100644 index 000000000..785281fe3 --- /dev/null +++ b/test/test_options.py @@ -0,0 +1,26 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.options import _hide_login_info + + +class TestOptions(unittest.TestCase): + def test_hide_login_inf(self): + self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']), + ['-u', 'PRIVATE', '-p', 'PRIVATE']) + self.assertEqual(_hide_login_info(['-u']), ['-u']) + self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']), + ['-u', 'PRIVATE', '-u', 'PRIVATE']) + self.assertEqual(_hide_login_info(['--username=foo']), + ['--username=PRIVATE']) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 79e9fd12c..38439c971 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -20,6 +20,24 @@ from .utils import ( from .version import __version__ +def _hide_login_info(opts): + PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username']) + eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') + + def _scrub_eq(o): + m = eqre.match(o) + if m: + return m.group('key') + '=PRIVATE' + else: + return o + + opts = list(map(_scrub_eq, opts)) + for idx, opt in enumerate(opts): + if opt in PRIVATE_OPTS and idx + 1 < len(opts): + opts[idx + 1] = 'PRIVATE' + return opts + + def parseOpts(overrideArguments=None): def _readOptions(filename_bytes, default=[]): try: @@ -93,26 +111,6 @@ def parseOpts(overrideArguments=None): def _comma_separated_values_options_callback(option, opt_str, value, parser): setattr(parser.values, option.dest, value.split(',')) - def _hide_login_info(opts): - PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'] - eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') - - def _scrub_eq(o): - m = eqre.match(o) - if m: - return m.group('key') + '=PRIVATE' - else: - return o - - opts = list(map(_scrub_eq, opts)) - for private_opt in PRIVATE_OPTS: - try: - i = opts.index(private_opt) - opts[i + 1] = 'PRIVATE' - except ValueError: - pass - return opts - # No need to wrap help messages if we're on a wide console columns = compat_get_terminal_size().columns max_width = columns if columns else 80 From 73095e013fb1bc4a1e676d7be77a103f0013a227 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 23 Jul 2017 16:24:18 +0800 Subject: [PATCH 0081/2417] [options] Typo --- test/test_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_options.py b/test/test_options.py index 785281fe3..3a25a6ba3 100644 --- a/test/test_options.py +++ b/test/test_options.py @@ -12,7 +12,7 @@ from youtube_dl.options import _hide_login_info class TestOptions(unittest.TestCase): - def test_hide_login_inf(self): + def test_hide_login_info(self): self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']), ['-u', 'PRIVATE', '-p', 'PRIVATE']) self.assertEqual(_hide_login_info(['-u']), ['-u']) From e3ce912c3d767fcb1a1225d05ac64da1acab94aa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 23 Jul 2017 16:25:17 +0800 Subject: [PATCH 0082/2417] [niconico] improve error reporting (#13696) --- youtube_dl/extractor/niconico.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 695e32e59..79b9952c3 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -147,6 +147,9 @@ class NiconicoIE(InfoExtractor): elif 'closed' in flv_info: raise ExtractorError('Niconico videos now require logging in', expected=True) + elif 'error' in flv_info: + raise ExtractorError('%s reports error: %s' % ( + self.IE_NAME, flv_info['error'][0]), expected=True) else: raise ExtractorError('Unable to find video URL') From 3150976669ef2ffc9f4eee9e99a6e70730bc22fb Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Sun, 23 Jul 2017 15:33:18 +0200 Subject: [PATCH 0083/2417] [ISSUE_TEMPLATE_tmpl.md] Minor improvements --- .github/ISSUE_TEMPLATE_tmpl.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index df79503d3..26f61d3b4 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -1,16 +1,16 @@ ## Please follow the guide below - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly -- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) -- Use *Preview* tab to see how your issue will actually look like +- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`) +- Use the *Preview* tab to see what your issue will actually look like --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. - [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones ### What is the purpose of your *issue*? @@ -28,9 +28,9 @@ ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: -Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): +Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): + ``` -$ youtube-dl -v <your command line> [debug] System config: [] [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] From f0e31e32c940d8529353f40bd2426163c3199216 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Sun, 23 Jul 2017 15:40:04 +0200 Subject: [PATCH 0084/2417] [nick] Automate geo-restriction bypass (#13711) --- youtube_dl/extractor/nick.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 08a75929e..1fa19cde4 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -12,6 +12,7 @@ class NickIE(MTVServicesInfoExtractor): IE_NAME = 'nick.com' _VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' + _GEO_COUNTRIES = ['US'] _TESTS = [{ 'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html', 'playlist': [ From 70bfab0e9ac8ddb7da67d71633c8c4b0704054cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 21:00:19 +0700 Subject: [PATCH 0085/2417] [mtv] Improve thumbnal extraction --- youtube_dl/extractor/mtv.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index fc098cd13..25af5ddfd 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -50,8 +50,7 @@ class MTVServicesInfoExtractor(InfoExtractor): thumb_node = itemdoc.find(search_path) if thumb_node is None: return None - else: - return thumb_node.attrib['url'] + return thumb_node.get('url') or thumb_node.text or None def _extract_mobile_video_formats(self, mtvn_id): webpage_url = self._MOBILE_TEMPLATE % mtvn_id From c99d6890cb46626870474e5c1092d9772096c4b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Jul 2017 21:00:56 +0700 Subject: [PATCH 0086/2417] [nickru] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nick.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index db7616caa..2513f2587 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -673,6 +673,7 @@ from .nick import ( NickIE, NickDeIE, NickNightIE, + NickRuIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninecninemedia import ( diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 1fa19cde4..b688637bc 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -125,3 +125,21 @@ class NickNightIE(NickDeIE): return self._search_regex( r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url') + + +class NickRuIE(MTVServicesInfoExtractor): + IE_NAME = 'nickelodeonru' + _VALID_URL = r'https?://(?:www\.)nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6', + 'only_matching': True, + }, { + 'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) From f9c48d895b5600c82e9b55f703e68b060f25de07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 26 Jul 2017 23:12:43 +0700 Subject: [PATCH 0087/2417] [cloudy] Fix extraction (closes #13737) --- youtube_dl/extractor/cloudy.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 9bc8dbea4..85ca20ecc 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -30,7 +30,11 @@ class CloudyIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id) + 'https://www.cloudy.ec/embed.php', video_id, query={ + 'id': video_id, + 'playerPage': 1, + 'autoplay': 1, + }) info = self._parse_html5_media_entries(url, webpage, video_id)[0] From 9682666bdadec955fb8600fa3721f59b2a4b8099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 27 Jul 2017 02:04:51 +0700 Subject: [PATCH 0088/2417] [amcnetworks] Make rating optional (closes #12453) --- youtube_dl/extractor/amcnetworks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index 3a0ec6776..dd3b18d72 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals from .theplatform import ThePlatformIE from ..utils import ( - update_url_query, - parse_age_limit, int_or_none, + parse_age_limit, + try_get, + update_url_query, ) @@ -68,7 +69,8 @@ class AMCNetworksIE(ThePlatformIE): info = self._parse_theplatform_metadata(theplatform_metadata) video_id = theplatform_metadata['pid'] title = theplatform_metadata['title'] - rating = theplatform_metadata['ratings'][0]['rating'] + rating = try_get( + theplatform_metadata, lambda x: x['ratings'][0]['rating']) auth_required = self._search_regex( r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required') From 24e966e8dab954136dabbc497064ac63b252495b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 28 Jul 2017 12:13:19 +0200 Subject: [PATCH 0089/2417] [megaphone] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 8 +++++ youtube_dl/extractor/megaphone.py | 55 ++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 youtube_dl/extractor/megaphone.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2513f2587..668248648 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -558,6 +558,7 @@ from .matchtv import MatchTVIE from .mdr import MDRIE from .mediaset import MediasetIE from .medici import MediciIE +from .megaphone import MegaphoneIE from .meipai import MeipaiIE from .melonvod import MelonVODIE from .meta import METAIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 36c81eda9..9678c32c4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -97,6 +97,7 @@ from .washingtonpost import WashingtonPostIE from .wistia import WistiaIE from .mediaset import MediasetIE from .joj import JojIE +from .megaphone import MegaphoneIE class GenericIE(InfoExtractor): @@ -2790,6 +2791,13 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( joj_urls, video_id, video_title, ie=JojIE.ie_key()) + # Look for megaphone.fm embeds + mpfn_urls = MegaphoneIE._extract_urls(webpage) + if mpfn_urls: + return self.playlist_from_matches( + mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key()) + + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/megaphone.py b/youtube_dl/extractor/megaphone.py new file mode 100644 index 000000000..60e3caf0d --- /dev/null +++ b/youtube_dl/extractor/megaphone.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import js_to_json + + +class MegaphoneIE(InfoExtractor): + IE_NAME = 'megaphone.fm' + IE_DESC = 'megaphone.fm embedded players' + _VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)' + _TEST = { + 'url': 'https://player.megaphone.fm/GLT9749789991?"', + 'md5': '4816a0de523eb3e972dc0dda2c191f96', + 'info_dict': { + 'id': 'GLT9749789991', + 'ext': 'mp3', + 'title': '#97 What Kind Of Idiot Gets Phished?', + 'thumbnail': 're:^https://.*\.png.*$', + 'duration': 1776.26375, + 'author': 'Reply All', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_property('audio:title', webpage) + author = self._og_search_property('audio:artist', webpage) + thumbnail = self._og_search_thumbnail(webpage) + + episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON') + episode_data = self._parse_json(episode_json, video_id, js_to_json) + video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:') + + formats = [{ + 'url': video_url, + }] + + return { + 'id': video_id, + 'thumbnail': thumbnail, + 'title': title, + 'author': author, + 'duration': episode_data['duration'], + 'formats': formats, + } + + @classmethod + def _extract_urls(cls, webpage): + return [m[0] for m in re.findall( + r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)] From c5a49ff08413411174837f1034ef439b79ff774b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:02:41 +0700 Subject: [PATCH 0090/2417] [downloader/hls] Use redirect URL as manifest base (#13755) --- youtube_dl/downloader/hls.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 0e29c8a2a..46308cf07 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -59,9 +59,9 @@ class HlsFD(FragmentFD): man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) - manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read() - - s = manifest.decode('utf-8', 'ignore') + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.geturl() + s = urlh.read().decode('utf-8', 'ignore') if not self.can_download(s, info_dict): if info_dict.get('extra_param_to_segment_url'): From cbbe66635f3c23316f04a6f56ad57e025bc47263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:10:19 +0700 Subject: [PATCH 0091/2417] [yandexdisk] Add extractor (closes #13755) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/yandexdisk.py | 115 +++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 youtube_dl/extractor/yandexdisk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 668248648..852942e0d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1298,6 +1298,7 @@ from .yandexmusic import ( YandexMusicAlbumIE, YandexMusicPlaylistIE, ) +from .yandexdisk import YandexDiskIE from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE from .ynet import YnetIE diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dl/extractor/yandexdisk.py new file mode 100644 index 000000000..11729f0f7 --- /dev/null +++ b/youtube_dl/extractor/yandexdisk.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, + try_get, + urlencode_postdata, +) + + +class YandexDiskIE(InfoExtractor): + _VALID_URL = r'https?://yadi\.sk/i/(?P<id>[^/?#&]+)' + + _TEST = { + 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', + 'md5': '33955d7ae052f15853dc41f35f17581c', + 'info_dict': { + 'id': 'VdOeDou8eZs6Y', + 'ext': 'mp4', + 'title': '4.mp4', + 'duration': 168.6, + 'uploader': 'y.botova', + 'uploader_id': '300043621', + 'view_count': int, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + status = self._download_webpage( + 'https://disk.yandex.com/auth/status', video_id, query={ + 'urlOrigin': url, + 'source': 'public', + 'md5': 'false', + }) + + sk = self._search_regex( + r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2', + status, 'sk', group='value') + + webpage = self._download_webpage(url, video_id) + + models = self._parse_json( + self._search_regex( + r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script', + webpage, 'video JSON'), + video_id) + + data = next( + model['data'] for model in models + if model.get('model') == 'resource') + + video_hash = data['id'] + title = data['name'] + + models = self._download_json( + 'https://disk.yandex.com/models/', video_id, + data=urlencode_postdata({ + '_model.0': 'videoInfo', + 'id.0': video_hash, + '_model.1': 'do-get-resource-url', + 'id.1': video_hash, + 'version': '13.6', + 'sk': sk, + }), query={'_m': 'videoInfo'})['models'] + + videos = try_get(models, lambda x: x[0]['data']['videos'], list) or [] + source_url = try_get( + models, lambda x: x[1]['data']['file'], compat_str) + + formats = [] + if source_url: + formats.append({ + 'url': source_url, + 'format_id': 'source', + 'ext': determine_ext(title, 'mp4'), + 'quality': 1, + }) + for video in videos: + format_url = video.get('url') + if not format_url: + continue + if determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': format_url, + }) + self._sort_formats(formats) + + duration = float_or_none(try_get( + models, lambda x: x[0]['data']['duration']), 1000) + uploader = try_get( + data, lambda x: x['user']['display_name'], compat_str) + uploader_id = try_get( + data, lambda x: x['user']['uid'], compat_str) + view_count = int_or_none(try_get( + data, lambda x: x['meta']['views_counter'])) + + return { + 'id': video_id, + 'title': title, + 'duration': duration, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'formats': formats, + } From 95908ce45382a72be26e918f8db1809b0ce81190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:13:12 +0700 Subject: [PATCH 0092/2417] [extractor/generic] PEP 8 --- youtube_dl/extractor/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9678c32c4..4b83e861b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2797,7 +2797,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key()) - def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): From 2a7a82321135bc59364c91caddde4211f378785b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:25:32 +0700 Subject: [PATCH 0093/2417] [svtplay] Update API URL (closes #13767) --- youtube_dl/extractor/svt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 1b5afb73e..38a505f9c 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -181,7 +181,7 @@ class SVTPlayIE(SVTBaseIE): if video_id: data = self._download_json( - 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id) + 'https://api.svt.se/videoplayer-api/video/%s' % video_id, video_id) info_dict = self._extract_video(data, video_id) if not info_dict.get('title'): info_dict['title'] = re.sub( From c04017519da74a375d6c1c95733d921e96d8ee82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 15:30:53 +0700 Subject: [PATCH 0094/2417] [svtplay] Use geo verification proxy for API request --- youtube_dl/extractor/svt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 38a505f9c..48bc4529e 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -181,7 +181,8 @@ class SVTPlayIE(SVTBaseIE): if video_id: data = self._download_json( - 'https://api.svt.se/videoplayer-api/video/%s' % video_id, video_id) + 'https://api.svt.se/videoplayer-api/video/%s' % video_id, + video_id, headers=self.geo_verification_headers()) info_dict = self._extract_video(data, video_id) if not info_dict.get('title'): info_dict['title'] = re.sub( From 836ef2648613f4ca565b319af4769c02e35f60f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 18:41:42 +0700 Subject: [PATCH 0095/2417] [soundcloud:trackstation] Add extractor (closes #13733) --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/soundcloud.py | 141 ++++++++++++++++++----------- 2 files changed, 89 insertions(+), 55 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 852942e0d..d2c5e8030 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -935,8 +935,9 @@ from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE, + SoundcloudTrackStationIE, SoundcloudPlaylistIE, - SoundcloudSearchIE + SoundcloudSearchIE, ) from .soundgasm import ( SoundgasmIE, diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 3f1a46bb2..2f1b2978c 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -31,6 +31,7 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'''(?x)^(?:https?://)? (?:(?:(?:www\.|m\.)?soundcloud\.com/ + (?!stations/track) (?P<uploader>[\w\d-]+)/ (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P<title>[\w\d-]+)/? @@ -330,7 +331,63 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): } -class SoundcloudUserIE(SoundcloudPlaylistBaseIE): +class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): + _API_BASE = 'https://api.soundcloud.com' + _API_V2_BASE = 'https://api-v2.soundcloud.com' + + def _extract_playlist(self, base_url, playlist_id, playlist_title): + COMMON_QUERY = { + 'limit': 50, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': '1', + } + + query = COMMON_QUERY.copy() + query['offset'] = 0 + + next_href = base_url + '?' + compat_urllib_parse_urlencode(query) + + entries = [] + for i in itertools.count(): + response = self._download_json( + next_href, playlist_id, 'Downloading track page %s' % (i + 1)) + + collection = response['collection'] + if not collection: + break + + def resolve_permalink_url(candidates): + for cand in candidates: + if isinstance(cand, dict): + permalink_url = cand.get('permalink_url') + entry_id = self._extract_id(cand) + if permalink_url and permalink_url.startswith('http'): + return permalink_url, entry_id + + for e in collection: + permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) + if permalink_url: + entries.append(self.url_result(permalink_url, video_id=entry_id)) + + next_href = response.get('next_href') + if not next_href: + break + + parsed_next_href = compat_urlparse.urlparse(response['next_href']) + qs = compat_urlparse.parse_qs(parsed_next_href.query) + qs.update(COMMON_QUERY) + next_href = compat_urlparse.urlunparse( + parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': playlist_title, + 'entries': entries, + } + + +class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): _VALID_URL = r'''(?x) https?:// (?:(?:www|m)\.)?soundcloud\.com/ @@ -385,16 +442,13 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE): 'playlist_mincount': 1, }] - _API_BASE = 'https://api.soundcloud.com' - _API_V2_BASE = 'https://api-v2.soundcloud.com' - _BASE_URL_MAP = { - 'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE, - 'tracks': '%s/users/%%s/tracks' % _API_BASE, - 'sets': '%s/users/%%s/playlists' % _API_V2_BASE, - 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE, - 'likes': '%s/users/%%s/likes' % _API_V2_BASE, - 'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE, + 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE, + 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, } _TITLE_MAP = { @@ -416,57 +470,36 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE): resolv_url, uploader, 'Downloading user info') resource = mobj.group('rsrc') or 'all' - base_url = self._BASE_URL_MAP[resource] % user['id'] - COMMON_QUERY = { - 'limit': 50, - 'client_id': self._CLIENT_ID, - 'linked_partitioning': '1', - } + return self._extract_playlist( + self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']), + '%s (%s)' % (user['username'], self._TITLE_MAP[resource])) - query = COMMON_QUERY.copy() - query['offset'] = 0 - next_href = base_url + '?' + compat_urllib_parse_urlencode(query) +class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)' + IE_NAME = 'soundcloud:trackstation' + _TESTS = [{ + 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', + 'info_dict': { + 'id': '286017854', + 'title': 'Track station: your-text', + }, + 'playlist_mincount': 47, + }] - entries = [] - for i in itertools.count(): - response = self._download_json( - next_href, uploader, 'Downloading track page %s' % (i + 1)) + def _real_extract(self, url): + track_name = self._match_id(url) - collection = response['collection'] - if not collection: - break + webpage = self._download_webpage(url, track_name) - def resolve_permalink_url(candidates): - for cand in candidates: - if isinstance(cand, dict): - permalink_url = cand.get('permalink_url') - entry_id = self._extract_id(cand) - if permalink_url and permalink_url.startswith('http'): - return permalink_url, entry_id + track_id = self._search_regex( + r'soundcloud:track-stations:(\d+)', webpage, 'track id') - for e in collection: - permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) - if permalink_url: - entries.append(self.url_result(permalink_url, video_id=entry_id)) - - next_href = response.get('next_href') - if not next_href: - break - - parsed_next_href = compat_urlparse.urlparse(response['next_href']) - qs = compat_urlparse.parse_qs(parsed_next_href.query) - qs.update(COMMON_QUERY) - next_href = compat_urlparse.urlunparse( - parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) - - return { - '_type': 'playlist', - 'id': compat_str(user['id']), - 'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]), - 'entries': entries, - } + return self._extract_playlist( + '%s/stations/soundcloud:track-stations:%s/tracks' + % (self._API_V2_BASE, track_id), + track_id, 'Track station: %s' % track_name) class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): From e445850e69990502b171765343fc38317e932aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 18:45:57 +0700 Subject: [PATCH 0096/2417] [soundcloud] Update client id --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2f1b2978c..2e52e092b 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -122,7 +122,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z' + _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' @staticmethod From ca127ab2c174cdee4428eb4e192393c6ca942ac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 29 Jul 2017 23:07:28 +0700 Subject: [PATCH 0097/2417] [ard] Add support for lives (closes #13771) --- youtube_dl/extractor/ard.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 2d5599456..3f248b147 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -93,6 +93,7 @@ class ARDMediathekIE(InfoExtractor): duration = int_or_none(media_info.get('_duration')) thumbnail = media_info.get('_previewImage') + is_live = media_info.get('_isLive') is True subtitles = {} subtitle_url = media_info.get('_subtitleUrl') @@ -106,6 +107,7 @@ class ARDMediathekIE(InfoExtractor): 'id': video_id, 'duration': duration, 'thumbnail': thumbnail, + 'is_live': is_live, 'formats': formats, 'subtitles': subtitles, } @@ -166,9 +168,11 @@ class ARDMediathekIE(InfoExtractor): # determine video id from url m = re.match(self._VALID_URL, url) + document_id = None + numid = re.search(r'documentId=([0-9]+)', url) if numid: - video_id = numid.group(1) + document_id = video_id = numid.group(1) else: video_id = m.group('video_id') @@ -228,12 +232,16 @@ class ARDMediathekIE(InfoExtractor): 'formats': formats, } else: # request JSON file + if not document_id: + video_id = self._search_regex( + r'/play/(?:config|media)/(\d+)', webpage, 'media id') info = self._extract_media_info( - 'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id) + 'http://www.ardmediathek.de/play/media/%s' % video_id, + webpage, video_id) info.update({ 'id': video_id, - 'title': title, + 'title': self._live_title(title) if info.get('is_live') else title, 'description': description, 'thumbnail': thumbnail, }) From 198d4cb40ce9d819e8e4079058642ee96dae213b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Ruci=C5=84ski?= <grucin@gmail.com> Date: Sat, 29 Jul 2017 20:30:04 +0200 Subject: [PATCH 0098/2417] [generic] Add support for another ooyala embed pattern (closes #13727) --- youtube_dl/extractor/generic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4b83e861b..34e814988 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -575,6 +575,19 @@ class GenericIE(InfoExtractor): }, 'skip': 'movie expired', }, + # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js + { + 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/', + 'info_dict': { + 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2', + 'ext': 'mp4', + 'title': 'Steampunk Fest Comes to Honesdale', + 'duration': 43.276, + }, + 'params': { + 'skip_download': True, + } + }, # embed.ly video { 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -2293,6 +2306,7 @@ class GenericIE(InfoExtractor): # Look for Ooyala videos mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or + re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage)) if mobj is not None: From a0a477b885dc1dd688058924357c4935f3c935cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 30 Jul 2017 15:48:22 +0700 Subject: [PATCH 0099/2417] [youjizz] Fix extraction (closes #13744) --- youtube_dl/extractor/youjizz.py | 80 ++++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index b50f34e9b..f33fabe19 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -1,39 +1,95 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + parse_duration, +) class YouJizzIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])' + _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))' _TESTS = [{ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', - 'md5': '78fc1901148284c69af12640e01c6310', + 'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4', 'info_dict': { 'id': '2189178', 'ext': 'mp4', 'title': 'Zeichentrick 1', 'age_limit': 18, + 'duration': 2874, } }, { 'url': 'http://www.youjizz.com/videos/-2189178.html', 'only_matching': True, + }, { + 'url': 'https://www.youjizz.com/videos/embed/31991001', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - # YouJizz's HTML5 player has invalid HTML - webpage = webpage.replace('"controls', '" controls') - age_limit = self._rta_search(webpage) - video_title = self._html_search_regex( - r'<title>\s*(.*)\s*', webpage, 'title') + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') or mobj.group('embed_id') - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'(.+?)', webpage, 'title') + + formats = [] + + encodings = self._parse_json( + self._search_regex( + r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings', + default='[]'), + video_id, fatal=False) + for encoding in encodings: + if not isinstance(encoding, dict): + continue + format_url = encoding.get('filename') + if not isinstance(format_url, compat_str): + continue + if determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + format_id = encoding.get('name') or encoding.get('quality') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]', format_id, 'height', default=None)) + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'height': height, + }) + + if formats: + info_dict = { + 'formats': formats, + } + else: + # YouJizz's HTML5 player has invalid HTML + webpage = webpage.replace('"controls', '" controls') + info_dict = self._parse_html5_media_entries( + url, webpage, video_id)[0] + + duration = parse_duration(self._search_regex( + r'Runtime:([^<]+)', webpage, 'duration', + default=None)) + uploader = self._search_regex( + r'Uploaded By:.*?]*>([^<]+)', webpage, 'uploader', + default=None) info_dict.update({ 'id': video_id, - 'title': video_title, - 'age_limit': age_limit, + 'title': title, + 'age_limit': self._rta_search(webpage), + 'duration': duration, + 'uploader': uploader, }) return info_dict From 0ed4758023ddfb4d9630ba9114ef70ef7e6ac09d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Jul 2017 19:08:44 +0700 Subject: [PATCH 0100/2417] [clipfish] Remove extractor --- youtube_dl/extractor/clipfish.py | 67 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 68 deletions(-) delete mode 100644 youtube_dl/extractor/clipfish.py diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py deleted file mode 100644 index 0920f6219..000000000 --- a/youtube_dl/extractor/clipfish.py +++ /dev/null @@ -1,67 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, -) - - -class ClipfishIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P[0-9]+)' - _TEST = { - 'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/', - 'md5': 'b9a5dc46294154c1193e2d10e0c95693', - 'info_dict': { - 'id': '4343170', - 'ext': 'mp4', - 'title': 'S01 E01 - Ugly Americans - Date in der Hölle', - 'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.', - 'upload_date': '20161005', - 'duration': 1291, - 'view_count': int, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - video_info = self._download_json( - 'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id, - video_id)['items'][0] - - formats = [] - - m3u8_url = video_info.get('media_videourl_hls') - if m3u8_url: - formats.append({ - 'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'), - 'ext': 'mp4', - 'format_id': 'hls', - }) - - mp4_url = video_info.get('media_videourl') - if mp4_url: - formats.append({ - 'url': mp4_url, - 'format_id': 'mp4', - 'width': int_or_none(video_info.get('width')), - 'height': int_or_none(video_info.get('height')), - 'tbr': int_or_none(video_info.get('bitrate')), - }) - - descr = video_info.get('descr') - if descr: - descr = descr.strip() - - return { - 'id': video_id, - 'title': video_info['title'], - 'description': descr, - 'formats': formats, - 'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'), - 'duration': int_or_none(video_info.get('media_length')), - 'upload_date': unified_strdate(video_info.get('pubDate')), - 'view_count': int_or_none(video_info.get('media_views')) - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d2c5e8030..bdc7370cd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -186,7 +186,6 @@ from .chirbit import ( ) from .cinchcast import CinchcastIE from .cjsw import CJSWIE -from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .cliprs import ClipRsIE from .clipsyndicate import ClipsyndicateIE From 8b9f50d7cb4cfab5d505f4233c3e176a8106d6db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Jul 2017 19:09:44 +0700 Subject: [PATCH 0101/2417] [watchbox] Add extractor (#13739) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/watchbox.py | 151 +++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 youtube_dl/extractor/watchbox.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bdc7370cd..3489e86f0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1244,6 +1244,7 @@ from .washingtonpost import ( WashingtonPostArticleIE, ) from .wat import WatIE +from .watchbox import WatchBoxIE from .watchindianporn import WatchIndianPornIE from .wdr import ( WDRIE, diff --git a/youtube_dl/extractor/watchbox.py b/youtube_dl/extractor/watchbox.py new file mode 100644 index 000000000..b382338fa --- /dev/null +++ b/youtube_dl/extractor/watchbox.py @@ -0,0 +1,151 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + js_to_json, + strip_or_none, + try_get, + unified_timestamp, +) + + +class WatchBoxIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?Pserien|filme)/(?:[^/]+/)*[^/]+-(?P\d+)' + _TESTS = [{ + # film + 'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html', + 'info_dict': { + 'id': '341368', + 'ext': 'mp4', + 'title': 'Free Jimmy', + 'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 4890, + 'age_limit': 16, + 'release_year': 2009, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + # episode + 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html', + 'info_dict': { + 'id': '328286', + 'ext': 'mp4', + 'title': 'S01 E01 - Date in der Hölle', + 'description': 'md5:2f31c74a8186899f33cb5114491dae2b', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1291, + 'age_limit': 12, + 'release_year': 2010, + 'series': 'Ugly Americans', + 'season_number': 1, + 'episode': 'Date in der Hölle', + 'episode_number': 1, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + kind, video_id = mobj.group('kind', 'id') + + webpage = self._download_webpage(url, video_id) + + source = self._parse_json( + self._search_regex( + r'(?s)source\s*:\s*({.+?})\s*,\s*\n', webpage, 'source', + default='{}'), + video_id, transform_source=js_to_json, fatal=False) or {} + + video_id = compat_str(source.get('videoId') or video_id) + + devapi = self._download_json( + 'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={ + 'format': 'json', + 'apikey': 'hbbtv', + }, fatal=False) + + item = try_get(devapi, lambda x: x['items'][0], dict) or {} + + title = item.get('title') or try_get( + item, lambda x: x['movie']['headline_movie'], + compat_str) or source['title'] + + formats = [] + hls_url = item.get('media_videourl_hls') or source.get('hls') + if hls_url: + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + dash_url = item.get('media_videourl_wv') or source.get('dash') + if dash_url: + formats.extend(self._extract_mpd_formats( + dash_url, video_id, mpd_id='dash', fatal=False)) + mp4_url = item.get('media_videourl') + if mp4_url: + formats.append({ + 'url': mp4_url, + 'format_id': 'mp4', + 'width': int_or_none(item.get('width')), + 'height': int_or_none(item.get('height')), + 'tbr': int_or_none(item.get('bitrate')), + }) + self._sort_formats(formats) + + description = strip_or_none(item.get('descr')) + thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail') + duration = int_or_none(item.get('media_length') or source.get('length')) + timestamp = unified_timestamp(item.get('pubDate')) + view_count = int_or_none(item.get('media_views')) + age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk'])) + release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year'])) + + info = { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, + 'age_limit': age_limit, + 'release_year': release_year, + 'formats': formats, + } + + if kind.lower() == 'serien': + series = try_get( + item, lambda x: x['special']['title'], + compat_str) or source.get('format') + season_number = int_or_none(self._search_regex( + r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number', + default=None) or self._search_regex( + r'/staffel-(\d+)/', url, 'season number', default=None)) + episode = source.get('title') + episode_number = int_or_none(self._search_regex( + r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number', + default=None)) + info.update({ + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + }) + + return info From f701827e319963ca783d012f3647aa44fc0efcd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Jul 2017 19:43:09 +0700 Subject: [PATCH 0102/2417] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index 8e63b5c11..ca3ee8a93 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,28 @@ version Core +* [downloader/hls] Use redirect URL as manifest base (#13755) * [options] Correctly hide login info from debug outputs (#13696) +Extractors ++ [watchbox] Add support for watchbox.de (#13739) +- [clipfish] Remove extractor ++ [youjizz] Fix extraction (#13744) ++ [generic] Add support for another ooyala embed pattern (#13727) ++ [ard] Add support for lives (#13771) +* [soundcloud] Update client id ++ [soundcloud:trackstation] Add support for track stations (#13733) +* [svtplay] Use geo verification proxy for API request +* [svtplay] Update API URL (#13767) ++ [yandexdisk] Add support for yadi.sk (#13755) ++ [megaphone] Add support for megaphone.fm +* [amcnetworks] Make rating optional (#12453) +* [cloudy] Fix extraction (#13737) ++ [nickru] Add support for nickelodeon.ru +* [mtv] Improve thumbnal extraction +* [nick] Automate geo-restriction bypass (#13711) +* [niconico] Improve error reporting (#13696) + version 2017.07.23 From 5c9ea67bc0dedd15a0ed9ad05d8fcf09946ca461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 Jul 2017 20:47:31 +0700 Subject: [PATCH 0103/2417] release 2017.07.30.1 --- .github/ISSUE_TEMPLATE.md | 16 ++++++++-------- ChangeLog | 2 +- docs/supportedsites.md | 6 +++++- youtube_dl/version.py | 2 +- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 37d09d796..0421de755 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,16 +1,16 @@ ## Please follow the guide below - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly -- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) -- Use *Preview* tab to see how your issue will actually look like +- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`) +- Use the *Preview* tab to see what your issue will actually look like --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.30.1** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones ### What is the purpose of your *issue*? @@ -28,14 +28,14 @@ ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: -Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): +Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v `), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): + ``` -$ youtube-dl -v [debug] System config: [] [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.23 +[debug] youtube-dl version 2017.07.30.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ca3ee8a93..4f03ef064 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.07.30.1 Core * [downloader/hls] Use redirect URL as manifest base (#13755) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index eb09c470c..77aac8249 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -155,7 +155,6 @@ - **chirbit:profile** - **Cinchcast** - **CJSW** - - **Clipfish** - **cliphunter** - **ClipRs** - **Clipsyndicate** @@ -440,6 +439,7 @@ - **Medialaan** - **Mediaset** - **Medici** + - **megaphone.fm**: megaphone.fm embedded players - **Meipai**: 美拍 - **MelonVOD** - **META** @@ -533,6 +533,7 @@ - **nhl.com:videocenter:category**: NHL videocenter category - **nick.com** - **nick.de** + - **nickelodeonru** - **nicknight** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** @@ -734,6 +735,7 @@ - **soundcloud:playlist** - **soundcloud:search**: Soundcloud search - **soundcloud:set** + - **soundcloud:trackstation** - **soundcloud:user** - **soundgasm** - **soundgasm:profile** @@ -968,6 +970,7 @@ - **washingtonpost** - **washingtonpost:article** - **wat.tv** + - **WatchBox** - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile** @@ -1003,6 +1006,7 @@ - **XVideos** - **XXXYMovies** - **Yahoo**: Yahoo screen and movies + - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a8dbb93e3..38162157d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.23' +__version__ = '2017.07.30.1' From 9118c9f18a43a2f3e4814fcc02ac8e5180077df3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Aug 2017 05:20:14 +0700 Subject: [PATCH 0104/2417] [nrktv] Update API host (closes #13796) --- youtube_dl/extractor/nrk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3b4f51f61..18ead9426 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -237,7 +237,7 @@ class NRKTVIE(NRKBaseIE): (?:/\d{2}-\d{2}-\d{4})? (?:\#del=(?P\d+))? ''' % _EPISODE_RE - _API_HOST = 'psapi-we.nrk.no' + _API_HOST = 'psapi-ne.nrk.no' _TESTS = [{ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', From 8cda78ef72c52c0424ddf90c22105dbc3b1d16f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Aug 2017 23:12:34 +0700 Subject: [PATCH 0105/2417] [test_YoutubeDL] Add a test for #10083 --- test/test_YoutubeDL.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 4af14f9db..e70cbcd37 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -371,6 +371,19 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'format': 'best[height>360]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + def test_format_selection_issue_10083(self): + # See https://github.com/rg3/youtube-dl/issues/10083 + formats = [ + {'format_id': 'regular', 'height': 360, 'url': TEST_URL}, + {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'}) + ydl.process_ie_result(info_dict.copy()) + self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio') + def test_invalid_format_specs(self): def assert_syntax_error(format_spec): ydl = YDL({'format': format_spec}) From feee8d32e45c9521426cf4a089c70f37542f0065 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Thu, 3 Aug 2017 14:17:25 +0200 Subject: [PATCH 0106/2417] [phantomjs] add exe version to debug info --- youtube_dl/YoutubeDL.py | 2 ++ youtube_dl/utils.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index eb465c425..033b50702 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -86,6 +86,7 @@ from .utils import ( write_string, YoutubeDLCookieProcessor, YoutubeDLHandler, + PhantomJSwrapper, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER @@ -2146,6 +2147,7 @@ class YoutubeDL(object): exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions['rtmpdump'] = rtmpdump_version() + exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( '%s %s' % (exe, v) for exe, v in sorted(exe_versions.items()) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c67f95ac9..4d0685d83 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3895,6 +3895,10 @@ class PhantomJSwrapper(object): _TMP_FILE_NAMES = ['script', 'html', 'cookies'] + @staticmethod + def _version(): + return get_exe_version('phantomjs', version_re=r'([0-9.]+)') + def __init__(self, extractor, required_version=None, timeout=10000): self.exe = check_executable('phantomjs', ['-v']) if not self.exe: @@ -3905,7 +3909,7 @@ class PhantomJSwrapper(object): self.extractor = extractor if required_version: - version = get_exe_version(self.exe, version_re=r'([0-9.]+)') + version = self._version() if is_outdated_version(version, required_version): self.extractor._downloader.report_warning( 'Your copy of PhantomJS is outdated, update it to version ' From 183062a4ab2f698f5096e69602fb2b5c861c01a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Aug 2017 23:19:59 +0700 Subject: [PATCH 0107/2417] [pbs] Add support for new URL schema (closes #13801) --- youtube_dl/extractor/pbs.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 16cc667d0..8889e4a1a 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -189,7 +189,7 @@ class PBSIE(InfoExtractor): # Direct video URL (?:%s)/(?:viralplayer|video)/(?P[0-9]+)/? | # Article with embedded player (or direct video) - (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | + (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P[^/]+)/ ) @@ -345,6 +345,21 @@ class PBSIE(InfoExtractor): 'formats': 'mincount:8', }, }, + { + # https://github.com/rg3/youtube-dl/issues/13801 + 'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/', + 'info_dict': { + 'id': '3003333873', + 'ext': 'mp4', + 'title': 'PBS NewsHour - full episode July 31, 2017', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'duration': 3265, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', 'only_matching': True, @@ -433,6 +448,9 @@ class PBSIE(InfoExtractor): if url: break + if not url: + url = self._og_search_url(webpage) + mobj = re.match(self._VALID_URL, url) player_id = mobj.group('player_id') From 1f03fef994e076a827cfd818eb4d76fe2eb85130 Mon Sep 17 00:00:00 2001 From: Justin Quan Date: Fri, 4 Aug 2017 08:43:44 -0700 Subject: [PATCH 0108/2417] [README.md] Improve grammar --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fe2bebc2a..0067184be 100644 --- a/README.md +++ b/README.md @@ -584,7 +584,7 @@ If you are using an output template inside a Windows batch file then you must es #### Output template examples -Note on Windows you may need to use double quotes instead of single. +Note that on Windows you may need to use double quotes instead of single. ```bash $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc @@ -671,7 +671,7 @@ If you want to preserve the old format selection behavior (prior to youtube-dl 2 #### Format selection examples -Note on Windows you may need to use double quotes instead of single. +Note that on Windows you may need to use double quotes instead of single. ```bash # Download best mp4 format available or any other best if no mp4 available From 11a6793f8013f37045d769e5b166f75e17f275d1 Mon Sep 17 00:00:00 2001 From: Matt Crupi Date: Fri, 4 Aug 2017 08:46:54 -0700 Subject: [PATCH 0109/2417] [mlb] Extend _VALID_URL (closes #13740) --- youtube_dl/extractor/mlb.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 59cd4b838..4d45f960e 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -15,7 +15,7 @@ class MLBIE(InfoExtractor): (?:[\da-z_-]+\.)*mlb\.com/ (?: (?: - (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v| + (?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)| (?: shared/video/embed/(?:embed|m-internal-embed)\.html| (?:[^/]+/)+(?:play|index)\.jsp| @@ -94,6 +94,10 @@ class MLBIE(InfoExtractor): 'upload_date': '20150415', } }, + { + 'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694', + 'only_matching': True, + }, { 'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb', 'only_matching': True, From 57a38a38c32ea2eb1ca54ee4ba3fcd31a9b7f328 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Aug 2017 23:44:07 +0700 Subject: [PATCH 0110/2417] [udemy] Fix subtitles extraction (closes #13812) --- youtube_dl/extractor/udemy.py | 47 ++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 160be1b1b..3b02f43e3 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -15,6 +15,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + js_to_json, sanitized_Request, unescapeHTML, urlencode_postdata, @@ -268,6 +269,25 @@ class UdemyIE(InfoExtractor): f = add_output_format_meta(f, format_id) formats.append(f) + def extract_subtitles(track_list): + if not isinstance(track_list, list): + return + for track in track_list: + if not isinstance(track, dict): + continue + if track.get('kind') != 'captions': + continue + src = track.get('src') + if not src or not isinstance(src, compat_str): + continue + lang = track.get('language') or track.get( + 'srclang') or track.get('label') + sub_dict = automatic_captions if track.get( + 'autogenerated') is True else subtitles + sub_dict.setdefault(lang, []).append({ + 'url': src, + }) + download_urls = asset.get('download_urls') if isinstance(download_urls, dict): extract_formats(download_urls.get('Video')) @@ -315,23 +335,16 @@ class UdemyIE(InfoExtractor): extract_formats(data.get('sources')) if not duration: duration = int_or_none(data.get('duration')) - tracks = data.get('tracks') - if isinstance(tracks, list): - for track in tracks: - if not isinstance(track, dict): - continue - if track.get('kind') != 'captions': - continue - src = track.get('src') - if not src or not isinstance(src, compat_str): - continue - lang = track.get('language') or track.get( - 'srclang') or track.get('label') - sub_dict = automatic_captions if track.get( - 'autogenerated') is True else subtitles - sub_dict.setdefault(lang, []).append({ - 'url': src, - }) + extract_subtitles(data.get('tracks')) + + if not subtitles and not automatic_captions: + text_tracks = self._parse_json( + self._search_regex( + r'text-tracks=(["\'])(?P\[.+?\])\1', view_html, + 'text tracks', default='{}', group='data'), video_id, + transform_source=lambda s: js_to_json(unescapeHTML(s)), + fatal=False) + extract_subtitles(text_tracks) self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) From b3b5870cba46d84b7482f120f550822d3b64c3d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Aug 2017 23:51:03 +0700 Subject: [PATCH 0111/2417] [pornhd] Fix extraction (closes #13783) --- youtube_dl/extractor/pornhd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 842317e6c..36761788d 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor): r'(.+?) - .*?[Pp]ornHD.*?'], webpage, 'title') sources = self._parse_json(js_to_json(self._search_regex( - r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", + r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]", webpage, 'sources', default='{}')), video_id) if not sources: From 799802f368012f579750b26db117b3a9dfdcbe05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Aug 2017 23:54:28 +0700 Subject: [PATCH 0112/2417] [teamfour] Remove extractor (closes #13782) Now covered with generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/teamfourstar.py | 48 ---------------------------- 2 files changed, 49 deletions(-) delete mode 100644 youtube_dl/extractor/teamfourstar.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3489e86f0..d0e04dd7d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -994,7 +994,6 @@ from .teachertube import ( ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE -from .teamfourstar import TeamFourStarIE from .techtalks import TechTalksIE from .ted import TEDIE from .tele13 import Tele13IE diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py deleted file mode 100644 index a8c6ed7be..000000000 --- a/youtube_dl/extractor/teamfourstar.py +++ /dev/null @@ -1,48 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from .jwplatform import JWPlatformIE -from ..utils import unified_strdate - - -class TeamFourStarIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P[a-z0-9\-]+)' - _TEST = { - 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/', - 'info_dict': { - 'id': '0WdZO31W', - 'title': 'TFS Abridged Parody Episode 1', - 'description': 'md5:d60bc389588ebab2ee7ad432bda953ae', - 'ext': 'mp4', - 'timestamp': 1394168400, - 'upload_date': '20080508', - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - jwplatform_url = JWPlatformIE._extract_url(webpage) - - video_title = self._html_search_regex( - r']+class="entry-title"[^>]*>(?P.+?)</h1>', - webpage, 'title') - video_date = unified_strdate(self._html_search_regex( - r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>', - webpage, 'date', fatal=False)) - video_description = self._html_search_regex( - r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>', - webpage, 'description', fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': jwplatform_url, - } From f31fd0693b674e73f9273f0afba2a54853e4ca35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 00:00:21 +0700 Subject: [PATCH 0113/2417] [vidme] Extract DASH and HLS formats --- youtube_dl/extractor/vidme.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index e9ff336c4..a7971d72e 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import itertools from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( ExtractorError, int_or_none, @@ -161,13 +164,28 @@ class VidmeIE(InfoExtractor): 'or for violating the terms of use.', expected=True) - formats = [{ - 'format_id': f.get('type'), - 'url': f['uri'], - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')), - 'preference': 0 if f.get('type', '').endswith('clip') else 1, - } for f in video.get('formats', []) if f.get('uri')] + formats = [] + for f in video.get('formats', []): + format_url = f.get('uri') + if not format_url or not isinstance(format_url, compat_str): + continue + format_type = f.get('type') + if format_type == 'dash': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + elif format_type == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': f.get('type'), + 'url': format_url, + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'preference': 0 if f.get('type', '').endswith( + 'clip') else 1, + }) if not formats and video.get('complete_url'): formats.append({ From bbbe1cebfce3cfb63e9c01d29105fcba693ba54c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 00:09:36 +0700 Subject: [PATCH 0114/2417] [mlb] Update test (closes #13777) --- youtube_dl/extractor/mlb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 4d45f960e..675ff6873 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -84,7 +84,7 @@ class MLBIE(InfoExtractor): }, { 'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer', - 'md5': 'b190e70141fb9a1552a85426b4da1b5d', + 'md5': 'aafaf5b0186fee8f32f20508092f8111', 'info_dict': { 'id': '75609783', 'ext': 'mp4', From 8519b88f67de9c0c11cd2edd8dc55b9a4f13d110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 00:59:07 +0700 Subject: [PATCH 0115/2417] [yandexdisk] Relax _VALID_URL (closes #13824) --- youtube_dl/extractor/yandexdisk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dl/extractor/yandexdisk.py index 11729f0f7..e8f6ae10f 100644 --- a/youtube_dl/extractor/yandexdisk.py +++ b/youtube_dl/extractor/yandexdisk.py @@ -13,9 +13,9 @@ from ..utils import ( class YandexDiskIE(InfoExtractor): - _VALID_URL = r'https?://yadi\.sk/i/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', 'md5': '33955d7ae052f15853dc41f35f17581c', 'info_dict': { @@ -27,7 +27,10 @@ class YandexDiskIE(InfoExtractor): 'uploader_id': '300043621', 'view_count': int, }, - } + }, { + 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 1141e9104bc0f8d577f18cf28a1af58adea1248e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 06:57:19 +0700 Subject: [PATCH 0116/2417] Use relative paths for DASH fragments (closes #12990) 10x reduced JSON size refs #13810 --- youtube_dl/downloader/dash.py | 14 ++++++++++---- youtube_dl/extractor/common.py | 16 ++++++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 7491fdad8..576ece6db 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .fragment import FragmentFD from ..compat import compat_urllib_error +from ..utils import urljoin class DashSegmentsFD(FragmentFD): @@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD): FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): - segments = info_dict['fragments'][:1] if self.params.get( + fragment_base_url = info_dict.get('fragment_base_url') + fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] ctx = { 'filename': filename, - 'total_frags': len(segments), + 'total_frags': len(fragments), } self._prepare_and_start_frag_download(ctx) @@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD): skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) frag_index = 0 - for i, segment in enumerate(segments): + for i, fragment in enumerate(fragments): frag_index += 1 if frag_index <= ctx['fragment_index']: continue @@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) if not success: return False self._append_fragment(ctx, frag_content) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 748b4d59f..459e7ffd6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1892,9 +1892,13 @@ class InfoExtractor(object): 'Bandwidth': bandwidth, } + def location_key(location): + return 'url' if re.match(r'^https?://', location) else 'path' + if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) + media_location_key = location_key(media_template) # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ # can't be used at the same time @@ -1904,7 +1908,7 @@ class InfoExtractor(object): segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['fragments'] = [{ - 'url': media_template % { + media_location_key: media_template % { 'Number': segment_number, 'Bandwidth': bandwidth, }, @@ -1928,7 +1932,7 @@ class InfoExtractor(object): 'Number': segment_number, } representation_ms_info['fragments'].append({ - 'url': segment_url, + media_location_key: segment_url, 'duration': float_or_none(segment_d, representation_ms_info['timescale']), }) @@ -1952,8 +1956,9 @@ class InfoExtractor(object): for s in representation_ms_info['s']: duration = float_or_none(s['d'], timescale) for r in range(s.get('r', 0) + 1): + segment_uri = representation_ms_info['segment_urls'][segment_index] fragments.append({ - 'url': representation_ms_info['segment_urls'][segment_index], + location_key(segment_uri): segment_uri, 'duration': duration, }) segment_index += 1 @@ -1962,6 +1967,7 @@ class InfoExtractor(object): # No fragments key is present in this case. if 'fragments' in representation_ms_info: f.update({ + 'fragment_base_url': base_url, 'fragments': [], 'protocol': 'http_dash_segments', }) @@ -1969,10 +1975,8 @@ class InfoExtractor(object): initialization_url = representation_ms_info['initialization_url'] if not f.get('url'): f['url'] = initialization_url - f['fragments'].append({'url': initialization_url}) + f['fragments'].append({location_key(initialization_url): initialization_url}) f['fragments'].extend(representation_ms_info['fragments']) - for fragment in f['fragments']: - fragment['url'] = urljoin(base_url, fragment['url']) try: existing_format = next( fo for fo in formats From c983cc3b71e3b2c80df920481dfa90bbc2ad7937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 08:17:01 +0700 Subject: [PATCH 0117/2417] [cinchcast] Extend _VALID_URL --- youtube_dl/extractor/cinchcast.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dl/extractor/cinchcast.py index 562c9bbbb..b861d54b0 100644 --- a/youtube_dl/extractor/cinchcast.py +++ b/youtube_dl/extractor/cinchcast.py @@ -9,12 +9,20 @@ from ..utils import ( class CinchcastIE(InfoExtractor): - _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single', + 'info_dict': { + 'id': '5258197', + 'ext': 'mp3', + 'title': 'Train Your Brain to Up Your Game with Coach Mandy', + 'upload_date': '20130816', + }, + }, { # Actual test is run in generic, look for undergroundwellness 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', 'only_matching': True, - } + }] def _real_extract(self, url): video_id = self._match_id(url) From 1d5472290f8c426c13e3403cb95fd44cc71b5a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 08:28:12 +0700 Subject: [PATCH 0118/2417] [podomatic] Extend _VALID_URL (closes #13827) --- youtube_dl/extractor/podomatic.py | 63 +++++++++++++++++-------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dl/extractor/podomatic.py index f20946a2b..25fcebf9f 100644 --- a/youtube_dl/extractor/podomatic.py +++ b/youtube_dl/extractor/podomatic.py @@ -9,39 +9,46 @@ from ..utils import int_or_none class PodomaticIE(InfoExtractor): IE_NAME = 'podomatic' - _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' + _VALID_URL = r'''(?x) + (?P<proto>https?):// + (?: + (?P<channel>[^.]+)\.podomatic\.com/entry| + (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes + )/ + (?P<id>[^/?#&]+) + ''' - _TESTS = [ - { - 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', - 'md5': '84bb855fcf3429e6bf72460e1eed782d', - 'info_dict': { - 'id': '2009-01-02T16_03_35-08_00', - 'ext': 'mp3', - 'uploader': 'Science Teaching Tips', - 'uploader_id': 'scienceteachingtips', - 'title': '64. When the Moon Hits Your Eye', - 'duration': 446, - } - }, - { - 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', - 'md5': 'd2cf443931b6148e27638650e2638297', - 'info_dict': { - 'id': '2013-11-15T16_31_21-08_00', - 'ext': 'mp3', - 'uploader': 'Ostbahnhof / Techno Mix', - 'uploader_id': 'ostbahnhof', - 'title': 'Einunddreizig', - 'duration': 3799, - } - }, - ] + _TESTS = [{ + 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', + 'md5': '84bb855fcf3429e6bf72460e1eed782d', + 'info_dict': { + 'id': '2009-01-02T16_03_35-08_00', + 'ext': 'mp3', + 'uploader': 'Science Teaching Tips', + 'uploader_id': 'scienceteachingtips', + 'title': '64. When the Moon Hits Your Eye', + 'duration': 446, + } + }, { + 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', + 'md5': 'd2cf443931b6148e27638650e2638297', + 'info_dict': { + 'id': '2013-11-15T16_31_21-08_00', + 'ext': 'mp3', + 'uploader': 'Ostbahnhof / Techno Mix', + 'uploader_id': 'ostbahnhof', + 'title': 'Einunddreizig', + 'duration': 3799, + } + }, { + 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - channel = mobj.group('channel') + channel = mobj.group('channel') or mobj.group('channel_2') json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' + '?permalink=true&rtmp=0') % From f172c86dcdb46e484afc63732db56df5633028ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Aug 2017 21:17:55 +0700 Subject: [PATCH 0119/2417] [vlive:channel] Limit number of videos per page to 100 (closes #13830) --- youtube_dl/extractor/vlive.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 77c120a57..64d0224e6 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -236,7 +236,12 @@ class VLiveChannelIE(InfoExtractor): query={ 'app_id': app_id, 'channelSeq': channel_seq, - 'maxNumOfRows': 1000, + # Large values of maxNumOfRows (~300 or above) may cause + # empty responses (see [1]), e.g. this happens for [2] that + # has more than 300 videos. + # 1. https://github.com/rg3/youtube-dl/issues/13830 + # 2. http://channels.vlive.tv/EDBF. + 'maxNumOfRows': 100, '_': int(time.time()), 'pageNo': page_num } From daaaf5f5942252e9fbc367957bd3b8a96d0dd5bb Mon Sep 17 00:00:00 2001 From: Ashutosh Chaudhary <ashutosh.chaudhary@gmail.com> Date: Mon, 23 Jan 2017 05:12:52 +0530 Subject: [PATCH 0120/2417] [voot] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/voot.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/voot.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d0e04dd7d..48dda8b8e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1333,3 +1333,4 @@ from .zapiks import ZapiksIE from .zaq1 import Zaq1IE from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE +from .voot import VootIE diff --git a/youtube_dl/extractor/voot.py b/youtube_dl/extractor/voot.py new file mode 100644 index 000000000..db5bda660 --- /dev/null +++ b/youtube_dl/extractor/voot.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class VootIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/(?:.+?[/-]?)/1/(?:.+?[0-9]?)/(?:.+?[/-]?)/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353', + 'info_dict': { + 'id': '441353', + 'ext': 'mp4', + 'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340', + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + _GET_CONTENT_TEMPLATE = 'https://wapi.voot.com/ws/ott/getMediaInfo.json?platform=Web&pId=3&mediaId=%s' + + def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True): + json_data = super(VootIE, self)._download_json(url_or_request, video_id, note, fatal=fatal) + if json_data['status']['code'] != 0: + if fatal: + raise ExtractorError(json_data['status']['message']) + return None + return json_data['assets'] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + self._GET_CONTENT_TEMPLATE % video_id, + video_id) + + thumbnail = '' + formats = [] + + if video_data: + format_url = video_data.get('URL') + formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + + if video_data['Pictures']: + for picture in video_data['Pictures']: + #Get only first available thumbnail + thumbnail = picture.get('URL') + break + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_data.get('MediaName'), + 'thumbnail': thumbnail, + 'formats':formats, + } From e2b4808fd8ed49424deaa6d800daf0950e55ffff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Aug 2017 08:04:51 +0700 Subject: [PATCH 0121/2417] [voot] Improve extraction (#10255, closes #11814) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/voot.py | 111 ++++++++++++++++++++--------- 2 files changed, 78 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 48dda8b8e..ebe414dae 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1222,6 +1222,7 @@ from .vodlocker import VodlockerIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE +from .voot import VootIE from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE @@ -1333,4 +1334,3 @@ from .zapiks import ZapiksIE from .zaq1 import Zaq1IE from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE -from .voot import VootIE diff --git a/youtube_dl/extractor/voot.py b/youtube_dl/extractor/voot.py index db5bda660..5de3deb8c 100644 --- a/youtube_dl/extractor/voot.py +++ b/youtube_dl/extractor/voot.py @@ -2,54 +2,97 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import ( + ExtractorError, + int_or_none, + try_get, + unified_timestamp, +) class VootIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/(?:.+?[/-]?)/1/(?:.+?[0-9]?)/(?:.+?[/-]?)/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?voot\.com/(?:[^/]+/)+(?P<id>\d+)' + _GEO_COUNTRIES = ['IN'] + _TESTS = [{ 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353', 'info_dict': { - 'id': '441353', + 'id': '0_8ledb18o', 'ext': 'mp4', 'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - _GET_CONTENT_TEMPLATE = 'https://wapi.voot.com/ws/ott/getMediaInfo.json?platform=Web&pId=3&mediaId=%s' - - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True): - json_data = super(VootIE, self)._download_json(url_or_request, video_id, note, fatal=fatal) - if json_data['status']['code'] != 0: - if fatal: - raise ExtractorError(json_data['status']['message']) - return None - return json_data['assets'] + 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1', + 'uploader_id': 'batchUser', + 'timestamp': 1472162937, + 'upload_date': '20160825', + 'duration': 1146, + 'series': 'Ishq Ka Rang Safed', + 'season_number': 1, + 'episode': 'Is this the end of Kamini?', + 'episode_number': 340, + 'view_count': int, + 'like_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925', + 'only_matching': True, + }, { + 'url': 'https://www.voot.com/movies/pandavas-5/424627', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( - self._GET_CONTENT_TEMPLATE % video_id, - video_id) - thumbnail = '' - formats = [] + media_info = self._download_json( + 'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id, + query={ + 'platform': 'Web', + 'pId': 2, + 'mediaId': video_id, + }) - if video_data: - format_url = video_data.get('URL') - formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + status_code = try_get(media_info, lambda x: x['status']['code'], int) + if status_code != 0: + raise ExtractorError(media_info['status']['message'], expected=True) - if video_data['Pictures']: - for picture in video_data['Pictures']: - #Get only first available thumbnail - thumbnail = picture.get('URL') - break + media = media_info['assets'] - self._sort_formats(formats) + entry_id = media['EntryId'] + title = media['MediaName'] + + description, series, season_number, episode, episode_number = [None] * 5 + + for meta in try_get(media, lambda x: x['Metas'], list) or []: + key, value = meta.get('Key'), meta.get('Value') + if not key or not value: + continue + if key == 'ContentSynopsis': + description = value + elif key == 'RefSeriesTitle': + series = value + elif key == 'RefSeriesSeason': + season_number = int_or_none(value) + elif key == 'EpisodeMainTitle': + episode = value + elif key == 'EpisodeNo': + episode_number = int_or_none(value) return { - 'id': video_id, - 'title': video_data.get('MediaName'), - 'thumbnail': thumbnail, - 'formats':formats, + '_type': 'url_transparent', + 'url': 'kaltura:1982551:%s' % entry_id, + 'ie_key': KalturaIE.ie_key(), + 'title': title, + 'description': description, + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + 'timestamp': unified_timestamp(media.get('CreationDate')), + 'duration': int_or_none(media.get('Duration')), + 'view_count': int_or_none(media.get('ViewCounter')), + 'like_count': int_or_none(media.get('like_counter')), } From 16afce174ea71690844d37776d518ae374b896ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Aug 2017 08:18:16 +0700 Subject: [PATCH 0122/2417] [mpora] Remove extractor (closes #13826) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/mpora.py | 62 ------------------------------ 2 files changed, 63 deletions(-) delete mode 100644 youtube_dl/extractor/mpora.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ebe414dae..897557f93 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -584,7 +584,6 @@ from .mixcloud import ( ) from .mlb import MLBIE from .mnet import MnetIE -from .mpora import MporaIE from .moevideo import MoeVideoIE from .mofosex import MofosexIE from .mojvideo import MojvideoIE diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py deleted file mode 100644 index 5a1bee5c8..000000000 --- a/youtube_dl/extractor/mpora.py +++ /dev/null @@ -1,62 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import int_or_none - - -class MporaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)' - IE_NAME = 'MPORA' - - _TEST = { - 'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de', - 'md5': 'a7a228473eedd3be741397cf452932eb', - 'info_dict': { - 'id': 'AAdo8okx4wiz', - 'ext': 'mp4', - 'title': 'Katy Curd - Winter in the Forest', - 'duration': 416, - 'uploader': 'Peter Newman Media', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - data_json = self._search_regex( - [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", - r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"], - webpage, 'json') - data = self._parse_json(data_json, video_id) - - uploader = data['info_overlay'].get('username') - duration = data['video']['duration'] // 1000 - thumbnail = data['video']['encodings']['sd']['poster'] - title = data['info_overlay']['title'] - - formats = [] - for encoding_id, edata in data['video']['encodings'].items(): - for src in edata['sources']: - width_str = self._search_regex( - r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'], - False, default=None) - vcodec = src['type'].partition('/')[2] - - formats.append({ - 'format_id': encoding_id + '-' + vcodec, - 'url': src['src'], - 'vcodec': vcodec, - 'width': int_or_none(width_str), - }) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'uploader': uploader, - 'duration': duration, - 'thumbnail': thumbnail, - } From fac188c6954edcccf3104abc3ac0155125a7d427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Aug 2017 08:44:28 +0700 Subject: [PATCH 0123/2417] [pluralsight] Fix format selection --- youtube_dl/extractor/pluralsight.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index e45d9fe55..d35f54ce8 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -224,6 +224,7 @@ class PluralsightIE(PluralsightBaseIE): req_format_split = req_format.split('-', 1) if len(req_format_split) > 1: req_ext, req_quality = req_format_split + req_quality = '-'.join(req_quality.split('-')[:2]) for allowed_quality in ALLOWED_QUALITIES: if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities: return (AllowedQuality(req_ext, (req_quality, )), ) From 92740e42414cb47f785daf257b9726fa361977b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Aug 2017 09:02:14 +0700 Subject: [PATCH 0124/2417] [ChangeLog] Actualize --- ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4f03ef064..e2515866e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +version <unreleased> + +Core +* Use relative paths for DASH fragments (#12990) + +Extractors +* [pluralsight] Fix format selection +- [mpora] Remove extractor (#13826) ++ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218) +* [vlive:channel] Limit number of videos per page to 100 (#13830) +* [podomatic] Extend URL regular expression (#13827) +* [cinchcast] Extend URL regular expression +* [yandexdisk] Relax URL regular expression (#13824) +* [vidme] Extract DASH and HLS formats +- [teamfour] Remove extractor (#13782) +* [pornhd] Fix extraction (#13783) +* [udemy] Fix subtitles extraction (#13812) +* [mlb] Extend URL regular expression (#13740, #13773) ++ [pbs] Add support for new URL schema (#13801) +* [nrktv] Update API host (#13796) + + version 2017.07.30.1 Core From 903a183b6adc60808f04294a7003b6d4bd250304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Aug 2017 09:05:36 +0700 Subject: [PATCH 0125/2417] release 2017.08.06 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +-- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0421de755..5b72032bc 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.30.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.06** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.30.1 +[debug] youtube-dl version 2017.08.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e2515866e..18893bba8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.08.06 Core * Use relative paths for DASH fragments (#12990) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 77aac8249..a3bd07726 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -472,7 +472,6 @@ - **MovieFap** - **Moviezine** - **MovingImage** - - **MPORA** - **MSN** - **mtg**: MTG services - **mtv** @@ -783,7 +782,6 @@ - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - - **TeamFourStar** - **TechTalks** - **techtv.mit.edu** - **ted** @@ -953,6 +951,7 @@ - **VODPl** - **VODPlatform** - **VoiceRepublic** + - **Voot** - **VoxMedia** - **Vporn** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 38162157d..11d3bf29f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.30.1' +__version__ = '2017.08.06' From 463e7216c87814edf1453aa3a5bfad89474ba6b1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 6 Aug 2017 23:07:06 +0800 Subject: [PATCH 0126/2417] [niconico] Support HTML5-only videos (closes #13806) --- ChangeLog | 6 ++ youtube_dl/extractor/niconico.py | 124 +++++++++++++++++++++---------- 2 files changed, 90 insertions(+), 40 deletions(-) diff --git a/ChangeLog b/ChangeLog index 18893bba8..7cd385e68 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [niconico] Support HTML5-only videos (#13806) + + version 2017.08.06 Core diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 79b9952c3..b13dc0035 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -11,10 +11,13 @@ from ..compat import ( ) from ..utils import ( determine_ext, + dict_get, ExtractorError, int_or_none, parse_duration, parse_iso8601, + try_get, + unified_timestamp, urlencode_postdata, xpath_text, ) @@ -31,12 +34,15 @@ class NiconicoIE(InfoExtractor): 'id': 'sm22312215', 'ext': 'mp4', 'title': 'Big Buck Bunny', + 'thumbnail': r're:https?://.*', 'uploader': 'takuya0301', 'uploader_id': '2698420', 'upload_date': '20131123', 'timestamp': 1385182762, 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, + 'view_count': int, + 'comment_count': int, }, 'skip': 'Requires an account', }, { @@ -48,6 +54,7 @@ class NiconicoIE(InfoExtractor): 'ext': 'swf', 'title': '【鏡音リン】Dance on media【オリジナル】take2!', 'description': 'md5:689f066d74610b3b22e0f1739add0f58', + 'thumbnail': r're:https?://.*', 'uploader': 'りょうた', 'uploader_id': '18822557', 'upload_date': '20110429', @@ -64,9 +71,11 @@ class NiconicoIE(InfoExtractor): 'ext': 'unknown_video', 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', + 'thumbnail': r're:https?://.*', 'upload_date': '20071224', 'timestamp': int, # timestamp field has different value if logged in 'duration': 304, + 'view_count': int, }, 'skip': 'Requires an account', }, { @@ -76,12 +85,31 @@ class NiconicoIE(InfoExtractor): 'ext': 'mp4', 'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~', 'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1', + 'thumbnail': r're:https?://.*', 'timestamp': 1388851200, 'upload_date': '20140104', 'uploader': 'アニメロチャンネル', 'uploader_id': '312', }, 'skip': 'The viewing period of the video you were searching for has expired.', + }, { + # video not available via `getflv` + 'url': 'http://www.nicovideo.jp/watch/sm1151009', + 'info_dict': { + 'id': 'sm1151009', + 'ext': 'flv', + 'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)', + 'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7', + 'thumbnail': r're:https?://.*', + 'duration': 184, + 'timestamp': 1190868283, + 'upload_date': '20070927', + 'uploader': 'denden2', + 'uploader_id': '1392194', + 'view_count': int, + 'comment_count': int, + }, + 'skip': 'Requires an account', }, { 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'only_matching': True, @@ -130,33 +158,51 @@ class NiconicoIE(InfoExtractor): if video_id.startswith('so'): video_id = self._match_id(handle.geturl()) - video_info = self._download_xml( - 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, - note='Downloading video info page') + api_data = self._parse_json(self._html_search_regex( + 'data-api-data="([^"]+)"', webpage, + 'API data', default='{}'), video_id) + video_real_url = try_get( + api_data, lambda x: x['video']['smileInfo']['url']) - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', - video_id, 'Downloading flv info') + if video_real_url: + def get_video_info(items): + return dict_get(api_data['video'], items) + else: + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', + video_id, 'Downloading flv info') - flv_info = compat_urlparse.parse_qs(flv_info_webpage) - if 'url' not in flv_info: - if 'deleted' in flv_info: - raise ExtractorError('The video has been deleted.', - expected=True) - elif 'closed' in flv_info: - raise ExtractorError('Niconico videos now require logging in', - expected=True) - elif 'error' in flv_info: - raise ExtractorError('%s reports error: %s' % ( - self.IE_NAME, flv_info['error'][0]), expected=True) - else: - raise ExtractorError('Unable to find video URL') + flv_info = compat_urlparse.parse_qs(flv_info_webpage) + if 'url' not in flv_info: + if 'deleted' in flv_info: + raise ExtractorError('The video has been deleted.', + expected=True) + elif 'closed' in flv_info: + raise ExtractorError('Niconico videos now require logging in', + expected=True) + elif 'error' in flv_info: + raise ExtractorError('%s reports error: %s' % ( + self.IE_NAME, flv_info['error'][0]), expected=True) + else: + raise ExtractorError('Unable to find video URL') - video_real_url = flv_info['url'][0] + video_real_url = flv_info['url'][0] + + video_info_xml = self._download_xml( + 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, + video_id, note='Downloading video info page') + + def get_video_info(items): + if not isinstance(items, list): + items = [items] + for item in items: + ret = xpath_text(video_info_xml, './/' + item) + if ret: + return ret # Start extracting information - title = xpath_text(video_info, './/title') + title = get_video_info('title') if not title: title = self._og_search_title(webpage, default=None) if not title: @@ -170,18 +216,19 @@ class NiconicoIE(InfoExtractor): watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {} video_detail = watch_api_data.get('videoDetail', {}) - extension = xpath_text(video_info, './/movie_type') + extension = get_video_info(['movie_type', 'movieType']) if not extension: extension = determine_ext(video_real_url) thumbnail = ( - xpath_text(video_info, './/thumbnail_url') or + get_video_info(['thumbnail_url', 'thumbnailURL']) or self._html_search_meta('image', webpage, 'thumbnail', default=None) or video_detail.get('thumbnail')) - description = xpath_text(video_info, './/description') + description = get_video_info('description') - timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve')) + timestamp = (parse_iso8601(get_video_info('first_retrieve')) or + unified_timestamp(get_video_info('postedDateTime'))) if not timestamp: match = self._html_search_meta('datePublished', webpage, 'date published', default=None) if match: @@ -191,7 +238,7 @@ class NiconicoIE(InfoExtractor): video_detail['postedAt'].replace('/', '-'), delimiter=' ', timezone=datetime.timedelta(hours=9)) - view_count = int_or_none(xpath_text(video_info, './/view_counter')) + view_count = int_or_none(get_video_info(['view_counter', 'viewCount'])) if not view_count: match = self._html_search_regex( r'>Views: <strong[^>]*>([^<]+)</strong>', @@ -200,31 +247,28 @@ class NiconicoIE(InfoExtractor): view_count = int_or_none(match.replace(',', '')) view_count = view_count or video_detail.get('viewCount') - comment_count = int_or_none(xpath_text(video_info, './/comment_num')) + comment_count = (int_or_none(get_video_info('comment_num')) or + video_detail.get('commentCount') or + try_get(api_data, lambda x: x['thread']['commentCount'])) if not comment_count: match = self._html_search_regex( r'>Comments: <strong[^>]*>([^<]+)</strong>', webpage, 'comment count', default=None) if match: comment_count = int_or_none(match.replace(',', '')) - comment_count = comment_count or video_detail.get('commentCount') duration = (parse_duration( - xpath_text(video_info, './/length') or + get_video_info('length') or self._html_search_meta( 'video:duration', webpage, 'video duration', default=None)) or - video_detail.get('length')) + video_detail.get('length') or + get_video_info('duration')) - webpage_url = xpath_text(video_info, './/watch_url') or url + webpage_url = get_video_info('watch_url') or url - if video_info.find('.//ch_id') is not None: - uploader_id = video_info.find('.//ch_id').text - uploader = video_info.find('.//ch_name').text - elif video_info.find('.//user_id') is not None: - uploader_id = video_info.find('.//user_id').text - uploader = video_info.find('.//user_nickname').text - else: - uploader_id = uploader = None + owner = api_data.get('owner', {}) + uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id') + uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname') return { 'id': video_id, From ee6a611665a1ee8583ce84bd9d36d03b6f697895 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 7 Aug 2017 00:19:46 +0800 Subject: [PATCH 0127/2417] [niconico] Support videos with multiple formats (closes #13522) --- ChangeLog | 1 + youtube_dl/extractor/niconico.py | 161 +++++++++++++++++++++++++++---- 2 files changed, 145 insertions(+), 17 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7cd385e68..084e98c0e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [niconico] Support videos with multiple formats (#13522) + [niconico] Support HTML5-only videos (#13806) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index b13dc0035..026329d3e 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -14,8 +14,10 @@ from ..utils import ( dict_get, ExtractorError, int_or_none, + float_or_none, parse_duration, parse_iso8601, + remove_start, try_get, unified_timestamp, urlencode_postdata, @@ -93,11 +95,12 @@ class NiconicoIE(InfoExtractor): }, 'skip': 'The viewing period of the video you were searching for has expired.', }, { - # video not available via `getflv` + # video not available via `getflv`; "old" HTML5 video 'url': 'http://www.nicovideo.jp/watch/sm1151009', + 'md5': '8fa81c364eb619d4085354eab075598a', 'info_dict': { 'id': 'sm1151009', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)', 'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7', 'thumbnail': r're:https?://.*', @@ -110,6 +113,25 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, }, 'skip': 'Requires an account', + }, { + # "New" HTML5 video + 'url': 'http://www.nicovideo.jp/watch/sm31464864', + 'md5': '351647b4917660986dc0fa8864085135', + 'info_dict': { + 'id': 'sm31464864', + 'ext': 'mp4', + 'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質', + 'description': 'md5:e52974af9a96e739196b2c1ca72b5feb', + 'timestamp': 1498514060, + 'upload_date': '20170626', + 'uploader': 'ゲス', + 'uploader_id': '40826363', + 'thumbnail': r're:https?://.*', + 'duration': 198, + 'view_count': int, + 'comment_count': int, + }, + 'skip': 'Requires an account', }, { 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'only_matching': True, @@ -147,6 +169,84 @@ class NiconicoIE(InfoExtractor): self._downloader.report_warning('unable to log in: bad username or password') return login_ok + def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality): + def yesno(boolean): + return 'yes' if boolean else 'no' + + session_api_data = api_data['video']['dmcInfo']['session_api'] + session_api_endpoint = session_api_data['urls'][0] + + format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality])) + + session_response = self._download_json( + session_api_endpoint['url'], video_id, + query={'_format': 'json'}, + headers={'Content-Type': 'application/json'}, + note='Downloading JSON metadata for %s' % format_id, + data=json.dumps({ + 'session': { + 'client_info': { + 'player_id': session_api_data['player_id'], + }, + 'content_auth': { + 'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]], + 'content_key_timeout': session_api_data['content_key_timeout'], + 'service_id': 'nicovideo', + 'service_user_id': session_api_data['service_user_id'] + }, + 'content_id': session_api_data['content_id'], + 'content_src_id_sets': [{ + 'content_src_ids': [{ + 'src_id_to_mux': { + 'audio_src_ids': [audio_quality['id']], + 'video_src_ids': [video_quality['id']], + } + }] + }], + 'content_type': 'movie', + 'content_uri': '', + 'keep_method': { + 'heartbeat': { + 'lifetime': session_api_data['heartbeat_lifetime'] + } + }, + 'priority': session_api_data['priority'], + 'protocol': { + 'name': 'http', + 'parameters': { + 'http_parameters': { + 'parameters': { + 'http_output_download_parameters': { + 'use_ssl': yesno(session_api_endpoint['is_ssl']), + 'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']), + } + } + } + } + }, + 'recipe_id': session_api_data['recipe_id'], + 'session_operation_auth': { + 'session_operation_auth_by_signature': { + 'signature': session_api_data['signature'], + 'token': session_api_data['token'], + } + }, + 'timing_constraint': 'unlimited' + } + })) + + resolution = video_quality.get('resolution', {}) + + return { + 'url': session_response['data']['session']['content_uri'], + 'format_id': format_id, + 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4 + 'abr': float_or_none(audio_quality.get('bitrate'), 1000), + 'vbr': float_or_none(video_quality.get('bitrate'), 1000), + 'height': resolution.get('height'), + 'width': resolution.get('width'), + } + def _real_extract(self, url): video_id = self._match_id(url) @@ -161,13 +261,13 @@ class NiconicoIE(InfoExtractor): api_data = self._parse_json(self._html_search_regex( 'data-api-data="([^"]+)"', webpage, 'API data', default='{}'), video_id) - video_real_url = try_get( - api_data, lambda x: x['video']['smileInfo']['url']) - if video_real_url: - def get_video_info(items): - return dict_get(api_data['video'], items) - else: + def _format_id_from_url(video_url): + return 'economy' if video_real_url.endswith('low') else 'normal' + + try: + video_real_url = api_data['video']['smileInfo']['url'] + except KeyError: # Flash videos # Get flv info flv_info_webpage = self._download_webpage( 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', @@ -187,8 +287,6 @@ class NiconicoIE(InfoExtractor): else: raise ExtractorError('Unable to find video URL') - video_real_url = flv_info['url'][0] - video_info_xml = self._download_xml( 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') @@ -201,6 +299,41 @@ class NiconicoIE(InfoExtractor): if ret: return ret + video_real_url = flv_info['url'][0] + + extension = get_video_info('movie_type') + if not extension: + extension = determine_ext(video_real_url) + + formats = [{ + 'url': video_real_url, + 'ext': extension, + 'format_id': _format_id_from_url(video_real_url), + }] + else: + formats = [] + + dmc_info = api_data['video'].get('dmcInfo') + if dmc_info: # "New" HTML5 videos + quality_info = dmc_info['quality'] + for audio_quality in quality_info['audios']: + for video_quality in quality_info['videos']: + if not audio_quality['available'] or not video_quality['available']: + continue + formats.append(self._extract_format_for_quality( + api_data, video_id, audio_quality, video_quality)) + + self._sort_formats(formats) + else: # "Old" HTML5 videos + formats = [{ + 'url': video_real_url, + 'ext': 'mp4', + 'format_id': _format_id_from_url(video_real_url), + }] + + def get_video_info(items): + return dict_get(api_data['video'], items) + # Start extracting information title = get_video_info('title') if not title: @@ -216,10 +349,6 @@ class NiconicoIE(InfoExtractor): watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {} video_detail = watch_api_data.get('videoDetail', {}) - extension = get_video_info(['movie_type', 'movieType']) - if not extension: - extension = determine_ext(video_real_url) - thumbnail = ( get_video_info(['thumbnail_url', 'thumbnailURL']) or self._html_search_meta('image', webpage, 'thumbnail', default=None) or @@ -272,10 +401,8 @@ class NiconicoIE(InfoExtractor): return { 'id': video_id, - 'url': video_real_url, 'title': title, - 'ext': extension, - 'format_id': 'economy' if video_real_url.endswith('low') else 'normal', + 'formats': formats, 'thumbnail': thumbnail, 'description': description, 'uploader': uploader, From 15d1e8a23dbaa28635cae30ff6c5cfb095b4c7c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 7 Aug 2017 22:43:42 +0700 Subject: [PATCH 0128/2417] [dplayit] Fix extraction (closes #13851) --- youtube_dl/extractor/dplay.py | 66 ++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 1a41760f8..76e784105 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -7,16 +7,18 @@ import time from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_HTTPError, + compat_str, + compat_urlparse, ) from ..utils import ( - USER_AGENTS, ExtractorError, int_or_none, - unified_strdate, remove_end, + try_get, + unified_strdate, update_url_query, + USER_AGENTS, ) @@ -183,28 +185,44 @@ class DPlayItIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - info_url = self._search_regex( - r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', - webpage, 'video id') - title = remove_end(self._og_search_title(webpage), ' | Dplay') - try: - info = self._download_json( - info_url, display_id, headers={ - 'Authorization': 'Bearer %s' % self._get_cookies(url).get( - 'dplayit_token').value, - 'Referer': url, - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): - info = self._parse_json(e.cause.read().decode('utf-8'), display_id) - error = info['errors'][0] - if error.get('code') == 'access.denied.geoblocked': - self.raise_geo_restricted( - msg=error.get('detail'), countries=self._GEO_COUNTRIES) - raise ExtractorError(info['errors'][0]['detail'], expected=True) - raise + video_id = None + + info = self._search_regex( + r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")', + webpage, 'playback JSON', default=None) + if info: + for _ in range(2): + info = self._parse_json(info, display_id, fatal=False) + if not info: + break + else: + video_id = try_get(info, lambda x: x['data']['id']) + + if not info: + info_url = self._search_regex( + r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', + webpage, 'info url') + + video_id = info_url.rpartition('/')[-1] + + try: + info = self._download_json( + info_url, display_id, headers={ + 'Authorization': 'Bearer %s' % self._get_cookies(url).get( + 'dplayit_token').value, + 'Referer': url, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): + info = self._parse_json(e.cause.read().decode('utf-8'), display_id) + error = info['errors'][0] + if error.get('code') == 'access.denied.geoblocked': + self.raise_geo_restricted( + msg=error.get('detail'), countries=self._GEO_COUNTRIES) + raise ExtractorError(info['errors'][0]['detail'], expected=True) + raise hls_url = info['data']['attributes']['streaming']['hls']['url'] @@ -230,7 +248,7 @@ class DPlayItIE(InfoExtractor): season_number = episode_number = upload_date = None return { - 'id': info_url.rpartition('/')[-1], + 'id': compat_str(video_id or display_id), 'display_id': display_id, 'title': title, 'description': self._og_search_description(webpage), From 4bf22f7a1014c55e3358b5a419945071b152eafc Mon Sep 17 00:00:00 2001 From: Alex Seiler <seileralex@gmail.com> Date: Tue, 8 Aug 2017 00:41:38 +0200 Subject: [PATCH 0129/2417] [20min] Fix embeds extraction --- youtube_dl/extractor/twentymin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index 4fd1aa4bf..a42977f39 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -50,7 +50,7 @@ class TwentyMinutenIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return [m.group('url') for m in re.finditer( - r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', + r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', webpage)] def _real_extract(self, url): From 5b232f46dcbdc805507c02edd4fd598f31d544d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 22:28:19 +0700 Subject: [PATCH 0130/2417] [utils] Skip missing params in cli_bool_option (closes #13865) --- test/test_utils.py | 4 ++++ youtube_dl/utils.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 7803e5bc7..2aab16b97 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1182,6 +1182,10 @@ part 3</font></u> cli_bool_option( {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), ['--check-certificate=true']) + self.assertEqual( + cli_bool_option( + {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), + []) def test_ohdave_rsa_encrypt(self): N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fdf5e29e7..c9cbd5842 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2733,6 +2733,8 @@ def cli_option(params, command_option, param): def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None): param = params.get(param) + if param is None: + return [] assert isinstance(param, bool) if separator: return [command_option + separator + (true_value if param else false_value)] From 5b3ddadcc3012c4ef390a7cf70dbcb8b83f07428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 22:55:13 +0700 Subject: [PATCH 0131/2417] [mixcloud] Fix play info decryption (closes #13867) --- youtube_dl/extractor/mixcloud.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 0efbe660a..40cd2e389 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -54,15 +54,22 @@ class MixcloudIE(InfoExtractor): }] # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js - @staticmethod - def _decrypt_play_info(play_info): - KEY = 'pleasedontdownloadourmusictheartistswontgetpaid' - + def _decrypt_play_info(self, play_info, video_id): + KEYS = ( + 'pleasedontdownloadourmusictheartistswontgetpaid', + '(function() { return new Date().toLocaleDateString(); })()' + ) play_info = base64.b64decode(play_info.encode('ascii')) - - return ''.join([ - compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)])) - for idx, ch in enumerate(play_info)]) + for num, key in enumerate(KEYS, start=1): + try: + return self._parse_json( + ''.join([ + compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)])) + for idx, ch in enumerate(play_info)]), + video_id) + except ExtractorError: + if num == len(KEYS): + raise def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -78,8 +85,8 @@ class MixcloudIE(InfoExtractor): encrypted_play_info = self._search_regex( r'm-play-info="([^"]+)"', webpage, 'play info') - play_info = self._parse_json( - self._decrypt_play_info(encrypted_play_info), track_id) + + play_info = self._decrypt_play_info(encrypted_play_info, track_id) if message and 'stream_url' not in play_info: raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) From dee04d24a422c0ea5586d2f1d1f97f1e3e4ecf70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 23:12:02 +0700 Subject: [PATCH 0132/2417] [nick] Add support for nick.com.pl (closes #13860) --- youtube_dl/extractor/nick.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index b688637bc..510b1c41f 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor): class NickDeIE(MTVServicesInfoExtractor): IE_NAME = 'nick.de' - _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', 'only_matching': True, @@ -88,6 +88,9 @@ class NickDeIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht', 'only_matching': True, + }, { + 'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom', + 'only_matching': True, }] def _extract_mrss_url(self, webpage, host): From baba5f4d1daa29c42b2ad56c06e3880f10b7b03d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 9 Aug 2017 23:46:49 +0700 Subject: [PATCH 0133/2417] [xxxymovies] Fix title extraction (closes #13868) --- youtube_dl/extractor/xxxymovies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xxxymovies.py b/youtube_dl/extractor/xxxymovies.py index 5c8f17eb2..e34ebe3a6 100644 --- a/youtube_dl/extractor/xxxymovies.py +++ b/youtube_dl/extractor/xxxymovies.py @@ -39,8 +39,8 @@ class XXXYMoviesIE(InfoExtractor): r"video_url\s*:\s*'([^']+)'", webpage, 'video URL') title = self._html_search_regex( - [r'<div class="block_header">\s*<h1>([^<]+)</h1>', - r'<title>(.*?)\s*-\s*XXXYMovies\.com'], + [r']+\bclass="block_header"[^>]*>\s*

    ([^<]+)<', + r'(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)'], webpage, 'title') thumbnail = self._search_regex( From 0e7dfa7d16e9e013bfaa085a59b9bbe4b4d1dfb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 9 Aug 2017 23:49:53 +0700 Subject: [PATCH 0134/2417] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 084e98c0e..7c8eb92aa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,14 @@ version +Core +* [utils] Skip missing params in cli_bool_option (#13865) + Extractors +* [xxxymovies] Fix title extraction (#13868) ++ [nick] Add support for nick.com.pl (#13860) +* [mixcloud] Fix play info decryption (#13867) +* [20min] Fix embeds extraction (#13852) +* [dplayit] Fix extraction (#13851) + [niconico] Support videos with multiple formats (#13522) + [niconico] Support HTML5-only videos (#13806) From 6ed99754bb6074454b5d4875cc7b8b442e763ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 9 Aug 2017 23:52:22 +0700 Subject: [PATCH 0135/2417] release 2017.08.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 5b72032bc..7ee704e48 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.06 +[debug] youtube-dl version 2017.08.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7c8eb92aa..b28ade446 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.09 Core * [utils] Skip missing params in cli_bool_option (#13865) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 11d3bf29f..022172375 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.06' +__version__ = '2017.08.09' From 41918eaa5ce1225f7e0a94882e7c77919342210d Mon Sep 17 00:00:00 2001 From: tetra-eder <30865771+tetra-eder@users.noreply.github.com> Date: Fri, 11 Aug 2017 17:00:39 +0200 Subject: [PATCH 0136/2417] [generic] Add support for vzaar embeds --- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ youtube_dl/extractor/vzaar.py | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 34e814988..51acead66 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -98,6 +98,7 @@ from .wistia import WistiaIE from .mediaset import MediasetIE from .joj import JojIE from .megaphone import MegaphoneIE +from .vzaar import VzaarIE class GenericIE(InfoExtractor): @@ -1840,6 +1841,16 @@ class GenericIE(InfoExtractor): 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', }, }, + { + # vzaar embed + 'url': 'http://www.xruniversity.com/bdsm-lets-begin-melissa-moore/', + 'md5': 'cddc9fb8a8644a0a7742149eee95080b', + 'info_dict': { + 'id': '11002506', + 'ext': 'mp4', + 'title': 'XR-U SHOW: Ready Player Fuck - EP. 61', + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2781,6 +2792,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key()) + # Look for vzaar embeds + vzaar_urls = VzaarIE._extract_urls(webpage) + if vzaar_urls: + return self.playlist_from_matches( + vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) + # Look for Rutube embeds rutube_urls = RutubeIE._extract_urls(webpage) if rutube_urls: diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index b270f08d1..02fcd52c7 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -28,6 +30,12 @@ class VzaarIE(InfoExtractor): }, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( From 1663bd6e1c11bf6cbf290fcbbf12358207570faf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Aug 2017 22:02:00 +0700 Subject: [PATCH 0137/2417] [generic] Replace vzaar embed test --- youtube_dl/extractor/generic.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 51acead66..8362d9a36 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1843,12 +1843,12 @@ class GenericIE(InfoExtractor): }, { # vzaar embed - 'url': 'http://www.xruniversity.com/bdsm-lets-begin-melissa-moore/', - 'md5': 'cddc9fb8a8644a0a7742149eee95080b', + 'url': 'http://help.vzaar.com/article/165-embedding-video', + 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4', 'info_dict': { - 'id': '11002506', + 'id': '8707641', 'ext': 'mp4', - 'title': 'XR-U SHOW: Ready Player Fuck - EP. 61', + 'title': 'Building A Business Online: Principal Chairs Q & A', }, }, # { @@ -2792,12 +2792,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key()) - # Look for vzaar embeds - vzaar_urls = VzaarIE._extract_urls(webpage) - if vzaar_urls: - return self.playlist_from_matches( - vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) - # Look for Rutube embeds rutube_urls = RutubeIE._extract_urls(webpage) if rutube_urls: @@ -2828,6 +2822,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key()) + # Look for vzaar embeds + vzaar_urls = VzaarIE._extract_urls(webpage) + if vzaar_urls: + return self.playlist_from_matches( + vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): From 92a5c415328953851d0a6b7893de5387a1b7b469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 16:30:50 +0700 Subject: [PATCH 0138/2417] [mixcloud] Fix play info decryption (closes #13885) --- youtube_dl/extractor/mixcloud.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 40cd2e389..52f7428e0 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -57,7 +57,8 @@ class MixcloudIE(InfoExtractor): def _decrypt_play_info(self, play_info, video_id): KEYS = ( 'pleasedontdownloadourmusictheartistswontgetpaid', - '(function() { return new Date().toLocaleDateString(); })()' + 'window.addEventListener = window.addEventListener || function() {};', + '(function() { return new Date().toLocaleDateString(); })()', ) play_info = base64.b64decode(play_info.encode('ascii')) for num, key in enumerate(KEYS, start=1): From 82889d4ae517640df217f99e7744002e0deba47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 16:48:11 +0700 Subject: [PATCH 0139/2417] [extractor/common] Respect source's type attribute for HTML5 media (closes #13892) --- youtube_dl/extractor/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 459e7ffd6..4d61275fd 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2114,9 +2114,9 @@ class InfoExtractor(object): return f return {} - def _media_formats(src, cur_media_type): + def _media_formats(src, type_info, cur_media_type): full_url = absolute_url(src) - ext = determine_ext(full_url) + ext = type_info.get('ext') or determine_ext(full_url) if ext == 'm3u8': is_plain_url = False formats = self._extract_m3u8_formats( @@ -2165,9 +2165,9 @@ class InfoExtractor(object): src = source_attributes.get('src') if not src: continue - is_plain_url, formats = _media_formats(src, media_type) + f = parse_content_type(source_attributes.get('type')) + is_plain_url, formats = _media_formats(src, f, media_type) if is_plain_url: - f = parse_content_type(source_attributes.get('type')) f.update(formats[0]) media_info['formats'].append(f) else: From ac8491fcca6f9c0f6c7904e1cf13953f912eeb39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 17:11:35 +0700 Subject: [PATCH 0140/2417] [extractor/common] Make _family_friendly_search optional --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4d61275fd..e565901af 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -940,7 +940,8 @@ class InfoExtractor(object): def _family_friendly_search(self, html): # See http://schema.org/VideoObject - family_friendly = self._html_search_meta('isFamilyFriendly', html) + family_friendly = self._html_search_meta( + 'isFamilyFriendly', html, default=None) if not family_friendly: return None From e74e3b63e3cdb31a61af1fc21c703e912c029b96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 17:14:11 +0700 Subject: [PATCH 0141/2417] [YoutubeDL] Make sure format id is not empty --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 367ae3533..df7378f83 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1500,7 +1500,7 @@ class YoutubeDL(object): sanitize_string_field(format, 'format_id') sanitize_numeric_fields(format) format['url'] = sanitize_url(format['url']) - if format.get('format_id') is None: + if not format.get('format_id'): format['format_id'] = compat_str(i) else: # Sanitize format_id from characters used in format selector expression From 70851a95c307880f016fcb6f37427a8eeae73cad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 17:18:23 +0700 Subject: [PATCH 0142/2417] [aparat] Extract all formats (closes #13887) --- youtube_dl/extractor/aparat.py | 49 +++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 025e29aa4..e394cb661 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -3,13 +3,13 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - ExtractorError, - HEADRequest, + int_or_none, + mimetype2ext, ) class AparatIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' _TEST = { 'url': 'http://www.aparat.com/v/wP8On', @@ -29,30 +29,41 @@ class AparatIE(InfoExtractor): # Note: There is an easier-to-parse configuration at # http://www.aparat.com/video/video/config/videohash/%video_id # but the URL in there does not work - embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id - webpage = self._download_webpage(embed_url, video_id) - - file_list = self._parse_json(self._search_regex( - r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id) - for i, item in enumerate(file_list[0]): - video_url = item['file'] - req = HEADRequest(video_url) - res = self._request_webpage( - req, video_id, note='Testing video URL %d' % i, errnote=False) - if res: - break - else: - raise ExtractorError('No working video URLs found') + webpage = self._download_webpage( + 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, + video_id) title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') + + file_list = self._parse_json( + self._search_regex( + r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, + 'file list'), + video_id) + + formats = [] + for item in file_list[0]: + file_url = item.get('file') + if not file_url: + continue + ext = mimetype2ext(item.get('type')) + label = item.get('label') + formats.append({ + 'url': file_url, + 'ext': ext, + 'format_id': label or ext, + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', label or '', 'height', default=None)), + }) + self._sort_formats(formats) + thumbnail = self._search_regex( r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) return { 'id': video_id, 'title': title, - 'url': video_url, - 'ext': 'mp4', 'thumbnail': thumbnail, 'age_limit': self._family_friendly_search(webpage), + 'formats': formats, } From 868f79db41a4d81a87ef12c8bd5ef73205c9c029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 19:24:26 +0700 Subject: [PATCH 0143/2417] [extractor/common] Fix _media_formats --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e565901af..7fe888462 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2115,7 +2115,7 @@ class InfoExtractor(object): return f return {} - def _media_formats(src, type_info, cur_media_type): + def _media_formats(src, cur_media_type, type_info={}): full_url = absolute_url(src) ext = type_info.get('ext') or determine_ext(full_url) if ext == 'm3u8': @@ -2167,7 +2167,7 @@ class InfoExtractor(object): if not src: continue f = parse_content_type(source_attributes.get('type')) - is_plain_url, formats = _media_formats(src, f, media_type) + is_plain_url, formats = _media_formats(src, media_type, f) if is_plain_url: f.update(formats[0]) media_info['formats'].append(f) From 0c43a481b91c657643eb42f72d293f245a410c52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Aug 2017 23:24:46 +0700 Subject: [PATCH 0144/2417] [reddit] Add extractors (closes #13847) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/reddit.py | 114 +++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 youtube_dl/extractor/reddit.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 897557f93..f1a9f6edf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -840,6 +840,10 @@ from .rai import ( from .rbmaradio import RBMARadioIE from .rds import RDSIE from .redbulltv import RedBullTVIE +from .reddit import ( + RedditIE, + RedditRIE, +) from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py new file mode 100644 index 000000000..01c85ee01 --- /dev/null +++ b/youtube_dl/extractor/reddit.py @@ -0,0 +1,114 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + float_or_none, +) + + +class RedditIE(InfoExtractor): + _VALID_URL = r'https?://v\.redd\.it/(?P[^/?#&]+)' + _TEST = { + # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ + 'url': 'https://v.redd.it/zv89llsvexdz', + 'md5': '655d06ace653ea3b87bccfb1b27ec99d', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'title': 'zv89llsvexdz', + }, + 'params': { + 'format': 'bestvideo', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + formats = self._extract_m3u8_formats( + 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id, + 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + + formats.extend(self._extract_mpd_formats( + 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, + mpd_id='dash', fatal=False)) + + return { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } + + +class RedditRIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P[^/]+)' + _TESTS = [{ + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'title': 'That small heart attack.', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1501941939, + 'upload_date': '20170805', + 'uploader': 'Antw87', + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 0, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, { + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', + 'only_matching': True, + }, { + # imgur + 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', + 'only_matching': True, + }, { + # streamable + 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', + 'only_matching': True, + }, { + # youtube + 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + data = self._download_json( + url + '.json', video_id)[0]['data']['children'][0]['data'] + + video_url = data['url'] + + # Avoid recursing into the same reddit URL + if 'reddit.com/' in video_url and '/%s/' % video_id in video_url: + raise ExtractorError('No media found', expected=True) + + over_18 = data.get('over_18') + if over_18 is True: + age_limit = 18 + elif over_18 is False: + age_limit = 0 + else: + age_limit = None + + return { + '_type': 'url_transparent', + 'url': video_url, + 'title': data.get('title'), + 'thumbnail': data.get('thumbnail'), + 'timestamp': float_or_none(data.get('created_utc')), + 'uploader': data.get('author'), + 'like_count': int_or_none(data.get('ups')), + 'dislike_count': int_or_none(data.get('downs')), + 'comment_count': int_or_none(data.get('num_comments')), + 'age_limit': age_limit, + } From 4ef9152428c4a000cb5fc76732fc579f1f4c1d69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 00:58:39 +0700 Subject: [PATCH 0145/2417] [limelight] Improve embeds detection (closes #13895) --- youtube_dl/extractor/limelight.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 0a5a3956c..ad65b2759 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -26,14 +26,16 @@ class LimelightBaseIE(InfoExtractor): 'Channel': 'channel', 'ChannelList': 'channel_list', } + + def smuggle(url): + return smuggle_url(url, {'source_url': source_url}) + entries = [] for kind, video_id in re.findall( r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P[a-z0-9]{32})', webpage): entries.append(cls.url_result( - smuggle_url( - 'limelight:%s:%s' % (lm[kind], video_id), - {'source_url': source_url}), + smuggle('limelight:%s:%s' % (lm[kind], video_id)), 'Limelight%s' % kind, video_id)) for mobj in re.finditer( # As per [1] class attribute should be exactly equal to @@ -49,10 +51,15 @@ class LimelightBaseIE(InfoExtractor): ''', webpage): kind, video_id = mobj.group('kind'), mobj.group('id') entries.append(cls.url_result( - smuggle_url( - 'limelight:%s:%s' % (kind, video_id), - {'source_url': source_url}), + smuggle('limelight:%s:%s' % (kind, video_id)), 'Limelight%s' % kind.capitalize(), video_id)) + # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page) + for video_id in re.findall( + r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P[a-z0-9]{32})', + webpage): + entries.append(cls.url_result( + smuggle('limelight:media:%s' % video_id), + LimelightMediaIE.ie_key(), video_id)) return entries def _call_playlist_service(self, item_id, method, fatal=True, referer=None): From eb02940cc7dc2233f2d7873c12165245a3c3c14e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 01:11:27 +0700 Subject: [PATCH 0146/2417] [generic] Add test for #13895 --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8362d9a36..eff5fbfe8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1785,6 +1785,21 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 5, }, + { + # Limelight embed (LimelightPlayerUtil.embed) + 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri', + 'info_dict': { + 'id': '95d035dc5c8a401588e9c0e6bd1e9c92', + 'ext': 'mp4', + 'title': '07448641', + 'timestamp': 1499890639, + 'upload_date': '20170712', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['LimelightMedia'], + }, { 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/', 'info_dict': { From b3c6515365ed415bbf813c0c2e6c12585824b77a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 07:23:29 +0700 Subject: [PATCH 0147/2417] [fourtube] Add support for other sites (closes #6022, closes #7859, closes #13901) --- youtube_dl/extractor/extractors.py | 7 +- youtube_dl/extractor/fourtube.py | 176 +++++++++++++++++++++++------ 2 files changed, 147 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f1a9f6edf..fb79a1736 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -350,7 +350,12 @@ from .flipagram import FlipagramIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE -from .fourtube import FourTubeIE +from .fourtube import ( + FourTubeIE, + PornTubeIE, + PornerBrosIE, + FuxIE, +) from .fox import FOXIE from .fox9 import FOX9IE from .foxgay import FoxgayIE diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index e3fd08bcf..ad273a0e7 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,39 +3,22 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( parse_duration, parse_iso8601, - sanitized_Request, str_to_int, ) -class FourTubeIE(InfoExtractor): - IE_NAME = '4tube' - _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P\d+)' - - _TEST = { - 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', - 'md5': '6516c8ac63b03de06bc8eac14362db4f', - 'info_dict': { - 'id': '209733', - 'ext': 'mp4', - 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', - 'uploader': 'WCP Club', - 'uploader_id': 'wcp-club', - 'upload_date': '20131031', - 'timestamp': 1383263892, - 'duration': 583, - 'view_count': int, - 'like_count': int, - 'categories': list, - 'age_limit': 18, - } - } - +class FourTubeBaseIE(InfoExtractor): def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') + + if kind == 'm' or not display_id: + url = self._URL_TEMPLATE % video_id + webpage = self._download_webpage(url, video_id) title = self._html_search_meta('name', webpage) @@ -43,10 +26,10 @@ class FourTubeIE(InfoExtractor): 'uploadDate', webpage)) thumbnail = self._html_search_meta('thumbnailUrl', webpage) uploader_id = self._html_search_regex( - r'', + r'', webpage, 'uploader id', fatal=False) uploader = self._html_search_regex( - r'', + r'', webpage, 'uploader', fatal=False) categories_html = self._search_regex( @@ -60,10 +43,10 @@ class FourTubeIE(InfoExtractor): view_count = str_to_int(self._search_regex( r']+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">', - webpage, 'view count', fatal=False)) + webpage, 'view count', default=None)) like_count = str_to_int(self._search_regex( r']+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">', - webpage, 'like count', fatal=False)) + webpage, 'like count', default=None)) duration = parse_duration(self._html_search_meta('duration', webpage)) media_id = self._search_regex( @@ -87,12 +70,12 @@ class FourTubeIE(InfoExtractor): token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format( media_id, '+'.join(sources)) - headers = { - b'Content-Type': b'application/x-www-form-urlencoded', - b'Origin': b'https://www.4tube.com', - } - token_req = sanitized_Request(token_url, b'{}', headers) - tokens = self._download_json(token_req, video_id) + + parsed_url = compat_urlparse.urlparse(url) + tokens = self._download_json(token_url, video_id, data=b'', headers={ + 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), + 'Referer': url, + }) formats = [{ 'url': tokens[format]['token'], 'format_id': format + 'p', @@ -115,3 +98,126 @@ class FourTubeIE(InfoExtractor): 'duration': duration, 'age_limit': 18, } + + +class FourTubeIE(FourTubeBaseIE): + IE_NAME = '4tube' + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?4tube\.com/(?:videos|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' + _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video' + _TESTS = [{ + 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', + 'md5': '6516c8ac63b03de06bc8eac14362db4f', + 'info_dict': { + 'id': '209733', + 'ext': 'mp4', + 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', + 'uploader': 'WCP Club', + 'uploader_id': 'wcp-club', + 'upload_date': '20131031', + 'timestamp': 1383263892, + 'duration': 583, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + }, + }, { + 'url': 'http://www.4tube.com/embed/209733', + 'only_matching': True, + }, { + 'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', + 'only_matching': True, + }] + + +class FuxIE(FourTubeBaseIE): + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?fux\.com/(?:video|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' + _URL_TEMPLATE = 'https://www.fux.com/video/%s/video' + _TESTS = [{ + 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow', + 'info_dict': { + 'id': '195359', + 'ext': 'mp4', + 'title': 'Awesome fucking in the kitchen ends with cum swallow', + 'uploader': 'alenci2342', + 'uploader_id': 'alenci2342', + 'upload_date': '20131230', + 'timestamp': 1388361660, + 'duration': 289, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.fux.com/embed/195359', + 'only_matching': True, + }, { + 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow', + 'only_matching': True, + }] + + +class PornTubeIE(FourTubeBaseIE): + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?porntube\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' + _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s' + _TESTS = [{ + 'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759', + 'info_dict': { + 'id': '7089759', + 'ext': 'mp4', + 'title': 'Teen couple doing anal', + 'uploader': 'Alexy', + 'uploader_id': 'Alexy', + 'upload_date': '20150606', + 'timestamp': 1433595647, + 'duration': 5052, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.porntube.com/embed/7089759', + 'only_matching': True, + }, { + 'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759', + 'only_matching': True, + }] + + +class PornerBrosIE(FourTubeBaseIE): + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?pornerbros\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' + _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s' + _TESTS = [{ + 'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369', + 'md5': '6516c8ac63b03de06bc8eac14362db4f', + 'info_dict': { + 'id': '181369', + 'ext': 'mp4', + 'title': 'Skinny brunette takes big cock down her anal hole', + 'uploader': 'PornerBros HD', + 'uploader_id': 'pornerbros-hd', + 'upload_date': '20130130', + 'timestamp': 1359527401, + 'duration': 1224, + 'view_count': int, + 'categories': list, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.pornerbros.com/embed/181369', + 'only_matching': True, + }, { + 'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369', + 'only_matching': True, + }] From 475bcb225f6046e38b47594c504da6ec15bac113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 07:53:02 +0700 Subject: [PATCH 0148/2417] [pornhub:playlistbase] Skip videos from drop-down menu for all playlists (closes #12819, closes #13902) --- youtube_dl/extractor/pornhub.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index e032817f2..f6777cd26 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -227,20 +227,6 @@ class PornHubIE(InfoExtractor): class PornHubPlaylistBaseIE(InfoExtractor): def _extract_entries(self, webpage): - return [ - self.url_result( - 'http://www.pornhub.com/%s' % video_url, - PornHubIE.ie_key(), video_title=title) - for video_url, title in orderedSet(re.findall( - r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', - webpage)) - ] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - # Only process container div with main playlist content skipping # drop-down menu that uses similar pattern for videos (see # https://github.com/rg3/youtube-dl/issues/11594). @@ -248,7 +234,21 @@ class PornHubPlaylistBaseIE(InfoExtractor): r'(?s)(]+class=["\']container.+)', webpage, 'container', default=webpage) - entries = self._extract_entries(container) + return [ + self.url_result( + 'http://www.pornhub.com/%s' % video_url, + PornHubIE.ie_key(), video_title=title) + for video_url, title in orderedSet(re.findall( + r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', + container)) + ] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = self._extract_entries(webpage) playlist = self._parse_json( self._search_regex( From 4f049e4aa866aef89d0f4b735fd89eb2ba84e809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 08:00:15 +0700 Subject: [PATCH 0149/2417] [ChangeLog] Actualize --- ChangeLog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog b/ChangeLog index b28ade446..daa3601ba 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +version + +Core +* [YoutubeDL] Make sure format id is not empty +* [extractor/common] Make _family_friendly_search optional +* [extractor/common] Respect source's type attribute for HTML5 media (#13892) + +Extractors +* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902) ++ [fourtube] Add support pornerbros.com (#6022) ++ [fourtube] Add support porntube.com (#7859, #13901) ++ [fourtube] Add support fux.com +* [limelight] Improve embeds detection (#13895) ++ [reddit] Add support for v.redd.it and reddit.com (#13847) +* [aparat] Extract all formats (#13887) +* [mixcloud] Fix play info decryption (#13885) ++ [generic] Add support for vzaar embeds (#13876) + + version 2017.08.09 Core From 16393d65355cdb1118e528d6dcb6d82f5f1c2b6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Aug 2017 08:58:30 +0700 Subject: [PATCH 0150/2417] release 2017.08.13 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7ee704e48..3bd61e0a6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.13*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.13** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.09 +[debug] youtube-dl version 2017.08.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index daa3601ba..6bafb1e8f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.13 Core * [YoutubeDL] Make sure format id is not empty diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a3bd07726..cc442742f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -294,6 +294,7 @@ - **Funimation** - **FunnyOrDie** - **Fusion** + - **Fux** - **FXNetworks** - **GameInformer** - **GameOne** @@ -621,6 +622,7 @@ - **PolskieRadio** - **PolskieRadioCategory** - **PornCom** + - **PornerBros** - **PornFlip** - **PornHd** - **PornHub**: PornHub and Thumbzilla @@ -629,6 +631,7 @@ - **Pornotube** - **PornoVoisines** - **PornoXO** + - **PornTube** - **PressTV** - **PrimeShareTV** - **PromptFile** @@ -654,6 +657,8 @@ - **RBMARadio** - **RDS**: RDS.ca - **RedBullTV** + - **Reddit** + - **RedditR** - **RedTube** - **RegioTV** - **RENTV** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 022172375..da855a602 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.09' +__version__ = '2017.08.13' From da20951a57bddd4a0102cd776ff93a2adc6db77d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Aug 2017 22:39:05 +0700 Subject: [PATCH 0151/2417] [mixcloud] Extract decrypt key --- youtube_dl/extractor/mixcloud.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 52f7428e0..fcf7beeb2 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..compat import ( compat_chr, compat_ord, + compat_str, compat_urllib_parse_unquote, compat_urlparse, ) @@ -53,15 +54,18 @@ class MixcloudIE(InfoExtractor): 'only_matching': True, }] + _keys = [ + 'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };', + 'pleasedontdownloadourmusictheartistswontgetpaid', + 'window.addEventListener = window.addEventListener || function() {};', + '(function() { return new Date().toLocaleDateString(); })()' + ] + _current_key = None + # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js def _decrypt_play_info(self, play_info, video_id): - KEYS = ( - 'pleasedontdownloadourmusictheartistswontgetpaid', - 'window.addEventListener = window.addEventListener || function() {};', - '(function() { return new Date().toLocaleDateString(); })()', - ) play_info = base64.b64decode(play_info.encode('ascii')) - for num, key in enumerate(KEYS, start=1): + for num, key in enumerate(self._keys, start=1): try: return self._parse_json( ''.join([ @@ -69,7 +73,7 @@ class MixcloudIE(InfoExtractor): for idx, ch in enumerate(play_info)]), video_id) except ExtractorError: - if num == len(KEYS): + if num == len(self._keys): raise def _real_extract(self, url): @@ -80,6 +84,20 @@ class MixcloudIE(InfoExtractor): webpage = self._download_webpage(url, track_id) + if not self._current_key: + js_url = self._search_regex( + r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', + webpage, 'js url', default=None) + if js_url: + js = self._download_webpage(js_url, track_id, fatal=False) + if js: + key = self._search_regex( + r'player\s*:\s*{.*?\bvalue\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', + js, 'key', default=None, group='key') + if key and isinstance(key, compat_str): + self._keys.insert(0, key) + self._current_key = key + message = self._html_search_regex( r'(?s)]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', webpage, 'error message', default=None) From 19ada898dc80a04ae1a2590c8886c9ec13958b03 Mon Sep 17 00:00:00 2001 From: forDream Date: Wed, 2 Aug 2017 11:12:17 +0800 Subject: [PATCH 0152/2417] fix QQ Music Url changed --- youtube_dl/extractor/qqmusic.py | 43 +++++++++++++++++---------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 17c27da46..6bff6baa8 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -2,8 +2,8 @@ from __future__ import unicode_literals import random -import time import re +import time from .common import InfoExtractor from ..utils import ( @@ -18,9 +18,9 @@ from ..utils import ( class QQMusicIE(InfoExtractor): IE_NAME = 'qqmusic' IE_DESC = 'QQ音乐' - _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P[0-9A-Za-z]+)\.html' _TESTS = [{ - 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', + 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html', 'md5': '9ce1c1c8445f561506d2e3cfb0255705', 'info_dict': { 'id': '004295Et37taLD', @@ -33,7 +33,7 @@ class QQMusicIE(InfoExtractor): } }, { 'note': 'There is no mp3-320 version of this song.', - 'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV', + 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html', 'md5': 'fa3926f0c585cda0af8fa4f796482e3e', 'info_dict': { 'id': '004MsGEo3DdNxV', @@ -46,7 +46,7 @@ class QQMusicIE(InfoExtractor): } }, { 'note': 'lyrics not in .lrc format', - 'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6', + 'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html', 'info_dict': { 'id': '001JyApY11tIp6', 'ext': 'mp3', @@ -163,7 +163,8 @@ class QQPlaylistBaseIE(InfoExtractor): for item in re.findall(r'class="data"[^<>]*>([^<>]+)[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P[0-9A-Za-z]+)\.html' _TEST = { - 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', + 'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html', 'info_dict': { 'id': '001BLpXF2DyJe2', 'title': '林俊杰', @@ -217,10 +218,10 @@ class QQMusicSingerIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' - _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P[0-9A-Za-z]+)\.html' _TESTS = [{ - 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', + 'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html', 'info_dict': { 'id': '000gXCTb2AhRR1', 'title': '我们都是这样长大的', @@ -228,7 +229,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): }, 'playlist_count': 4, }, { - 'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3', + 'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html', 'info_dict': { 'id': '002Y5a3b3AlCu3', 'title': '그리고...', @@ -246,7 +247,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): entries = [ self.url_result( - 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + ".html", 'QQMusic', song['songmid'] ) for song in album['list'] ] album_name = album.get('name') @@ -260,17 +261,17 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): class QQMusicToplistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:toplist' IE_DESC = 'QQ音乐 - 排行榜' - _VALID_URL = r'https?://y\.qq\.com/#type=toplist&p=(?P(top|global)_[0-9]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P(top|global)_[0-9]+)\.html' _TESTS = [{ - 'url': 'http://y.qq.com/#type=toplist&p=global_123', + 'url': 'https://y.qq.com/n/yqq/toplist/123.html', 'info_dict': { 'id': 'global_123', 'title': '美国iTunes榜', }, 'playlist_count': 10, }, { - 'url': 'http://y.qq.com/#type=toplist&p=top_3', + 'url': 'https://y.qq.com/n/yqq/toplist/3.html', 'info_dict': { 'id': 'top_3', 'title': '巅峰榜·欧美', @@ -281,7 +282,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): }, 'playlist_count': 100, }, { - 'url': 'http://y.qq.com/#type=toplist&p=global_106', + 'url': 'https://y.qq.com/n/yqq/toplist/106.html', 'info_dict': { 'id': 'global_106', 'title': '韩国Mnet榜', @@ -301,7 +302,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): entries = [ self.url_result( - 'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid'] + 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + ".html", 'QQMusic', song['data']['songmid'] ) for song in toplist_json['songlist'] ] @@ -314,10 +315,10 @@ class QQMusicToplistIE(QQPlaylistBaseIE): class QQMusicPlaylistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:playlist' IE_DESC = 'QQ音乐 - 歌单' - _VALID_URL = r'https?://y\.qq\.com/#type=taoge&id=(?P[0-9]+)' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P[0-9]+)\.html' _TESTS = [{ - 'url': 'http://y.qq.com/#type=taoge&id=3462654915', + 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html', 'info_dict': { 'id': '3462654915', 'title': '韩国5月新歌精选下旬', @@ -326,7 +327,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): 'playlist_count': 40, 'skip': 'playlist gone', }, { - 'url': 'http://y.qq.com/#type=taoge&id=1374105607', + 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html', 'info_dict': { 'id': '1374105607', 'title': '易入人心的华语民谣', @@ -352,7 +353,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): cdlist = list_json['cdlist'][0] entries = [ self.url_result( - 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + ".html", 'QQMusic', song['songmid'] ) for song in cdlist['songlist'] ] From 5d1bd3b907d22eab7c47b8b408c07a26dbc358ea Mon Sep 17 00:00:00 2001 From: forDream Date: Wed, 2 Aug 2017 12:20:53 +0800 Subject: [PATCH 0153/2417] [qqmusic]update valid url --- youtube_dl/extractor/qqmusic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 6bff6baa8..7513acba9 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -261,7 +261,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): class QQMusicToplistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:toplist' IE_DESC = 'QQ音乐 - 排行榜' - _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P(top|global)_[0-9]+)\.html' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P[0-9]+)\.html' _TESTS = [{ 'url': 'https://y.qq.com/n/yqq/toplist/123.html', @@ -293,7 +293,9 @@ class QQMusicToplistIE(QQPlaylistBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - list_type, num_id = list_id.split("_") + # list_type, num_id = list_id.split("_") + list_type = "toplist" + num_id = list_id toplist_json = self._download_json( 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json' From 5c037c0d1f155e951050533690d6e990654cfcc9 Mon Sep 17 00:00:00 2001 From: forDream Date: Wed, 2 Aug 2017 15:08:38 +0800 Subject: [PATCH 0154/2417] [qqmusic]support QQMusicSingerIE --- youtube_dl/extractor/qqmusic.py | 52 +++++++++++++++++---------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 7513acba9..42be6bc7b 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -156,16 +156,27 @@ class QQPlaylistBaseIE(InfoExtractor): def qq_static_url(category, mid): return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid) - @classmethod - def get_entries_from_page(cls, page): + def get_singer_all_songs(self, singmid, num): + return self._download_webpage( + r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg?format=json&inCharset=utf8&outCharset=utf-8&platform=yqq&needNewCode=0&singermid=%s&order=listen&begin=0&num=%s&songstatus=1' % + (singmid, num), singmid) + + def get_entries_from_page(self, singmid): entries = [] - for item in re.findall(r'class="data"[^<>]*>([^<>]+) Date: Mon, 14 Aug 2017 08:28:41 +0800 Subject: [PATCH 0155/2417] [qqmusic] review --- youtube_dl/extractor/qqmusic.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 42be6bc7b..38f4b2cab 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -166,15 +166,15 @@ class QQPlaylistBaseIE(InfoExtractor): default_num = 1 json_text = self.get_singer_all_songs(singmid, default_num) - json_obj = self._parse_json(json_text, singmid) + json_obj_all_songs = self._parse_json(json_text, singmid) - if json_obj['code'] == 0: - total = json_obj['data']['total'] + if json_obj_all_songs['code'] == 0: + total = json_obj_all_songs['data']['total'] json_text = self.get_singer_all_songs(singmid, total) - json_obj = self._parse_json(json_text, singmid) + json_obj_all_songs = self._parse_json(json_text, singmid) - for item in json_obj['data']['list']: - if not (item['musicData'].get('songmid') is None): + for item in json_obj_all_songs['data']['list']: + if item['musicData'].get('songmid') is not None: songmid = item['musicData']['songmid'] entries.append(self.url_result(r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid)) @@ -248,7 +248,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): entries = [ self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['songmid'] + ".html", 'QQMusic', song['songmid'] + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'] ) for song in album['list'] ] album_name = album.get('name') @@ -294,8 +294,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - # list_type, num_id = list_id.split("_") - list_type = "toplist" + list_type = 'toplist' num_id = list_id toplist_json = self._download_json( @@ -305,7 +304,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): entries = [ self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + ".html", 'QQMusic', + 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', song['data']['songmid'] ) for song in toplist_json['songlist'] ] @@ -357,7 +356,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): cdlist = list_json['cdlist'][0] entries = [ self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['songmid'] + ".html", 'QQMusic', song['songmid'] + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'] ) for song in cdlist['songlist'] ] From 485047854376465f95309daad4966971f56728ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 15 Aug 2017 23:58:00 +0700 Subject: [PATCH 0156/2417] [extractor/common] Add support for float durations in _parse_mpd_formats (closes #13919) --- test/test_InfoExtractor.py | 86 ++++++++++++++++++++++++++++ test/testdata/mpd/float_duration.mpd | 18 ++++++ youtube_dl/extractor/common.py | 2 +- 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 test/testdata/mpd/float_duration.mpd diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 6f52e11f7..f18a823fc 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -10,6 +10,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, expect_dict, expect_value +from youtube_dl.compat import compat_etree_fromstring from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError @@ -488,6 +489,91 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + def test_parse_mpd_formats(self): + _TEST_CASES = [ + ( + # https://github.com/rg3/youtube-dl/issues/13919 + 'float_duration', + 'http://unknown/manifest.mpd', + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '318597', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 318.597, + 'width': 340, + 'height': 192, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '638590', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 638.59, + 'width': 512, + 'height': 288, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1022565', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 1022.565, + 'width': 688, + 'height': 384, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '2046506', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 2046.506, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '3998017', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640029', + 'tbr': 3998.017, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '5997485', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640032', + 'tbr': 5997.485, + 'width': 1920, + 'height': 1080, + }] + ), + ] + + for mpd_file, mpd_url, expected_formats in _TEST_CASES: + with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, + mode='r', encoding='utf-8') as f: + formats = self.ie._parse_mpd_formats( + compat_etree_fromstring(f.read().encode('utf-8')), + mpd_url=mpd_url) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/mpd/float_duration.mpd b/test/testdata/mpd/float_duration.mpd new file mode 100644 index 000000000..8dc1d2d5e --- /dev/null +++ b/test/testdata/mpd/float_duration.mpd @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7fe888462..e747258aa 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1786,7 +1786,7 @@ class InfoExtractor(object): ms_info['timescale'] = int(timescale) segment_duration = source.get('duration') if segment_duration: - ms_info['segment_duration'] = int(segment_duration) + ms_info['segment_duration'] = float(segment_duration) def extract_Initialization(source): initialization = source.find(_add_ns('Initialization')) From a1aa6596626a98d068780f092367b87398840c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 16 Aug 2017 23:03:42 +0700 Subject: [PATCH 0157/2417] [periscope] Renew HLS extraction (closes #13917) --- youtube_dl/extractor/periscope.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index bfa12edc9..e5e08538c 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -80,18 +80,24 @@ class PeriscopeIE(PeriscopeBaseIE): stream = self._call_api( 'getAccessPublic', {'broadcast_id': token}, token) + video_urls = set() formats = [] - for format_id in ('replay', 'rtmp', 'hls', 'https_hls'): + for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): video_url = stream.get(format_id + '_url') - if not video_url: + if not video_url or video_url in video_urls: continue - f = { + video_urls.add(video_url) + if format_id != 'rtmp': + formats.extend(self._extract_m3u8_formats( + video_url, token, 'mp4', + entry_protocol='m3u8_native' + if state in ('ended', 'timed_out') else 'm3u8', + m3u8_id=format_id, fatal=False)) + continue + formats.append({ 'url': video_url, 'ext': 'flv' if format_id == 'rtmp' else 'mp4', - } - if format_id != 'rtmp': - f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8' - formats.append(f) + }) self._sort_formats(formats) return { From 25a6e769a1af3a79f439369fb683a1d487777cb9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Aug 2017 16:39:57 +0800 Subject: [PATCH 0158/2417] [qqmusic] Fix tests and cleanup --- youtube_dl/extractor/qqmusic.py | 89 ++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 38f4b2cab..62f986050 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -7,11 +7,10 @@ import time from .common import InfoExtractor from ..utils import ( - sanitized_Request, - strip_jsonp, - unescapeHTML, clean_html, ExtractorError, + strip_jsonp, + unescapeHTML, ) @@ -21,14 +20,14 @@ class QQMusicIE(InfoExtractor): _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P[0-9A-Za-z]+)\.html' _TESTS = [{ 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html', - 'md5': '9ce1c1c8445f561506d2e3cfb0255705', + 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8', 'info_dict': { 'id': '004295Et37taLD', 'ext': 'mp3', 'title': '可惜没如果', 'release_date': '20141227', 'creator': '林俊杰', - 'description': 'md5:d327722d0361576fde558f1ac68a7065', + 'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac', 'thumbnail': r're:^https?://.*\.jpg$', } }, { @@ -53,7 +52,7 @@ class QQMusicIE(InfoExtractor): 'title': 'Shadows Over Transylvania', 'release_date': '19970225', 'creator': 'Dark Funeral', - 'description': 'md5:ed14d5bd7ecec19609108052c25b2c11', + 'description': 'md5:c9b20210587cbcd6836a1c597bab4525', 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { @@ -105,7 +104,7 @@ class QQMusicIE(InfoExtractor): [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], detail_info_page, 'album mid', default=None) if albummid: - thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \ + thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \ % (albummid[-2:-1], albummid[-1], albummid) guid = self.m_r_get_ruin() @@ -158,8 +157,19 @@ class QQPlaylistBaseIE(InfoExtractor): def get_singer_all_songs(self, singmid, num): return self._download_webpage( - r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg?format=json&inCharset=utf8&outCharset=utf-8&platform=yqq&needNewCode=0&singermid=%s&order=listen&begin=0&num=%s&songstatus=1' % - (singmid, num), singmid) + r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid, + query={ + 'format': 'json', + 'inCharset': 'utf8', + 'outCharset': 'utf-8', + 'platform': 'yqq', + 'needNewCode': 0, + 'singermid': singmid, + 'order': 'listen', + 'begin': 0, + 'num': num, + 'songstatus': 1, + }) def get_entries_from_page(self, singmid): entries = [] @@ -176,7 +186,8 @@ class QQPlaylistBaseIE(InfoExtractor): for item in json_obj_all_songs['data']['list']: if item['musicData'].get('songmid') is not None: songmid = item['musicData']['songmid'] - entries.append(self.url_result(r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid)) + entries.append(self.url_result( + r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid)) return entries @@ -192,7 +203,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE): 'title': '林俊杰', 'description': 'md5:870ec08f7d8547c29c93010899103751', }, - 'playlist_count': 12, + 'playlist_mincount': 12, } def _real_extract(self, url): @@ -200,16 +211,16 @@ class QQMusicSingerIE(QQPlaylistBaseIE): entries = self.get_entries_from_page(mid) singer_page = self._download_webpage(url, mid, 'Download singer page') - singer_name = self._html_search_regex(r"singername : '(.*?)'", singer_page, 'singer name', default=None) + singer_name = self._html_search_regex( + r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None) singer_desc = None if mid: - req = sanitized_Request( - 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singermid=%s' % mid) - req.add_header( - 'Referer', 'https://y.qq.com/n/yqq/singer/') singer_desc_page = self._download_xml( - req, mid, 'Donwload singer description XML') + 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid, + 'Donwload singer description XML', + query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid}, + headers={'Referer': 'https://y.qq.com/n/yqq/singer/'}) singer_desc = singer_desc_page.find('./data/info/desc').text @@ -267,26 +278,25 @@ class QQMusicToplistIE(QQPlaylistBaseIE): _TESTS = [{ 'url': 'https://y.qq.com/n/yqq/toplist/123.html', 'info_dict': { - 'id': 'global_123', + 'id': '123', 'title': '美国iTunes榜', + 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08', }, - 'playlist_count': 10, + 'playlist_count': 100, }, { 'url': 'https://y.qq.com/n/yqq/toplist/3.html', 'info_dict': { - 'id': 'top_3', + 'id': '3', 'title': '巅峰榜·欧美', - 'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统' - '计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据' - '歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:' - '登录用户完整播放一首歌曲,记为一次有效播放;同一用户收听同一首歌曲,每天记录为1次有效播放' + 'description': 'md5:5a600d42c01696b26b71f8c4d43407da', }, 'playlist_count': 100, }, { 'url': 'https://y.qq.com/n/yqq/toplist/106.html', 'info_dict': { - 'id': 'global_106', + 'id': '106', 'title': '韩国Mnet榜', + 'description': 'md5:cb84b325215e1d21708c615cac82a6e7', }, 'playlist_count': 50, }] @@ -298,16 +308,14 @@ class QQMusicToplistIE(QQPlaylistBaseIE): num_id = list_id toplist_json = self._download_json( - 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json' - % (list_type, num_id), - list_id, 'Download toplist page') + 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id, + note='Download toplist page', + query={'type': 'toplist', 'topid': list_id, 'format': 'json'}) - entries = [ - self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', - song['data']['songmid'] - ) for song in toplist_json['songlist'] - ] + entries = [self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', + song['data']['songmid']) + for song in toplist_json['songlist']] topinfo = toplist_json.get('topinfo', {}) list_name = topinfo.get('ListName') @@ -343,8 +351,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): list_id = self._match_id(url) list_json = self._download_json( - 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s' - % list_id, list_id, 'Download list page', + 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', + list_id, 'Download list page', + query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, transform_source=strip_jsonp) if not len(list_json.get('cdlist', [])): if list_json.get('code'): @@ -354,11 +363,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): raise ExtractorError('Unable to get playlist info') cdlist = list_json['cdlist'][0] - entries = [ - self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'] - ) for song in cdlist['songlist'] - ] + entries = [self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']) + for song in cdlist['songlist']] list_name = cdlist.get('dissname') list_description = clean_html(unescapeHTML(cdlist.get('desc'))) From 12f5304556343fafb6a38ad5b4d5ef9fc908f15c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Aug 2017 16:40:56 +0800 Subject: [PATCH 0159/2417] [ChangeLog] Add entry for #13805 --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 6bafb1e8f..5b897735e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version + +Extractors + ++ [qqmusic] Support new URL schemes (#13805) + + version 2017.08.13 Core From bfabd17b33d47f1e973121483623768010880845 Mon Sep 17 00:00:00 2001 From: Genki Sky Date: Tue, 8 Aug 2017 22:49:57 -0400 Subject: [PATCH 0160/2417] Add new extractor --- youtube_dl/extractor/clippit.py | 74 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 75 insertions(+) create mode 100644 youtube_dl/extractor/clippit.py diff --git a/youtube_dl/extractor/clippit.py b/youtube_dl/extractor/clippit.py new file mode 100644 index 000000000..a1a7a774c --- /dev/null +++ b/youtube_dl/extractor/clippit.py @@ -0,0 +1,74 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + qualities, +) + +import re + + +class ClippitIE(InfoExtractor): + + _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P[a-z]+)' + _TEST = { + 'url': 'https://www.clippituser.tv/c/evmgm', + 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09', + 'info_dict': { + 'id': 'evmgm', + 'ext': 'mp4', + 'title': 'Bye bye Brutus. #BattleBots - Clippit', + 'uploader': 'lizllove', + 'uploader_url': 'https://www.clippituser.tv/p/lizllove', + 'timestamp': 1472183818, + 'upload_date': '20160826', + 'description': 'BattleBots | ABC', + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'(.+?)', webpage, 'title') + + FORMATS = ('sd', 'hd') + quality = qualities(FORMATS) + formats = [] + for format_id in FORMATS: + url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id, + webpage, 'url', fatal=False) + if not url: + continue + match = re.search(r'/(?P\d+)\.mp4', url) + formats.append({ + 'url': url, + 'format_id': format_id, + 'quality': quality(format_id), + 'height': int(match.group('height')) if match else None, + }) + + uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n', + webpage, 'uploader', fatal=False) + uploader_url = ('https://www.clippituser.tv/p/' + uploader + if uploader else None) + + timestamp = self._html_search_regex(r'datetime="(.+?)"', + webpage, 'date', fatal=False) + thumbnail = self._html_search_regex(r'data-image="(.+?)"', + webpage, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'uploader': uploader, + 'uploader_url': uploader_url, + 'timestamp': parse_iso8601(timestamp), + 'description': self._og_search_description(webpage), + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fb79a1736..ccfa14e7a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -187,6 +187,7 @@ from .chirbit import ( from .cinchcast import CinchcastIE from .cjsw import CJSWIE from .cliphunter import CliphunterIE +from .clippit import ClippitIE from .cliprs import ClipRsIE from .clipsyndicate import ClipsyndicateIE from .closertotruth import CloserToTruthIE From 7ddab7742cad2ff04ec087e3e1d19422c931782b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Aug 2017 16:56:37 +0800 Subject: [PATCH 0161/2417] [ChangeLog] Add an entry for Genki Sky's patch --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 5b897735e..cf7d1beb0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ version Extractors - ++ [clippit] Add support for clippituser.tv + [qqmusic] Support new URL schemes (#13805) From 5d28169747e34850fcb53760c77eccb7f3195ef2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 17 Aug 2017 21:21:17 +0800 Subject: [PATCH 0162/2417] Credit Genki Sky for clippit (bfabd17b33d) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 053159cc3..478c7872f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -223,3 +223,4 @@ Jan Kundrát Giuseppe Fabiano Örn Guðjónsson Parmjit Virk +Genki Sky From 93d0583e34b0cd826f081a766b00381bb5fed52d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Aug 2017 22:45:40 +0700 Subject: [PATCH 0163/2417] [pluralsight] Use RPC API for course extraction (closes #13937) --- youtube_dl/extractor/pluralsight.py | 52 ++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index d35f54ce8..f6a9131b1 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -18,6 +18,7 @@ from ..utils import ( parse_duration, qualities, srt_subtitles_timecode, + try_get, update_url_query, urlencode_postdata, ) @@ -26,6 +27,39 @@ from ..utils import ( class PluralsightBaseIE(InfoExtractor): _API_BASE = 'https://app.pluralsight.com' + def _download_course(self, course_id, url, display_id): + try: + return self._download_course_rpc(course_id, url, display_id) + except ExtractorError: + # Old API fallback + return self._download_json( + 'https://app.pluralsight.com/player/user/api/v1/player/payload', + display_id, data=urlencode_postdata({'courseId': course_id}), + headers={'Referer': url}) + + def _download_course_rpc(self, course_id, url, display_id): + response = self._download_json( + '%s/player/functions/rpc' % self._API_BASE, display_id, + 'Downloading course JSON', + data=json.dumps({ + 'fn': 'bootstrapPlayer', + 'payload': { + 'courseId': course_id, + }, + }).encode('utf-8'), + headers={ + 'Content-Type': 'application/json;charset=utf-8', + 'Referer': url, + }) + + course = try_get(response, lambda x: x['payload']['course'], dict) + if course: + return course + + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['error']['message']), + expected=True) + class PluralsightIE(PluralsightBaseIE): IE_NAME = 'pluralsight' @@ -162,10 +196,7 @@ class PluralsightIE(PluralsightBaseIE): display_id = '%s-%s' % (name, clip_id) - course = self._download_json( - 'https://app.pluralsight.com/player/user/api/v1/player/payload', - display_id, data=urlencode_postdata({'courseId': course_name}), - headers={'Referer': url}) + course = self._download_course(course_name, url, display_id) collection = course['modules'] @@ -331,18 +362,7 @@ class PluralsightCourseIE(PluralsightBaseIE): # TODO: PSM cookie - course = self._download_json( - '%s/player/functions/rpc' % self._API_BASE, course_id, - 'Downloading course JSON', - data=json.dumps({ - 'fn': 'bootstrapPlayer', - 'payload': { - 'courseId': course_id, - } - }).encode('utf-8'), - headers={ - 'Content-Type': 'application/json;charset=utf-8' - })['payload']['course'] + course = self._download_course(course_id, url, course_id) title = course['title'] course_name = course['name'] From 5f5c7b92dda1da6a0f15af7e3999a6ff298a8c92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Aug 2017 23:14:46 +0700 Subject: [PATCH 0164/2417] [udemy] Fix paid course detection (#13943) --- youtube_dl/extractor/udemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 3b02f43e3..207c4a6a7 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -74,7 +74,7 @@ class UdemyIE(InfoExtractor): return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url checkout_url = unescapeHTML(self._search_regex( - r'href=(["\'])(?P(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1', + r'href=(["\'])(?P(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1', webpage, 'checkout url', group='url', default=None)) if checkout_url: raise ExtractorError( From 5551d7714d53caaaae32cdedad11a0bdc95efcf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Aug 2017 23:57:48 +0700 Subject: [PATCH 0165/2417] [generic] Convert redirect URLs to unicode strings (closes #13951) --- youtube_dl/extractor/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index eff5fbfe8..d2fb2627d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2015,7 +2015,7 @@ class GenericIE(InfoExtractor): if head_response is not False: # Check for redirect - new_url = head_response.geturl() + new_url = compat_str(head_response.geturl()) if url != new_url: self.report_following_redirect(new_url) if force_videoid: @@ -2116,7 +2116,7 @@ class GenericIE(InfoExtractor): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, video_id, - mpd_base_url=full_response.geturl().rpartition('/')[0], + mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0], mpd_url=url) self._sort_formats(info_dict['formats']) return info_dict From a5ac0c475589fd1dcd3ba04802f28828c24be6c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Aug 2017 23:59:12 +0700 Subject: [PATCH 0166/2417] [YoutubeDL] Sanitize byte string format URLs (#13951) --- youtube_dl/YoutubeDL.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index df7378f83..5f4c93ea3 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1483,12 +1483,14 @@ class YoutubeDL(object): def is_wellformed(f): url = f.get('url') - valid_url = url and isinstance(url, compat_str) - if not valid_url: + if not url: self.report_warning( '"url" field is missing or empty - skipping format, ' 'there is an error in extractor') - return valid_url + return False + if isinstance(url, bytes): + sanitize_string_field(f, 'url') + return True # Filter out malformed formats for better extraction robustness formats = list(filter(is_wellformed, formats)) From c0892b2b465cff95d392eaa725e39bd47e4dff58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 00:58:23 +0700 Subject: [PATCH 0167/2417] [arte] Detect unavailable videos (closes #13945) --- youtube_dl/extractor/arte.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 56baef29d..02613cf5d 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -9,12 +9,13 @@ from ..compat import ( compat_urllib_parse_urlparse, ) from ..utils import ( + ExtractorError, find_xpath_attr, - unified_strdate, get_element_by_attribute, int_or_none, NO_DEFAULT, qualities, + unified_strdate, ) # There are different sources of video in arte.tv, the extraction process @@ -79,6 +80,13 @@ class ArteTVBaseIE(InfoExtractor): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] + vsr = player_info['VSR'] + + if not vsr and not player_info.get('VRU'): + raise ExtractorError( + 'Video %s is not available' % player_info.get('VID') or video_id, + expected=True) + upload_date_str = player_info.get('shootingDate') if not upload_date_str: upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0] @@ -107,7 +115,7 @@ class ArteTVBaseIE(InfoExtractor): langcode = LANGS.get(lang, lang) formats = [] - for format_id, format_dict in player_info['VSR'].items(): + for format_id, format_dict in vsr.items(): f = dict(format_dict) versionCode = f.get('versionCode') l = re.escape(langcode) From 4a919103651905d4e5954c5d655b45055384e283 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 01:00:07 +0700 Subject: [PATCH 0168/2417] [qqmusic:toplist] PEP 8 --- youtube_dl/extractor/qqmusic.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 62f986050..084308aeb 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -304,9 +304,6 @@ class QQMusicToplistIE(QQPlaylistBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - list_type = 'toplist' - num_id = list_id - toplist_json = self._download_json( 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id, note='Download toplist page', From 2738965d98e1883a781a1e9743de0af086c5acd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 01:03:20 +0700 Subject: [PATCH 0169/2417] [ChangeLog] Actualize --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index cf7d1beb0..298e0b059 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,19 @@ version +Core +* [YoutubeDL] Sanitize byte string format URLs (#13951) ++ [extractor/common] Add support for float durations in _parse_mpd_formats + (#13919) + Extractors +* [arte] Detect unavailable videos (#13945) +* [generic] Convert redirect URLs to unicode strings (#13951) +* [udemy] Fix paid course detection (#13943) +* [pluralsight] Use RPC API for course extraction (#13937) + [clippit] Add support for clippituser.tv + [qqmusic] Support new URL schemes (#13805) +* [periscope] Renew HLS extraction (#13917) +* [mixcloud] Extract decrypt key version 2017.08.13 From ea004d34f83fd7dd9a00fc3e2deb5a101aff6ea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 01:05:27 +0700 Subject: [PATCH 0170/2417] release 2017.08.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3bd61e0a6..66dd4c480 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.13*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.13** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.13 +[debug] youtube-dl version 2017.08.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 298e0b059..9a0fad673 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.18 Core * [YoutubeDL] Sanitize byte string format URLs (#13951) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cc442742f..1991975cc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -156,6 +156,7 @@ - **Cinchcast** - **CJSW** - **cliphunter** + - **Clippit** - **ClipRs** - **Clipsyndicate** - **CloserToTruth** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index da855a602..4358cd3f2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.13' +__version__ = '2017.08.18' From d14d9d8903a532e346dffc3b83730045f18f2c28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Aug 2017 23:31:42 +0700 Subject: [PATCH 0171/2417] [mixcloud] Fix extraction (closes #13958) --- youtube_dl/extractor/mixcloud.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index fcf7beeb2..798968ae3 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -91,12 +91,14 @@ class MixcloudIE(InfoExtractor): if js_url: js = self._download_webpage(js_url, track_id, fatal=False) if js: - key = self._search_regex( - r'player\s*:\s*{.*?\bvalue\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', - js, 'key', default=None, group='key') - if key and isinstance(key, compat_str): - self._keys.insert(0, key) - self._current_key = key + KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' + for key_name in ('value', 'key_value'): + key = self._search_regex( + KEY_RE_TEMPLATE % key_name, js, 'key', + default=None, group='key') + if key and isinstance(key, compat_str): + self._keys.insert(0, key) + self._current_key = key message = self._html_search_regex( r'(?s)]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', From f5469da9e6e259c1690c7ef54f1da1c19f65036f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Aug 2017 19:48:20 +0700 Subject: [PATCH 0172/2417] [laola1tv] Add support for tv.ittf.com (closes #13965) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/laola1tv.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ccfa14e7a..bda6826f1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -509,6 +509,7 @@ from .la7 import LA7IE from .laola1tv import ( Laola1TvEmbedIE, Laola1TvIE, + ITTFIE, ) from .lci import LCIIE from .lcp import ( diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 1f91ba017..c7f813370 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -215,3 +215,21 @@ class Laola1TvIE(Laola1TvEmbedIE): 'formats': formats, 'is_live': is_live, } + + +class ITTFIE(InfoExtractor): + _VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P\d+)' + _TEST = { + 'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802', + 'only_matching': True, + } + + def _real_extract(self, url): + return self.url_result( + update_url_query('https://www.laola1.tv/titanplayer.php', { + 'videoid': self._match_id(url), + 'type': 'V', + 'lang': 'en', + 'portal': 'int', + 'customer': 1024, + }), Laola1TvEmbedIE.ie_key()) From 95f3f7c20a05e7ac490e768b8470b20538ef8581 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 21:40:53 +0800 Subject: [PATCH 0173/2417] [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) --- ChangeLog | 6 ++++++ test/test_utils.py | 1 + youtube_dl/utils.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9a0fad673..9eab4d1e7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) + + version 2017.08.18 Core diff --git a/test/test_utils.py b/test/test_utils.py index 2aab16b97..e50f3764e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('/'), '/') self.assertEqual(unescapeHTML('é'), 'é') self.assertEqual(unescapeHTML('�'), '�') + self.assertEqual(unescapeHTML('&a"'), '&a"') # HTML5 entities self.assertEqual(unescapeHTML('.''), '.\'') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c9cbd5842..2554a2abd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -596,7 +596,7 @@ def unescapeHTML(s): assert type(s) == compat_str return re.sub( - r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) + r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) def get_subprocess_encoding(): From f8f18f332f235bcfa2f8fc161887e0eef283fec0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 21:44:47 +0800 Subject: [PATCH 0174/2417] [cda] Fix extraction (closes #13935) --- ChangeLog | 1 + youtube_dl/extractor/cda.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9eab4d1e7..6c32747c4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core +* [cda] Fix extraction (closes #13935) * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 78b7a923c..0c3af23d5 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -124,7 +124,7 @@ class CDAIE(InfoExtractor): } def extract_format(page, version): - json_str = self._search_regex( + json_str = self._html_search_regex( r'player_data=(\\?["\'])(?P.+?)\1', page, '%s player_json' % version, fatal=False, group='player_data') if not json_str: From 09747ba7663a9c6f89530c7ffbd95cb4776db6bf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 22:27:53 +0800 Subject: [PATCH 0175/2417] [liveleak] Support another liveleak embedding pattern (closes #13336) --- ChangeLog | 3 +++ youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/generic.py | 23 +++++++++++++++----- youtube_dl/extractor/liveleak.py | 35 +++++++++++++++++++++++++----- 4 files changed, 55 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c32747c4..4c7997b2e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,9 @@ Core * [cda] Fix extraction (closes #13935) * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) +Extractors ++ [liveleak] Support another liveleak embedding pattern (#13336) + version 2017.08.18 diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bda6826f1..17048fd6e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -537,7 +537,10 @@ from .limelight import ( LimelightChannelListIE, ) from .litv import LiTVIE -from .liveleak import LiveLeakIE +from .liveleak import ( + LiveLeakIE, + LiveLeakEmbedIE, +) from .livestream import ( LivestreamIE, LivestreamOriginalIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d2fb2627d..49b00b87e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1519,14 +1519,27 @@ class GenericIE(InfoExtractor): # LiveLeak embed { 'url': 'http://www.wykop.pl/link/3088787/', - 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d', + 'md5': '7619da8c820e835bef21a1efa2a0fc71', 'info_dict': { 'id': '874_1459135191', 'ext': 'mp4', 'title': 'Man shows poor quality of new apartment building', 'description': 'The wall is like a sand pile.', 'uploader': 'Lake8737', - } + }, + 'add_ie': [LiveLeakIE.ie_key()], + }, + # Another LiveLeak embed pattern (#13336) + { + 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/', + 'info_dict': { + 'id': '2eb_1496309988', + 'ext': 'mp4', + 'title': 'Thief robs place where everyone was armed', + 'description': 'md5:694d73ee79e535953cf2488562288eee', + 'uploader': 'brazilwtf', + }, + 'add_ie': [LiveLeakIE.ie_key()], }, # Duplicated embedded video URLs { @@ -2757,9 +2770,9 @@ class GenericIE(InfoExtractor): self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()) # Look for LiveLeak embeds - liveleak_url = LiveLeakIE._extract_url(webpage) - if liveleak_url: - return self.url_result(liveleak_url, 'LiveLeak') + liveleak_urls = LiveLeakIE._extract_urls(webpage) + if liveleak_urls: + return self.playlist_from_matches(liveleak_urls, video_id, video_title) # Look for 3Q SDN embeds threeqsdn_url = ThreeQSDNIE._extract_url(webpage) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index b2247a84d..d23eaa355 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -75,12 +75,10 @@ class LiveLeakIE(InfoExtractor): }] @staticmethod - def _extract_url(webpage): - mobj = re.search( - r']+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P[\w_]+)(?:.*)', + def _extract_urls(webpage): + return re.findall( + r']+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"', webpage) - if mobj: - return 'http://www.liveleak.com/view?i=%s' % mobj.group('id') def _real_extract(self, url): video_id = self._match_id(url) @@ -131,3 +129,30 @@ class LiveLeakIE(InfoExtractor): }) return info_dict + + +class LiveLeakEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P[if])=(?P[\w_]+)' + + # See generic.py for actual test cases + _TESTS = [{ + 'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191', + 'only_matching': True, + }, { + 'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + kind, video_id = mobj.group('kind', 'id') + + if kind == 'f': + webpage = self._download_webpage(url, video_id) + liveleak_url = self._search_regex( + r'logourl\s*:\s*(?P[\'"])(?P%s)(?P=q1)' % LiveLeakIE._VALID_URL, + webpage, 'LiveLeak URL', group='url') + elif kind == 'i': + liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id + + return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key()) From e2481b9b6e621e43fd77e395fd2283ce262b71f3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 22:28:58 +0800 Subject: [PATCH 0176/2417] [ChangeLog] Fix --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 4c7997b2e..320609a4f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,11 @@ version Core -* [cda] Fix extraction (closes #13935) * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) Extractors + [liveleak] Support another liveleak embedding pattern (#13336) +* [cda] Fix extraction (#13935) version 2017.08.18 From 381ad4f30998443fabc4c8633caa548685f49c6b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 22:48:00 +0800 Subject: [PATCH 0177/2417] [liveleak] Support multi-video pages (closes #6542) --- ChangeLog | 1 + youtube_dl/extractor/liveleak.py | 43 ++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index 320609a4f..c07cb9648 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) Extractors ++ [liveleak] Support multi-video pages (#6542) + [liveleak] Support another liveleak embedding pattern (#13336) * [cda] Fix extraction (#13935) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index d23eaa355..246aac576 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -72,6 +72,13 @@ class LiveLeakIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.liveleak.com/view?i=677_1439397581', + 'info_dict': { + 'id': '677_1439397581', + 'title': 'Fuel Depot in China Explosion caught on video', + }, + 'playlist_count': 3, }] @staticmethod @@ -109,26 +116,30 @@ class LiveLeakIE(InfoExtractor): 'age_limit': age_limit, } - info_dict = entries[0] + for idx, info_dict in enumerate(entries): + for a_format in info_dict['formats']: + if not a_format.get('height'): + a_format['height'] = int_or_none(self._search_regex( + r'([0-9]+)p\.mp4', a_format['url'], 'height label', + default=None)) - for a_format in info_dict['formats']: - if not a_format.get('height'): - a_format['height'] = int_or_none(self._search_regex( - r'([0-9]+)p\.mp4', a_format['url'], 'height label', - default=None)) + self._sort_formats(info_dict['formats']) - self._sort_formats(info_dict['formats']) + # Don't append entry ID for one-video pages to keep backward compatibility + if len(entries) > 1: + info_dict['id'] = '%s_%s' % (video_id, idx + 1) + else: + info_dict['id'] = video_id - info_dict.update({ - 'id': video_id, - 'title': video_title, - 'description': video_description, - 'uploader': video_uploader, - 'age_limit': age_limit, - 'thumbnail': video_thumbnail, - }) + info_dict.update({ + 'title': video_title, + 'description': video_description, + 'uploader': video_uploader, + 'age_limit': age_limit, + 'thumbnail': video_thumbnail, + }) - return info_dict + return self.playlist_result(entries, video_id, video_title) class LiveLeakEmbedIE(InfoExtractor): From d3d45e0a451bab2cc36181bb50bf3c129a7a5ec4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Aug 2017 23:54:15 +0700 Subject: [PATCH 0178/2417] [bbccouk] Add support for events URLs (closes #13893) --- youtube_dl/extractor/bbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 79ded6ba1..911ae6780 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -37,7 +37,8 @@ class BBCCoUkIE(InfoExtractor): programmes/(?!articles/)| iplayer(?:/[^/]+)?/(?:episode/|playlist/)| music/(?:clips|audiovideo/popular)[/#]| - radio/player/ + radio/player/| + events/[^/]+/play/[^/]+/ ) (?P%s)(?!/(?:episodes|broadcasts|clips)) ''' % _ID_REGEX From 305d99f0bd1effc0e164792199bf93a872da2962 Mon Sep 17 00:00:00 2001 From: "Bernhard M. Wiedemann" Date: Mon, 17 Jul 2017 13:49:09 +0200 Subject: [PATCH 0179/2417] [build] Override timestamps in zip file to make build reproducible. See https://reproducible-builds.org/ for why this is good Copying files to not interfere with freshness detection. --- Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 84ccce2b3..41e3a683a 100644 --- a/Makefile +++ b/Makefile @@ -46,8 +46,15 @@ tar: youtube-dl.tar.gz pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish youtube-dl: youtube_dl/*.py youtube_dl/*/*.py - zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py - zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py + mkdir -p zip + for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \ + mkdir -p zip/$$d ;\ + cp -a $$d/*.py zip/$$d/ ;\ + done + touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py + mv zip/youtube_dl/__main__.py zip/ + cd zip ; zip --quiet ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py + rm -rf zip echo '#!$(PYTHON)' > youtube-dl cat youtube-dl.zip >> youtube-dl rm youtube-dl.zip From b359e977b9bdff704cd58f6f3b34185ecbe450e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Aug 2017 14:16:58 +0700 Subject: [PATCH 0180/2417] [extractor/common] Make HLS and DASH extraction non fatal in _parse_html5_media_entries (closes #13970) --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e747258aa..ceba4ca1c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2123,11 +2123,11 @@ class InfoExtractor(object): formats = self._extract_m3u8_formats( full_url, video_id, ext='mp4', entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id, - preference=preference) + preference=preference, fatal=False) elif ext == 'mpd': is_plain_url = False formats = self._extract_mpd_formats( - full_url, video_id, mpd_id=mpd_id) + full_url, video_id, mpd_id=mpd_id, fatal=False) else: is_plain_url = True formats = [{ From 8239c6791a36813cacc337c5c4a8801d181b8b54 Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Sun, 20 Aug 2017 09:32:33 -0700 Subject: [PATCH 0181/2417] [bandcamp:album] Extract track titles --- youtube_dl/extractor/bandcamp.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 9ddb9af17..be41bd5a2 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -242,7 +242,12 @@ class BandcampAlbumIE(InfoExtractor): raise ExtractorError('The page doesn\'t contain any tracks') # Only tracks with duration info have songs entries = [ - self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) + self.url_result( + compat_urlparse.urljoin(url, t_path), + ie=BandcampIE.ie_key(), + video_title=self._search_regex( + r']+\bitemprop=["\']name["\'][^>]*>([^<]+)', + elem_content, 'track title', fatal=False)) for elem_content, t_path in track_elements if self._html_search_meta('duration', elem_content, default=None)] From 903d4d1625f59b6fb359a898fbb512cb2d6181e9 Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Sun, 20 Aug 2017 09:35:39 -0700 Subject: [PATCH 0182/2417] [README.md] Switch to HTTPS URLs --- README.md | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 0067184be..6f5d00df3 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl -Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). +Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). You can also use pip: @@ -33,7 +33,7 @@ You can also use pip: This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. -OS X users can install youtube-dl with [Homebrew](http://brew.sh/): +OS X users can install youtube-dl with [Homebrew](https://brew.sh/): brew install youtube-dl @@ -458,7 +458,7 @@ You can also use `--config-location` if you want to use custom configuration fil ### Authentication with `.netrc` file -You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you: +You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you: ``` touch $HOME/.netrc chmod a-rwx,u+rw $HOME/.netrc @@ -485,7 +485,7 @@ The `-o` option allows users to indicate a template for the output file names. **tl;dr:** [navigate me to examples](#output-template-examples). -The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are: +The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are: - `id` (string): Video identifier - `title` (string): Video title @@ -603,7 +603,7 @@ $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext) $ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/ # Download entire series season keeping each series and each season in separate directory under C:/MyVideos -$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617 +$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617 # Stream the video being downloaded to stdout $ youtube-dl -o - BaW_jenozKc @@ -716,17 +716,17 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 ### How do I update youtube-dl? -If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). +If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. -If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. +If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like sudo apt-get remove -y youtube-dl -Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html): +Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html): ``` sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl @@ -766,11 +766,11 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much. youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option. -Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed. +Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed. ### I have downloaded a video but how can I play it? -Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/). +Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/). ### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser. @@ -845,10 +845,10 @@ Use the `-o` to specify an [output template](#output-template), for example `-o ### How do I download a video starting with a `-`? -Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`: +Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`: youtube-dl -- -wNyEUrxzFU - youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU" + youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU" ### How do I pass cookies to youtube-dl? @@ -862,9 +862,9 @@ Passing cookies to youtube-dl is a good way to workaround login when a particula ### How do I stream directly to media player? -You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with: +You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with: - youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - + youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - ### How do I download only new videos from a playlist? @@ -884,7 +884,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg. -In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader. +In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader. If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case. @@ -910,7 +910,7 @@ Feel free to bump the issue from time to time by writing a small comment ("Issue ### How can I detect whether a given URL is supported by youtube-dl? -For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. +For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. @@ -924,7 +924,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe # DEVELOPER INSTRUCTIONS -Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. +Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution. To run youtube-dl as a developer, you don't need to build anything either. Simply execute @@ -972,7 +972,7 @@ After you have ensured this site is distributing its content legally, you can fo class YourExtractorIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' _TEST = { - 'url': 'http://yourextractor.com/watch/42', + 'url': 'https://yourextractor.com/watch/42', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { 'id': '42', @@ -1005,8 +1005,8 @@ After you have ensured this site is distributing its content legally, you can fo 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. -8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. -9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: +8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. +9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py @@ -1162,7 +1162,7 @@ import youtube_dl ydl_opts = {} with youtube_dl.YoutubeDL(ydl_opts) as ydl: - ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) + ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object. @@ -1201,19 +1201,19 @@ ydl_opts = { 'progress_hooks': [my_hook], } with youtube_dl.YoutubeDL(ydl_opts) as ydl: - ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) + ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` # BUGS -Bugs and suggestions should be reported at: . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). +Bugs and suggestions should be reported at: . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` $ youtube-dl -v [debug] System config: [] [debug] User config: [] -[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] youtube-dl version 2015.12.06 [debug] Git HEAD: 135392e @@ -1244,7 +1244,7 @@ For bug reports, this means that your report should contain the *complete* outpu If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). -**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL. +**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL. ### Are you using the latest version? From 8d9c2a681a1dcf99ab949e79b1b9da17513e11d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Aug 2017 23:06:27 +0700 Subject: [PATCH 0183/2417] [pornhub] Relax uploader regex (closes #13906, closes #13975) --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index f6777cd26..3428458af 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -186,7 +186,7 @@ class PornHubIE(InfoExtractor): title, thumbnail, duration = [None] * 3 video_uploader = self._html_search_regex( - r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', + r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', webpage, 'uploader', fatal=False) view_count = self._extract_count( From 05915e379a2406988f752722dfaa815804fb7fb8 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Tue, 22 Aug 2017 11:48:59 -0500 Subject: [PATCH 0184/2417] [googledrive] Add support for subtitles (fixes #13619) --- youtube_dl/extractor/googledrive.py | 104 +++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index c40da85c5..35edc7440 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -7,6 +7,8 @@ from ..utils import ( ExtractorError, int_or_none, lowercase_escape, + error_to_compat_str, + update_url_query, ) @@ -24,7 +26,14 @@ class GoogleDriveIE(InfoExtractor): }, { # video id is longer than 28 characters 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', - 'only_matching': True, + 'md5': 'c230c67252874fddd8170e3fd1a45886', + 'info_dict': { + 'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ', + 'ext': 'mp4', + 'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4', + 'duration': 189, + }, + 'only_matching': True }] _FORMATS_EXT = { '5': 'flv', @@ -44,6 +53,13 @@ class GoogleDriveIE(InfoExtractor): '46': 'webm', '59': 'mp4', } + _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext' + _CAPTIONS_ENTRY_TAG = { + 'subtitles': 'track', + 'automatic_captions': 'target', + } + _caption_formats_ext = [] + _captions_by_country_xml = None @staticmethod def _extract_url(webpage): @@ -53,6 +69,81 @@ class GoogleDriveIE(InfoExtractor): if mobj: return 'https://drive.google.com/file/d/%s' % mobj.group('id') + def _set_captions_data(self, video_id, video_subtitles_id, hl): + try: + self._captions_by_country_xml = self._download_xml(self._BASE_URL_CAPTIONS, video_id, query={ + 'id': video_id, + 'vid': video_subtitles_id, + 'hl': hl, + 'v': video_id, + 'type': 'list', + 'tlangs': '1', + 'fmts': '1', + 'vssids': '1', + }) + except ExtractorError as ee: + self.report_warning('unable to download video subtitles: %s' % error_to_compat_str(ee)) + if self._captions_by_country_xml is not None: + caption_available_extensions = self._captions_by_country_xml.findall('format') + for caption_extension in caption_available_extensions: + if caption_extension.attrib.get('fmt_code') and not caption_extension.attrib.get('default'): + self._caption_formats_ext.append(caption_extension.attrib['fmt_code']) + + def _get_captions_by_type(self, video_id, video_subtitles_id, caption_type, caption_original_lang_code=None): + if not video_subtitles_id or not caption_type: + return None + captions = {} + for caption_entry in self._captions_by_country_xml.findall(self._CAPTIONS_ENTRY_TAG[caption_type]): + caption_lang_code = caption_entry.attrib.get('lang_code') + if not caption_lang_code: + continue + caption_format_data = [] + for caption_format in self._caption_formats_ext: + query = { + 'vid': video_subtitles_id, + 'v': video_id, + 'fmt': caption_format, + 'lang': caption_lang_code if caption_original_lang_code is None else caption_original_lang_code, + 'type': 'track', + 'name': '', + 'kind': '', + } + if caption_original_lang_code is not None: + query.update({'tlang': caption_lang_code}) + caption_format_data.append({ + 'url': update_url_query(self._BASE_URL_CAPTIONS, query), + 'ext': caption_format, + }) + captions[caption_lang_code] = caption_format_data + if not captions: + self.report_warning('video doesn\'t have %s' % caption_type.replace('_', ' ')) + return captions + + def _get_subtitles(self, video_id, video_subtitles_id, hl): + if not video_subtitles_id or not hl: + return None + if self._captions_by_country_xml is None: + self._set_captions_data(video_id, video_subtitles_id, hl) + if self._captions_by_country_xml is None: + return None + return self._get_captions_by_type(video_id, video_subtitles_id, 'subtitles') + + def _get_automatic_captions(self, video_id, video_subtitles_id, hl): + if not video_subtitles_id or not hl: + return None + if self._captions_by_country_xml is None: + self._set_captions_data(video_id, video_subtitles_id, hl) + if self._captions_by_country_xml is None: + return None + self.to_screen('%s: Looking for automatic captions' % video_id) + subtitle_original_track = self._captions_by_country_xml.find('track') + if subtitle_original_track is None: + return None + subtitle_original_lang_code = subtitle_original_track.attrib.get('lang_code') + if not subtitle_original_lang_code: + return None + return self._get_captions_by_type(video_id, video_subtitles_id, 'automatic_captions', subtitle_original_lang_code) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( @@ -97,10 +188,21 @@ class GoogleDriveIE(InfoExtractor): formats.append(f) self._sort_formats(formats) + hl = self._search_regex( + r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None) + video_subtitles_id = None + ttsurl = self._search_regex( + r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None) + if ttsurl: + # the video Id for subtitles will be the last value in the ttsurl query string + video_subtitles_id = ttsurl.encode('utf-8').decode('unicode_escape').split('=')[-1] + return { 'id': video_id, 'title': title, 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'formats': formats, + 'subtitles': self.extract_subtitles(video_id, video_subtitles_id, hl), + 'automatic_captions': self.extract_automatic_captions(video_id, video_subtitles_id, hl), } From e01c3d2ef7264b5d3d6f99e7e0b61340885ed661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 00:32:41 +0700 Subject: [PATCH 0185/2417] [extractor/common] Introduce _parse_xml --- youtube_dl/extractor/common.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ceba4ca1c..1804c4de0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -27,6 +27,7 @@ from ..compat import ( compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, + compat_xml_parse_error, ) from ..downloader.f4m import remove_encrypted_media from ..utils import ( @@ -646,15 +647,29 @@ class InfoExtractor(object): def _download_xml(self, url_or_request, video_id, note='Downloading XML', errnote='Unable to download XML', - transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}): + transform_source=None, fatal=True, encoding=None, + data=None, headers={}, query={}): """Return the xml as an xml.etree.ElementTree.Element""" xml_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query) + url_or_request, video_id, note, errnote, fatal=fatal, + encoding=encoding, data=data, headers=headers, query=query) if xml_string is False: return xml_string + return self._parse_xml( + xml_string, video_id, transform_source=transform_source, + fatal=fatal) + + def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): if transform_source: xml_string = transform_source(xml_string) - return compat_etree_fromstring(xml_string.encode('utf-8')) + try: + return compat_etree_fromstring(xml_string.encode('utf-8')) + except compat_xml_parse_error as ve: + errmsg = '%s: Failed to parse XML ' % video_id + if fatal: + raise ExtractorError(errmsg, cause=ve) + else: + self.report_warning(errmsg + str(ve)) def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', From 37d9af306a928ce2184dcb60883e98ec0dd570ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 00:33:53 +0700 Subject: [PATCH 0186/2417] [googledrive] Simplify and carry long lines (#13638) --- youtube_dl/extractor/googledrive.py | 115 +++++++++++++++------------- 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 35edc7440..97ff28219 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -7,7 +7,6 @@ from ..utils import ( ExtractorError, int_or_none, lowercase_escape, - error_to_compat_str, update_url_query, ) @@ -59,7 +58,7 @@ class GoogleDriveIE(InfoExtractor): 'automatic_captions': 'target', } _caption_formats_ext = [] - _captions_by_country_xml = None + _captions_xml = None @staticmethod def _extract_url(webpage): @@ -69,96 +68,99 @@ class GoogleDriveIE(InfoExtractor): if mobj: return 'https://drive.google.com/file/d/%s' % mobj.group('id') - def _set_captions_data(self, video_id, video_subtitles_id, hl): - try: - self._captions_by_country_xml = self._download_xml(self._BASE_URL_CAPTIONS, video_id, query={ + def _download_subtitles_xml(self, video_id, subtitles_id, hl): + if self._captions_xml: + return + self._captions_xml = self._download_xml( + self._BASE_URL_CAPTIONS, video_id, query={ 'id': video_id, - 'vid': video_subtitles_id, + 'vid': subtitles_id, 'hl': hl, 'v': video_id, 'type': 'list', 'tlangs': '1', 'fmts': '1', 'vssids': '1', - }) - except ExtractorError as ee: - self.report_warning('unable to download video subtitles: %s' % error_to_compat_str(ee)) - if self._captions_by_country_xml is not None: - caption_available_extensions = self._captions_by_country_xml.findall('format') - for caption_extension in caption_available_extensions: - if caption_extension.attrib.get('fmt_code') and not caption_extension.attrib.get('default'): - self._caption_formats_ext.append(caption_extension.attrib['fmt_code']) + }, note='Downloading subtitles XML', + errnote='Unable to download subtitles XML', fatal=False) + if self._captions_xml: + for f in self._captions_xml.findall('format'): + if f.attrib.get('fmt_code') and not f.attrib.get('default'): + self._caption_formats_ext.append(f.attrib['fmt_code']) - def _get_captions_by_type(self, video_id, video_subtitles_id, caption_type, caption_original_lang_code=None): - if not video_subtitles_id or not caption_type: - return None + def _get_captions_by_type(self, video_id, subtitles_id, caption_type, + origin_lang_code=None): + if not subtitles_id or not caption_type: + return captions = {} - for caption_entry in self._captions_by_country_xml.findall(self._CAPTIONS_ENTRY_TAG[caption_type]): + for caption_entry in self._captions_xml.findall( + self._CAPTIONS_ENTRY_TAG[caption_type]): caption_lang_code = caption_entry.attrib.get('lang_code') if not caption_lang_code: continue caption_format_data = [] for caption_format in self._caption_formats_ext: query = { - 'vid': video_subtitles_id, + 'vid': subtitles_id, 'v': video_id, 'fmt': caption_format, - 'lang': caption_lang_code if caption_original_lang_code is None else caption_original_lang_code, + 'lang': (caption_lang_code if origin_lang_code is None + else origin_lang_code), 'type': 'track', 'name': '', 'kind': '', } - if caption_original_lang_code is not None: + if origin_lang_code is not None: query.update({'tlang': caption_lang_code}) caption_format_data.append({ 'url': update_url_query(self._BASE_URL_CAPTIONS, query), 'ext': caption_format, }) captions[caption_lang_code] = caption_format_data - if not captions: - self.report_warning('video doesn\'t have %s' % caption_type.replace('_', ' ')) return captions - def _get_subtitles(self, video_id, video_subtitles_id, hl): - if not video_subtitles_id or not hl: - return None - if self._captions_by_country_xml is None: - self._set_captions_data(video_id, video_subtitles_id, hl) - if self._captions_by_country_xml is None: - return None - return self._get_captions_by_type(video_id, video_subtitles_id, 'subtitles') + def _get_subtitles(self, video_id, subtitles_id, hl): + if not subtitles_id or not hl: + return + self._download_subtitles_xml(video_id, subtitles_id, hl) + if not self._captions_xml: + return + return self._get_captions_by_type(video_id, subtitles_id, 'subtitles') - def _get_automatic_captions(self, video_id, video_subtitles_id, hl): - if not video_subtitles_id or not hl: - return None - if self._captions_by_country_xml is None: - self._set_captions_data(video_id, video_subtitles_id, hl) - if self._captions_by_country_xml is None: - return None - self.to_screen('%s: Looking for automatic captions' % video_id) - subtitle_original_track = self._captions_by_country_xml.find('track') - if subtitle_original_track is None: - return None - subtitle_original_lang_code = subtitle_original_track.attrib.get('lang_code') - if not subtitle_original_lang_code: - return None - return self._get_captions_by_type(video_id, video_subtitles_id, 'automatic_captions', subtitle_original_lang_code) + def _get_automatic_captions(self, video_id, subtitles_id, hl): + if not subtitles_id or not hl: + return + self._download_subtitles_xml(video_id, subtitles_id, hl) + if not self._captions_xml: + return + track = self._captions_xml.find('track') + if track is None: + return + origin_lang_code = track.attrib.get('lang_code') + if not origin_lang_code: + return + return self._get_captions_by_type( + video_id, subtitles_id, 'automatic_captions', origin_lang_code) def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( 'http://docs.google.com/file/d/%s' % video_id, video_id) - reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) + reason = self._search_regex( + r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) if reason: raise ExtractorError(reason) title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title') duration = int_or_none(self._search_regex( - r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None)) + r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', + default=None)) fmt_stream_map = self._search_regex( - r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',') - fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') + r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, + 'fmt stream map').split(',') + fmt_list = self._search_regex( + r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') resolutions = {} for fmt in fmt_list: @@ -190,12 +192,14 @@ class GoogleDriveIE(InfoExtractor): hl = self._search_regex( r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None) - video_subtitles_id = None + subtitles_id = None ttsurl = self._search_regex( r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None) if ttsurl: - # the video Id for subtitles will be the last value in the ttsurl query string - video_subtitles_id = ttsurl.encode('utf-8').decode('unicode_escape').split('=')[-1] + # the video Id for subtitles will be the last value in the ttsurl + # query string + subtitles_id = ttsurl.encode('utf-8').decode( + 'unicode_escape').split('=')[-1] return { 'id': video_id, @@ -203,6 +207,7 @@ class GoogleDriveIE(InfoExtractor): 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'formats': formats, - 'subtitles': self.extract_subtitles(video_id, video_subtitles_id, hl), - 'automatic_captions': self.extract_automatic_captions(video_id, video_subtitles_id, hl), + 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl), + 'automatic_captions': self.extract_automatic_captions( + video_id, subtitles_id, hl), } From 8d7a24aff60a57e651bab40f16a81eb7dffb405c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 22:28:09 +0700 Subject: [PATCH 0187/2417] [toutv] Relax DRM check (closes #13994) --- youtube_dl/extractor/toutv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 26d770992..071388dcc 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -78,8 +78,10 @@ class TouTvIE(InfoExtractor): def _real_extract(self, url): path = self._match_id(url) metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) + # IsDrm does not necessarily mean the video is DRM protected (see + # https://github.com/rg3/youtube-dl/issues/13994). if metadata.get('IsDrm'): - raise ExtractorError('This video is DRM protected.', expected=True) + self.report_warning('This video is probably DRM protected.', path) video_id = metadata['IdMedia'] details = metadata['Details'] title = details['OriginalTitle'] From 0830f3e04842a58eb563962940ceb2bed27aac1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 22:45:45 +0700 Subject: [PATCH 0188/2417] [cbc:watch] Bypass geo-restriction (closes #13993) --- youtube_dl/extractor/cbc.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 87ad14e91..9faf40227 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -200,6 +200,7 @@ class CBCWatchBaseIE(InfoExtractor): 'media': 'http://search.yahoo.com/mrss/', 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', } + _GEO_COUNTRIES = ['CA'] def _call_api(self, path, video_id): url = path if path.startswith('http') else self._API_BASE_URL + path @@ -287,6 +288,11 @@ class CBCWatchBaseIE(InfoExtractor): class CBCWatchVideoIE(CBCWatchBaseIE): IE_NAME = 'cbc.ca:watch:video' _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TEST = { + # geo-restricted to Canada, bypassable + 'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235', + 'only_matching': True, + } def _real_extract(self, url): video_id = self._match_id(url) @@ -323,9 +329,10 @@ class CBCWatchIE(CBCWatchBaseIE): IE_NAME = 'cbc.ca:watch' _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P[0-9a-f-]+)' _TESTS = [{ + # geo-restricted to Canada, bypassable 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4', 'info_dict': { - 'id': '38e815a-009e3ab12e4', + 'id': '9673749a-5e77-484c-8b62-a1092a6b5168', 'ext': 'mp4', 'title': 'Customer (Dis)Service', 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87', @@ -337,8 +344,8 @@ class CBCWatchIE(CBCWatchBaseIE): 'skip_download': True, 'format': 'bestvideo', }, - 'skip': 'Geo-restricted to Canada', }, { + # geo-restricted to Canada, bypassable 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057', 'info_dict': { 'id': '1ed4b385-cd84-49cf-95f0-80f004680057', @@ -346,7 +353,6 @@ class CBCWatchIE(CBCWatchBaseIE): 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.', }, 'playlist_mincount': 30, - 'skip': 'Geo-restricted to Canada', }] def _real_extract(self, url): From 5bae33485c223fdf230254fa424f972b3c51e77f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 22:50:00 +0700 Subject: [PATCH 0189/2417] [toutv] PEP 8 --- youtube_dl/extractor/toutv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 071388dcc..e59ed2661 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..utils import ( int_or_none, js_to_json, - ExtractorError, urlencode_postdata, extract_attributes, smuggle_url, From c4bdc6811307c002a399b59860d311591145397f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 23:21:19 +0700 Subject: [PATCH 0190/2417] [ChangeLog] Actualize --- ChangeLog | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c07cb9648..4104b6ded 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,23 @@ version Core ++ [extractor/common] Introduce _parse_xml +* [extractor/common] Make HLS and DASH extraction in_parse_html5_media_entries + non fatal (#13970) * [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) Extractors +* [cbc:watch] Bypass geo restriction (#13993) +* [toutv] Relax DRM check (#13994) ++ [googledrive] Add support for subtitles (#13619, #13638) +* [pornhub] Relax uploader regular expression (#13906, #13975) +* [bandcamp:album] Extract track titles (#13962) ++ [bbccouk] Add support for events URLs (#13893) + [liveleak] Support multi-video pages (#6542) + [liveleak] Support another liveleak embedding pattern (#13336) * [cda] Fix extraction (#13935) ++ [laola1tv] Add support for tv.ittf.com (#13965) +* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003) version 2017.08.18 @@ -129,7 +140,7 @@ Extractors * [youku:show] Fix playlist extraction (#13248) + [dispeak] Recognize sevt subdomain (#13276) * [adn] Improve error reporting (#13663) -* [crunchyroll] Relax series and season regex (#13659) +* [crunchyroll] Relax series and season regular expression (#13659) + [spiegel:article] Add support for nexx iframe embeds (#13029) + [nexx:embed] Add support for iframe embeds * [nexx] Improve JS embed extraction From df235dbba8d8ae3b51ad3432f67d0cb661dadd75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Aug 2017 23:23:13 +0700 Subject: [PATCH 0191/2417] release 2017.08.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 12 ++++++------ ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 66dd4c480..3e1ff1536 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.18 +[debug] youtube-dl version 2017.08.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d606eab0e..a8091e7b5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,7 +3,7 @@ $ youtube-dl -v [debug] System config: [] [debug] User config: [] -[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] youtube-dl version 2015.12.06 [debug] Git HEAD: 135392e @@ -34,7 +34,7 @@ For bug reports, this means that your report should contain the *complete* outpu If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). -**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL. +**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL. ### Are you using the latest version? @@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t # DEVELOPER INSTRUCTIONS -Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. +Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution. To run youtube-dl as a developer, you don't need to build anything either. Simply execute @@ -118,7 +118,7 @@ After you have ensured this site is distributing its content legally, you can fo class YourExtractorIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' _TEST = { - 'url': 'http://yourextractor.com/watch/42', + 'url': 'https://yourextractor.com/watch/42', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { 'id': '42', @@ -151,8 +151,8 @@ After you have ensured this site is distributing its content legally, you can fo 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. -8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. -9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: +8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. +9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py diff --git a/ChangeLog b/ChangeLog index 4104b6ded..a60bd5fc8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.23 Core + [extractor/common] Introduce _parse_xml diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1991975cc..dbec6c8dc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -363,6 +363,7 @@ - **IPrima** - **iqiyi**: 爱奇艺 - **Ir90Tv** + - **ITTF** - **ITV** - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations @@ -419,6 +420,7 @@ - **limelight:channel_list** - **LiTV** - **LiveLeak** + - **LiveLeakEmbed** - **livestream** - **livestream:original** - **LnkGo** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4358cd3f2..94d35a66a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.18' +__version__ = '2017.08.23' From 745968bc72f5dcf1271559d58abd3f0d9a2ea01e Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Thu, 24 Aug 2017 20:58:44 +0530 Subject: [PATCH 0192/2417] [mixcloud] Fix extraction (closes #14015) --- youtube_dl/extractor/mixcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 798968ae3..25d3fc4b3 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -92,7 +92,7 @@ class MixcloudIE(InfoExtractor): js = self._download_webpage(js_url, track_id, fatal=False) if js: KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' - for key_name in ('value', 'key_value'): + for key_name in ('value', 'key_value', 'key_value_two'): key = self._search_regex( KEY_RE_TEMPLATE % key_name, js, 'key', default=None, group='key') From c7121fa7b804d2b9e35dec05f8861e4ebba8afd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Aug 2017 15:38:38 +0700 Subject: [PATCH 0193/2417] [youtube] Fix controversy videos extraction (closes #14027, closes #14029) --- youtube_dl/extractor/youtube.py | 36 ++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2e71795e7..3d0f5a5c9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1003,6 +1003,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'Skipping DASH manifest', ], }, + { + # The following content has been identified by the YouTube community + # as inappropriate or offensive to some audiences. + 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI', + 'info_dict': { + 'id': '6SJNVb0GnPI', + 'ext': 'mp4', + 'title': 'Race Differences in Intelligence', + 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1', + 'duration': 965, + 'upload_date': '20140124', + 'uploader': 'New Century Foundation', + 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg', + 'license': 'Standard YouTube License', + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { # itag 212 'url': '1t24XAntNCY', @@ -1437,9 +1458,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if dash_mpd and dash_mpd[0] not in dash_mpds: dash_mpds.append(dash_mpd[0]) + is_live = None + view_count = None + + def extract_view_count(v_info): + return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) + # Get video info embed_webpage = None - is_live = None if re.search(r'player-age-gate-content">', video_webpage) is not None: age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} @@ -1509,6 +1535,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue get_video_info = compat_parse_qs(video_info_webpage) add_dash_mpd(get_video_info) + if view_count is None: + view_count = extract_view_count(get_video_info) if not video_info: video_info = get_video_info if 'token' in get_video_info: @@ -1592,10 +1620,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return self.playlist_result(entries, video_id, video_title, video_description) self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - if 'view_count' in video_info: - view_count = int(video_info['view_count'][0]) - else: - view_count = None + if view_count is None: + view_count = extract_view_count(get_video_info) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: From 151978f38a915afd0169e68955b3b7394c54e367 Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Sat, 26 Aug 2017 18:02:57 +0530 Subject: [PATCH 0194/2417] [mixcloud] Fix extraction (closes #14020) --- youtube_dl/extractor/mixcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 25d3fc4b3..f6360cce6 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -92,7 +92,7 @@ class MixcloudIE(InfoExtractor): js = self._download_webpage(js_url, track_id, fatal=False) if js: KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' - for key_name in ('value', 'key_value', 'key_value_two'): + for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'): key = self._search_regex( KEY_RE_TEMPLATE % key_name, js, 'key', default=None, group='key') From 085d9dd9bebfd1692cfe07e8bcb844780bfe4700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Aug 2017 22:02:49 +0700 Subject: [PATCH 0195/2417] [rai] Fix audio formats extraction (closes #14024) --- youtube_dl/extractor/rai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index e11bf8f9a..5bf64a56b 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -345,11 +345,11 @@ class RaiIE(RaiBaseIE): media_type = media['type'] if 'Audio' in media_type: relinker_info = { - 'formats': { + 'formats': [{ 'format_id': media.get('formatoAudio'), 'url': media['audioUrl'], 'ext': media.get('formatoAudio'), - } + }] } elif 'Video' in media_type: relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) From a3c3a1e12898a57fc2323e4c7cc37ace35482ecf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Aug 2017 23:55:48 +0700 Subject: [PATCH 0196/2417] [http] Rework HTTP downloader * Simplify code and split into separate routines to facilitate maintaining * Make retry mechanism work on errors during actual download not only during connection establishment phase * Retry on ECONNRESET and ETIMEDOUT during reading data from network * Retry on content too short and various timeout errors * Show error description on retry * Closes #506, closes #809, closes #2849, closes #4240, closes #6023, closes #8625, closes #9483 --- youtube_dl/downloader/common.py | 6 +- youtube_dl/downloader/http.py | 343 ++++++++++++++++++-------------- 2 files changed, 197 insertions(+), 152 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 77242dacc..75b8166c5 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -304,11 +304,11 @@ class FileDownloader(object): """Report attempt to resume at given byte.""" self.to_screen('[download] Resuming download at byte %s' % resume_len) - def report_retry(self, count, retries): + def report_retry(self, err, count, retries): """Report retry in case of HTTP error 5xx""" self.to_screen( - '[download] Got server HTTP error. Retrying (attempt %d of %s)...' - % (count, self.format_retries(retries))) + '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...' + % (error_to_compat_str(err), count, self.format_retries(retries))) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index af405b950..8a6638cc2 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -22,8 +22,16 @@ from ..utils import ( class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] - tmpfilename = self.temp_name(filename) - stream = None + + class DownloadContext(dict): + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ + + ctx = DownloadContext() + ctx.filename = filename + ctx.tmpfilename = self.temp_name(filename) + ctx.stream = None # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} @@ -38,46 +46,51 @@ class HttpFD(FileDownloader): if is_test: request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1)) - # Establish possible resume length - if os.path.isfile(encodeFilename(tmpfilename)): - resume_len = os.path.getsize(encodeFilename(tmpfilename)) - else: - resume_len = 0 + ctx.open_mode = 'wb' + ctx.resume_len = 0 - open_mode = 'wb' - if resume_len != 0: - if self.params.get('continuedl', True): - self.report_resuming_byte(resume_len) - request.add_header('Range', 'bytes=%d-' % resume_len) - open_mode = 'ab' - else: - resume_len = 0 + if self.params.get('continuedl', True): + # Establish possible resume length + if os.path.isfile(encodeFilename(ctx.tmpfilename)): + ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) count = 0 retries = self.params.get('retries', 0) - while count <= retries: + + class SucceedDownload(Exception): + pass + + class RetryDownload(Exception): + def __init__(self, source_error): + self.source_error = source_error + + def establish_connection(): + if ctx.resume_len != 0: + self.report_resuming_byte(ctx.resume_len) + request.add_header('Range', 'bytes=%d-' % ctx.resume_len) + ctx.open_mode = 'ab' # Establish connection try: - data = self.ydl.urlopen(request) + ctx.data = self.ydl.urlopen(request) # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range # set in response despite of requested Range (see # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) - if resume_len > 0: - content_range = data.headers.get('Content-Range') + if ctx.resume_len > 0: + content_range = ctx.data.headers.get('Content-Range') if content_range: content_range_m = re.search(r'bytes (\d+)-', content_range) # Content-Range is present and matches requested Range, resume is possible - if content_range_m and resume_len == int(content_range_m.group(1)): - break + if content_range_m and ctx.resume_len == int(content_range_m.group(1)): + return # Content-Range is either not present or invalid. Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file # and performing entire redownload self.report_unable_to_resume() - resume_len = 0 - open_mode = 'wb' - break + ctx.resume_len = 0 + ctx.open_mode = 'wb' + return except (compat_urllib_error.HTTPError, ) as err: if (err.code < 500 or err.code >= 600) and err.code != 416: # Unexpected HTTP error @@ -86,15 +99,15 @@ class HttpFD(FileDownloader): # Unable to resume (requested range not satisfiable) try: # Open the connection again without the range header - data = self.ydl.urlopen(basic_request) - content_length = data.info()['Content-Length'] + ctx.data = self.ydl.urlopen(basic_request) + content_length = ctx.data.info()['Content-Length'] except (compat_urllib_error.HTTPError, ) as err: if err.code < 500 or err.code >= 600: raise else: # Examine the reported length if (content_length is not None and - (resume_len - 100 < int(content_length) < resume_len + 100)): + (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -102,152 +115,184 @@ class HttpFD(FileDownloader): # I decided to implement a suggested change and consider the file # completely downloaded if the file size differs less than 100 bytes from # the one in the hard drive. - self.report_file_already_downloaded(filename) - self.try_rename(tmpfilename, filename) + self.report_file_already_downloaded(ctx.filename) + self.try_rename(ctx.tmpfilename, ctx.filename) self._hook_progress({ - 'filename': filename, + 'filename': ctx.filename, 'status': 'finished', - 'downloaded_bytes': resume_len, - 'total_bytes': resume_len, + 'downloaded_bytes': ctx.resume_len, + 'total_bytes': ctx.resume_len, }) - return True + raise SucceedDownload() else: # The length does not match, we start the download over self.report_unable_to_resume() - resume_len = 0 - open_mode = 'wb' - break - except socket.error as e: - if e.errno != errno.ECONNRESET: + ctx.resume_len = 0 + ctx.open_mode = 'wb' + return + raise RetryDownload(err) + except socket.error as err: + if err.errno != errno.ECONNRESET: # Connection reset is no problem, just retry raise + raise RetryDownload(err) - # Retry - count += 1 - if count <= retries: - self.report_retry(count, retries) + def download(): + data_len = ctx.data.info().get('Content-length', None) - if count > retries: - self.report_error('giving up after %s retries' % retries) - return False + # Range HTTP header may be ignored/unsupported by a webserver + # (e.g. extractor/scivee.py, extractor/bambuser.py). + # However, for a test we still would like to download just a piece of a file. + # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control + # block size when downloading a file. + if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): + data_len = self._TEST_FILE_SIZE - data_len = data.info().get('Content-length', None) - - # Range HTTP header may be ignored/unsupported by a webserver - # (e.g. extractor/scivee.py, extractor/bambuser.py). - # However, for a test we still would like to download just a piece of a file. - # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control - # block size when downloading a file. - if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): - data_len = self._TEST_FILE_SIZE - - if data_len is not None: - data_len = int(data_len) + resume_len - min_data_len = self.params.get('min_filesize') - max_data_len = self.params.get('max_filesize') - if min_data_len is not None and data_len < min_data_len: - self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) - return False - if max_data_len is not None and data_len > max_data_len: - self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) - return False - - byte_counter = 0 + resume_len - block_size = self.params.get('buffersize', 1024) - start = time.time() - - # measure time over whole while-loop, so slow_down() and best_block_size() work together properly - now = None # needed for slow_down() in the first loop run - before = start # start measuring - while True: - - # Download and write - data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) - byte_counter += len(data_block) - - # exit loop when download is finished - if len(data_block) == 0: - break - - # Open destination file just in time - if stream is None: - try: - (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) - assert stream is not None - filename = self.undo_temp_name(tmpfilename) - self.report_destination(filename) - except (OSError, IOError) as err: - self.report_error('unable to open for writing: %s' % str(err)) + if data_len is not None: + data_len = int(data_len) + ctx.resume_len + min_data_len = self.params.get('min_filesize') + max_data_len = self.params.get('max_filesize') + if min_data_len is not None and data_len < min_data_len: + self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) return False - if self.params.get('xattr_set_filesize', False) and data_len is not None: + byte_counter = 0 + ctx.resume_len + block_size = self.params.get('buffersize', 1024) + start = time.time() + + # measure time over whole while-loop, so slow_down() and best_block_size() work together properly + now = None # needed for slow_down() in the first loop run + before = start # start measuring + + def retry(e): + if ctx.tmpfilename != '-': + ctx.stream.close() + ctx.stream = None + ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) + raise RetryDownload(e) + + while True: + try: + # Download and write + data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) + # socket.timeout is a subclass of socket.error but may not have + # errno set + except socket.timeout as e: + retry(e) + except socket.error as e: + if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT): + raise + retry(e) + + byte_counter += len(data_block) + + # exit loop when download is finished + if len(data_block) == 0: + break + + # Open destination file just in time + if ctx.stream is None: try: - write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) - except (XAttrUnavailableError, XAttrMetadataError) as err: - self.report_error('unable to set filesize xattr: %s' % str(err)) + ctx.stream, ctx.tmpfilename = sanitize_open( + ctx.tmpfilename, ctx.open_mode) + assert ctx.stream is not None + ctx.filename = self.undo_temp_name(ctx.tmpfilename) + self.report_destination(ctx.filename) + except (OSError, IOError) as err: + self.report_error('unable to open for writing: %s' % str(err)) + return False - try: - stream.write(data_block) - except (IOError, OSError) as err: + if self.params.get('xattr_set_filesize', False) and data_len is not None: + try: + write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) + except (XAttrUnavailableError, XAttrMetadataError) as err: + self.report_error('unable to set filesize xattr: %s' % str(err)) + + try: + ctx.stream.write(data_block) + except (IOError, OSError) as err: + self.to_stderr('\n') + self.report_error('unable to write data: %s' % str(err)) + return False + + # Apply rate limit + self.slow_down(start, now, byte_counter - ctx.resume_len) + + # end measuring of one loop run + now = time.time() + after = now + + # Adjust block size + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) + + before = after + + # Progress message + speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) + if data_len is None: + eta = None + else: + eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len) + + self._hook_progress({ + 'status': 'downloading', + 'downloaded_bytes': byte_counter, + 'total_bytes': data_len, + 'tmpfilename': ctx.tmpfilename, + 'filename': ctx.filename, + 'eta': eta, + 'speed': speed, + 'elapsed': now - start, + }) + + if is_test and byte_counter == data_len: + break + + if ctx.stream is None: self.to_stderr('\n') - self.report_error('unable to write data: %s' % str(err)) + self.report_error('Did not get any data blocks') return False + if ctx.tmpfilename != '-': + ctx.stream.close() - # Apply rate limit - self.slow_down(start, now, byte_counter - resume_len) + if data_len is not None and byte_counter != data_len: + err = ContentTooShortError(byte_counter, int(data_len)) + if count <= retries: + retry(err) + raise err - # end measuring of one loop run - now = time.time() - after = now + self.try_rename(ctx.tmpfilename, ctx.filename) - # Adjust block size - if not self.params.get('noresizebuffer', False): - block_size = self.best_block_size(after - before, len(data_block)) - - before = after - - # Progress message - speed = self.calc_speed(start, now, byte_counter - resume_len) - if data_len is None: - eta = None - else: - eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + # Update file modification time + if self.params.get('updatetime', True): + info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) self._hook_progress({ - 'status': 'downloading', 'downloaded_bytes': byte_counter, - 'total_bytes': data_len, - 'tmpfilename': tmpfilename, - 'filename': filename, - 'eta': eta, - 'speed': speed, - 'elapsed': now - start, + 'total_bytes': byte_counter, + 'filename': ctx.filename, + 'status': 'finished', + 'elapsed': time.time() - start, }) - if is_test and byte_counter == data_len: - break + return True - if stream is None: - self.to_stderr('\n') - self.report_error('Did not get any data blocks') - return False - if tmpfilename != '-': - stream.close() + while count <= retries: + try: + establish_connection() + download() + return True + except RetryDownload as e: + count += 1 + if count <= retries: + self.report_retry(e.source_error, count, retries) + continue + except SucceedDownload: + return True - if data_len is not None and byte_counter != data_len: - raise ContentTooShortError(byte_counter, int(data_len)) - self.try_rename(tmpfilename, filename) - - # Update file modification time - if self.params.get('updatetime', True): - info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) - - self._hook_progress({ - 'downloaded_bytes': byte_counter, - 'total_bytes': byte_counter, - 'filename': filename, - 'status': 'finished', - 'elapsed': time.time() - start, - }) - - return True + self.report_error('giving up after %s retries' % retries) + return False From dd121cc1cab2f077cbf68ee432e83d4d094f0f9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 03:12:56 +0700 Subject: [PATCH 0197/2417] [extractor/common] Extract height from res attribute of source tag for HTML5 videos (closes #14034) --- youtube_dl/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1804c4de0..b4af3f987 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2184,6 +2184,9 @@ class InfoExtractor(object): f = parse_content_type(source_attributes.get('type')) is_plain_url, formats = _media_formats(src, media_type, f) if is_plain_url: + # res attribute is not standard but seen several times + # in the wild + f['height'] = int_or_none(source_attributes.get('res')) f.update(formats[0]) media_info['formats'].append(f) else: From 1ed4549942c34cce52b9c641cf9f532c38866149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 03:27:05 +0700 Subject: [PATCH 0198/2417] [extractor/common] Extract format id from label attribute of source tag for HTML5 videos (#14034) --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b4af3f987..74d30ec50 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2186,7 +2186,10 @@ class InfoExtractor(object): if is_plain_url: # res attribute is not standard but seen several times # in the wild - f['height'] = int_or_none(source_attributes.get('res')) + f.update({ + 'height': int_or_none(source_attributes.get('res')), + 'format_id': source_attributes.get('label'), + }) f.update(formats[0]) media_info['formats'].append(f) else: From ff17be3ac921910e5cab6f54a579f379dacae068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 03:27:20 +0700 Subject: [PATCH 0199/2417] [extractor/generic] Extract from LD-JSON last of all Previous sources may contain several formats, e.g. http://tamasha.com/v/PgGZ --- youtube_dl/extractor/generic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 49b00b87e..c81efdc00 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2871,12 +2871,6 @@ class GenericIE(InfoExtractor): merged[k] = v return merged - # Looking for http://schema.org/VideoObject - json_ld = self._search_json_ld( - webpage, video_id, default={}, expected_type='VideoObject') - if json_ld.get('url'): - return merge_dicts(json_ld, info_dict) - # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: @@ -2895,6 +2889,12 @@ class GenericIE(InfoExtractor): jwplayer_data, video_id, require_title=False, base_url=url) return merge_dicts(info, info_dict) + # Looking for http://schema.org/VideoObject + json_ld = self._search_json_ld( + webpage, video_id, default={}, expected_type='VideoObject') + if json_ld.get('url'): + return merge_dicts(json_ld, info_dict) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True From 62c06c593d19c1314c84639128bb3354665399bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 04:24:41 +0700 Subject: [PATCH 0200/2417] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index a60bd5fc8..46d76ff3d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version + +Core ++ [extractor/common] Extract height and format id for HTML5 videos (#14034) +* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023, + #8625, #9483) + * Simplify code and split into separate routines to facilitate maintaining + * Make retry mechanism work on errors during actual download not only + during connection establishment phase + * Retry on ECONNRESET and ETIMEDOUT during reading data from network + * Retry on content too short + * Show error description on retry + +Extractors +* [generic] Lower preference for extraction from LD-JSON +* [rai] Fix audio formats extraction (#14024) +* [youtube] Fix controversy videos extraction (#14027, #14029) +* [mixcloud] Fix extraction (#14015, #14020) + + version 2017.08.23 Core From f031b760650f57672deecacb3f1991bd6fcd778e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 04:28:04 +0700 Subject: [PATCH 0201/2417] release 2017.08.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3e1ff1536..777932f93 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.23 +[debug] youtube-dl version 2017.08.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 46d76ff3d..a997759b0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.27 Core + [extractor/common] Extract height and format id for HTML5 videos (#14034) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 94d35a66a..6a24b70d7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.23' +__version__ = '2017.08.27' From 1c9c8de29e83bbfc38027dabcd9642f1c41a64b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 05:59:08 +0700 Subject: [PATCH 0202/2417] [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (closes #14037) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3d0f5a5c9..5a6b735a0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1621,7 +1621,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) if view_count is None: - view_count = extract_view_count(get_video_info) + view_count = extract_view_count(video_info) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: From cc0412ef915047233391c978fa1f8d4d1479ddfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 06:06:49 +0700 Subject: [PATCH 0203/2417] [ChangeLog] Actualize --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index a997759b0..c38a83670 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version + +Extractors + +* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037) + + version 2017.08.27 Core From 2cfa7cbdd09353c8f8b135ba1c0b3a325bb03a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 06:09:29 +0700 Subject: [PATCH 0204/2417] release 2017.08.27.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 777932f93..7959d910b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.27.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.27.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.27 +[debug] youtube-dl version 2017.08.27.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c38a83670..ef9ac4660 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.08.27.1 Extractors diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6a24b70d7..7504ef0f0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.27' +__version__ = '2017.08.27.1' From 3902cdd0e3309364809407b895e4060199b89d11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Aug 2017 22:36:57 +0700 Subject: [PATCH 0205/2417] [pornhd] Fix extraction (closes #14005) --- youtube_dl/extractor/pornhd.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 36761788d..b52879c7a 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor): r'(.+?) - .*?[Pp]ornHD.*?'], webpage, 'title') sources = self._parse_json(js_to_json(self._search_regex( - r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]", + r"(?s)sources'?\s*[:=]\s*(\{.+?\})", webpage, 'sources', default='{}')), video_id) if not sources: @@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor): view_count = int_or_none(self._html_search_regex( r'(\d+) views\s*<', webpage, 'view count', fatal=False)) thumbnail = self._search_regex( - r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) + r"poster'?\s*:\s*([\"'])(?P(?:(?!\1).)+)\1", webpage, + 'thumbnail', fatal=False, group='url') return { 'id': video_id, From fea82c1780cc751267fd2b9b4145996bfc0c1994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 28 Aug 2017 00:39:22 +0700 Subject: [PATCH 0206/2417] [googledrive] Add support for source format (closes #14046) --- youtube_dl/extractor/googledrive.py | 121 +++++++++++++++++++--------- 1 file changed, 84 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 97ff28219..37d373901 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, int_or_none, lowercase_escape, @@ -15,17 +16,26 @@ class GoogleDriveIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P[a-zA-Z0-9_-]{28,})' _TESTS = [{ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', - 'md5': 'd109872761f7e7ecf353fa108c0dbe1e', + 'md5': '5c602afbbf2c1db91831f5d82f678554', 'info_dict': { 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ', 'ext': 'mp4', 'title': 'Big Buck Bunny.mp4', 'duration': 45, } + }, { + # video can't be watched anonymously due to view count limit reached, + # but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046) + 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view', + 'md5': 'bfbd670d03a470bb1e6d4a257adec12e', + 'info_dict': { + 'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ', + 'ext': 'mp4', + 'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4', + } }, { # video id is longer than 28 characters 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', - 'md5': 'c230c67252874fddd8170e3fd1a45886', 'info_dict': { 'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ', 'ext': 'mp4', @@ -147,47 +157,84 @@ class GoogleDriveIE(InfoExtractor): webpage = self._download_webpage( 'http://docs.google.com/file/d/%s' % video_id, video_id) - reason = self._search_regex( - r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) - if reason: - raise ExtractorError(reason) - - title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title') + title = self._search_regex( + r'"title"\s*,\s*"([^"]+)', webpage, 'title', + default=None) or self._og_search_title(webpage) duration = int_or_none(self._search_regex( r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None)) - fmt_stream_map = self._search_regex( - r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, - 'fmt stream map').split(',') - fmt_list = self._search_regex( - r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') - - resolutions = {} - for fmt in fmt_list: - mobj = re.search( - r'^(?P\d+)/(?P\d+)[xX](?P\d+)', fmt) - if mobj: - resolutions[mobj.group('format_id')] = ( - int(mobj.group('width')), int(mobj.group('height'))) formats = [] - for fmt_stream in fmt_stream_map: - fmt_stream_split = fmt_stream.split('|') - if len(fmt_stream_split) < 2: - continue - format_id, format_url = fmt_stream_split[:2] - f = { - 'url': lowercase_escape(format_url), - 'format_id': format_id, - 'ext': self._FORMATS_EXT[format_id], - } - resolution = resolutions.get(format_id) - if resolution: - f.update({ - 'width': resolution[0], - 'height': resolution[1], + fmt_stream_map = self._search_regex( + r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, + 'fmt stream map', default='').split(',') + fmt_list = self._search_regex( + r'"fmt_list"\s*,\s*"([^"]+)', webpage, + 'fmt_list', default='').split(',') + if fmt_stream_map and fmt_list: + resolutions = {} + for fmt in fmt_list: + mobj = re.search( + r'^(?P\d+)/(?P\d+)[xX](?P\d+)', fmt) + if mobj: + resolutions[mobj.group('format_id')] = ( + int(mobj.group('width')), int(mobj.group('height'))) + + for fmt_stream in fmt_stream_map: + fmt_stream_split = fmt_stream.split('|') + if len(fmt_stream_split) < 2: + continue + format_id, format_url = fmt_stream_split[:2] + f = { + 'url': lowercase_escape(format_url), + 'format_id': format_id, + 'ext': self._FORMATS_EXT[format_id], + } + resolution = resolutions.get(format_id) + if resolution: + f.update({ + 'width': resolution[0], + 'height': resolution[1], + }) + formats.append(f) + + source_url = update_url_query( + 'https://drive.google.com/uc', { + 'id': video_id, + 'export': 'download', + }) + urlh = self._request_webpage( + source_url, video_id, note='Requesting source file', + errnote='Unable to request source file', fatal=False) + if urlh: + def add_source_format(src_url): + formats.append({ + 'url': src_url, + 'ext': determine_ext(title, 'mp4').lower(), + 'format_id': 'source', + 'quality': 1, }) - formats.append(f) + if urlh.headers.get('Content-Disposition'): + add_source_format(source_url) + else: + confirmation_webpage = self._webpage_read_content( + urlh, url, video_id, note='Downloading confirmation page', + errnote='Unable to confirm download', fatal=False) + if confirmation_webpage: + confirm = self._search_regex( + r'confirm=([^&"\']+)', confirmation_webpage, + 'confirmation code', fatal=False) + if confirm: + add_source_format(update_url_query(source_url, { + 'confirm': confirm, + })) + + if not formats: + reason = self._search_regex( + r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) + if reason: + raise ExtractorError(reason, expected=True) + self._sort_formats(formats) hl = self._search_regex( From 1b41da488df64bd41463a79691330e555f79ac2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 28 Aug 2017 00:50:41 +0700 Subject: [PATCH 0207/2417] [googledrive] Extend _VALID_URL (closes #9785) --- youtube_dl/extractor/googledrive.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 37d373901..3bf462d63 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -13,7 +13,18 @@ from ..utils import ( class GoogleDriveIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P[a-zA-Z0-9_-]{28,})' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:docs|drive)\.google\.com/ + (?: + (?:uc|open)\?.*?id=| + file/d/ + )| + video\.google\.com/get_player\?.*?docid= + ) + (?P[a-zA-Z0-9_-]{28,}) + ''' _TESTS = [{ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', 'md5': '5c602afbbf2c1db91831f5d82f678554', @@ -42,7 +53,13 @@ class GoogleDriveIE(InfoExtractor): 'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4', 'duration': 189, }, - 'only_matching': True + 'only_matching': True, + }, { + 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28', + 'only_matching': True, + }, { + 'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28', + 'only_matching': True, }] _FORMATS_EXT = { '5': 'flv', From c75c384fb6d64f8d44c10d798cb64c8c00c61175 Mon Sep 17 00:00:00 2001 From: Ryan Schmidt Date: Sun, 27 Aug 2017 18:07:09 -0500 Subject: [PATCH 0208/2417] Fix build failures with old cp and zip --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 41e3a683a..c74eea792 100644 --- a/Makefile +++ b/Makefile @@ -49,11 +49,11 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py mkdir -p zip for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \ mkdir -p zip/$$d ;\ - cp -a $$d/*.py zip/$$d/ ;\ + cp -pPR $$d/*.py zip/$$d/ ;\ done touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py mv zip/youtube_dl/__main__.py zip/ - cd zip ; zip --quiet ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py + cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py rm -rf zip echo '#!$(PYTHON)' > youtube-dl cat youtube-dl.zip >> youtube-dl From 53647dfd0ae5f3d369e01f04acd5eff9713cc91d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Aug 2017 05:27:56 +0700 Subject: [PATCH 0209/2417] [bbccouk] Add support for w-prefixed ids (closes #14056) --- youtube_dl/extractor/bbc.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 911ae6780..8b20c03d6 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -29,7 +29,7 @@ from ..compat import ( class BBCCoUkIE(InfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' - _ID_REGEX = r'[pb][\da-z]{7}' + _ID_REGEX = r'[pbw][\da-z]{7}' _VALID_URL = r'''(?x) https?:// (?:www\.)?bbc\.co\.uk/ @@ -233,6 +233,9 @@ class BBCCoUkIE(InfoExtractor): }, { 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55', 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9', + 'only_matching': True, }] _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' From 5b4bfbfc3bba2174234834a08a97e261e492a2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Aug 2017 23:50:33 +0700 Subject: [PATCH 0210/2417] [charlierose] Add support for episodes (closes #14062) --- youtube_dl/extractor/charlierose.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py index 2d517f231..42c9af263 100644 --- a/youtube_dl/extractor/charlierose.py +++ b/youtube_dl/extractor/charlierose.py @@ -5,7 +5,7 @@ from ..utils import remove_end class CharlieRoseIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P\d+)' _TESTS = [{ 'url': 'https://charlierose.com/videos/27996', 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09', @@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor): }, { 'url': 'https://charlierose.com/videos/27996', 'only_matching': True, + }, { + 'url': 'https://charlierose.com/episodes/30887?autoplay=true', + 'only_matching': True, }] _PLAYER_BASE = 'https://charlierose.com/video/player/%s' From 7998520933db70625ff3859c464fa4bbf2ff7fac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 31 Aug 2017 00:47:58 +0700 Subject: [PATCH 0211/2417] [youtube] Fix upload date extraction (closes #14065) --- youtube_dl/extractor/youtube.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5a6b735a0..563cf6274 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1665,10 +1665,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not upload_date: upload_date = self._search_regex( [r'(?s)id="eow-date.*?>(.*?)', - r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)'], + r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'], video_webpage, 'upload date', default=None) - if upload_date: - upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) upload_date = unified_strdate(upload_date) video_license = self._html_search_regex( From 8d81f3e36d3caaeabcdff99d3340d4075d30741e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Sep 2017 00:57:14 +0700 Subject: [PATCH 0212/2417] [youtube] Force old layout for each webpage (closes #14083) --- youtube_dl/extractor/youtube.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 563cf6274..953e38227 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, + compat_kwargs, compat_parse_qs, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, @@ -245,6 +246,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return True + def _download_webpage(self, *args, **kwargs): + kwargs.setdefault('query', {})['disable_polymer'] = 'true' + return super(YoutubeBaseInfoExtractor, self)._download_webpage( + *args, **compat_kwargs(kwargs)) + def _real_initialize(self): if self._downloader is None: return @@ -2052,7 +2058,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): | (%(playlist_id)s) )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} - _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' + _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})&[^"]*?index=(?P\d+)(?:[^>]+>(?P[^<]+))?' IE_NAME = 'youtube:playlist' _TESTS = [{ From 8681ed7fc80a6b4b99e3a57adf5ec8d4177a601c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Sep 2017 01:04:22 +0700 Subject: [PATCH 0213/2417] [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index ef9ac4660..d89fb3d18 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +version <unreleased> + +Extractors +* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076, + #14077, #14079, #14082, #14083, #14094, #14095, #14096) +* [youtube] Fix upload date extraction (#14065) ++ [charlierose] Add support for episodes (#14062) ++ [bbccouk] Add support for w-prefixed ids (#14056) +* [googledrive] Extend URL regular expression (#9785) ++ [googledrive] Add support for source format (#14046) +* [pornhd] Fix extraction (#14005) + + version 2017.08.27.1 Extractors From a2022b0c406286aa3d5101f1c85e7d11453e89d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Sep 2017 01:08:32 +0700 Subject: [PATCH 0214/2417] release 2017.09.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7959d910b..bd9e21983 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.27.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.27.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.08.27.1 +[debug] youtube-dl version 2017.09.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d89fb3d18..c439c8ef9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.09.02 Extractors * [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076, diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7504ef0f0..60ed35de9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.08.27.1' +__version__ = '2017.09.02' From a3431e12249530aa7a6962e54bcdbfce190c4c4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Sep 2017 15:33:54 +0700 Subject: [PATCH 0215/2417] [radiocanada] Skip unsupported platforms (closes #14100) --- youtube_dl/extractor/radiocanada.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 3b40002a8..6bbc2781c 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -59,6 +59,7 @@ class RadioCanadaIE(InfoExtractor): device_types.append('android') formats = [] + error = None # TODO: extract f4m formats # f4m formats can be extracted using flashhd device_type but they produce unplayable file for device_type in device_types: @@ -84,8 +85,8 @@ class RadioCanadaIE(InfoExtractor): if not v_url: continue if v_url == 'null': - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, xpath_text(v_data, 'message')), expected=True) + error = xpath_text(v_data, 'message') + continue ext = determine_ext(v_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -129,6 +130,9 @@ class RadioCanadaIE(InfoExtractor): formats.extend(self._extract_f4m_formats( base_url + '/manifest.f4m', video_id, f4m_id='hds', fatal=False)) + if not formats and error: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error), expected=True) self._sort_formats(formats) subtitles = {} From 64f0e30b93db804323a6db382d4ccdf1dac4e38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Sep 2017 15:44:49 +0700 Subject: [PATCH 0216/2417] [viidea] Capture and output lecture error message (#14099) --- youtube_dl/extractor/viidea.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py index 4adcd1830..a0abbae60 100644 --- a/youtube_dl/extractor/viidea.py +++ b/youtube_dl/extractor/viidea.py @@ -4,12 +4,14 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urlparse, + compat_HTTPError, compat_str, + compat_urlparse, ) from ..utils import ( - parse_duration, + ExtractorError, js_to_json, + parse_duration, parse_iso8601, ) @@ -128,9 +130,16 @@ class ViideaIE(InfoExtractor): base_url = self._proto_relative_url(cfg['livepipe'], 'http:') - lecture_data = self._download_json( - '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id), - lecture_id)['lecture'][0] + try: + lecture_data = self._download_json( + '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id), + lecture_id)['lecture'][0] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + msg = self._parse_json( + e.cause.read().decode('utf-8'), lecture_id) + raise ExtractorError(msg['detail'], expected=True) + raise lecture_info = { 'id': lecture_id, From 503115540d8f135dc944ae48e40ba78f36238867 Mon Sep 17 00:00:00 2001 From: dubber0 <rexa.mose@gmail.com> Date: Sat, 22 Jul 2017 21:32:51 +0200 Subject: [PATCH 0217/2417] [aliexpress:live] Add extractor --- youtube_dl/extractor/aliexpress.py | 40 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 41 insertions(+) create mode 100644 youtube_dl/extractor/aliexpress.py diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py new file mode 100644 index 000000000..3997213f8 --- /dev/null +++ b/youtube_dl/extractor/aliexpress.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +import re + +from .common import InfoExtractor +from ..utils import try_get, float_or_none +from ..compat import compat_str + + +class AliExpressLiveIE(InfoExtractor): + + _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>[0-9]{16})' + _TEST = { + 'url': 'https://live.aliexpress.com/live/2800002704436634', + 'md5': '7ac2bc46afdd18f0b45a0a340fc47ffe', + 'info_dict': { + 'id': '2800002704436634', + 'ext': 'm3u8', + 'title': 'CASIMA7.22', + 'uploader': 'CASIMA Official Store', + 'upload_date': '20170714', + 'timestamp': 1500027138, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + page = self._download_webpage(url, video_id) + run_params_json = self._search_regex(r'runParams = (.+)[\s+]var myCtl', page, 'runParams', flags=re.DOTALL) + run_params = self._parse_json(run_params_json, video_id) + + return { + 'id': video_id, + 'title': run_params['title'], + 'url': run_params['replyStreamUrl'], + 'uploader': try_get(run_params, lambda x: x['followBar']['name'], compat_str), + 'timestamp': float_or_none(try_get(run_params, lambda x: x['followBar']['createTime']) / 1000), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 17048fd6e..d335f9fff 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -45,6 +45,7 @@ from .anvato import AnvatoIE from .anysex import AnySexIE from .aol import AolIE from .allocine import AllocineIE +from .aliexpress import AliExpressLiveIE from .aparat import AparatIE from .appleconnect import AppleConnectIE from .appletrailers import ( From 23b2df82c70a832e485aaf52befa26e27a904995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Sep 2017 16:04:36 +0700 Subject: [PATCH 0218/2417] [aliexpress:live] Fix issues (closes #13698, closes #13707) --- youtube_dl/extractor/aliexpress.py | 47 +++++++++++++++++++----------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py index 3997213f8..6f241e683 100644 --- a/youtube_dl/extractor/aliexpress.py +++ b/youtube_dl/extractor/aliexpress.py @@ -1,40 +1,53 @@ # coding: utf-8 from __future__ import unicode_literals - -import re - from .common import InfoExtractor -from ..utils import try_get, float_or_none from ..compat import compat_str +from ..utils import ( + float_or_none, + try_get, +) class AliExpressLiveIE(InfoExtractor): - - _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>[0-9]{16})' + _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)' _TEST = { 'url': 'https://live.aliexpress.com/live/2800002704436634', - 'md5': '7ac2bc46afdd18f0b45a0a340fc47ffe', + 'md5': 'e729e25d47c5e557f2630eaf99b740a5', 'info_dict': { 'id': '2800002704436634', - 'ext': 'm3u8', + 'ext': 'mp4', 'title': 'CASIMA7.22', + 'thumbnail': r're:http://.*\.jpg', 'uploader': 'CASIMA Official Store', - 'upload_date': '20170714', - 'timestamp': 1500027138, + 'timestamp': 1500717600, + 'upload_date': '20170722', }, } def _real_extract(self, url): video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - run_params_json = self._search_regex(r'runParams = (.+)[\s+]var myCtl', page, 'runParams', flags=re.DOTALL) - run_params = self._parse_json(run_params_json, video_id) + + webpage = self._download_webpage(url, video_id) + + data = self._parse_json( + self._search_regex( + r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var', + webpage, 'runParams'), + video_id) + + title = data['title'] + + formats = self._extract_m3u8_formats( + data['replyStreamUrl'], video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') return { 'id': video_id, - 'title': run_params['title'], - 'url': run_params['replyStreamUrl'], - 'uploader': try_get(run_params, lambda x: x['followBar']['name'], compat_str), - 'timestamp': float_or_none(try_get(run_params, lambda x: x['followBar']['createTime']) / 1000), + 'title': title, + 'thumbnail': data.get('coverUrl'), + 'uploader': try_get( + data, lambda x: x['followBar']['name'], compat_str), + 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), + 'formats': formats, } From 73602bcd0c254b735cc93ce5ffeca9e98228190e Mon Sep 17 00:00:00 2001 From: Tatsuyuki Ishi <ishitatsuyuki@gmail.com> Date: Fri, 1 Sep 2017 17:08:24 +0900 Subject: [PATCH 0219/2417] [soundcloud] Fix download URL with private tracks --- youtube_dl/extractor/soundcloud.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2e52e092b..23dcac803 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,8 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals -import re import itertools +import re from .common import ( InfoExtractor, @@ -17,7 +17,7 @@ from ..utils import ( ExtractorError, int_or_none, unified_strdate, -) + update_url_query) class SoundcloudIE(InfoExtractor): @@ -160,11 +160,13 @@ class SoundcloudIE(InfoExtractor): 'license': info.get('license'), } formats = [] + query = {'client_id': self._CLIENT_ID} + if secret_token is not None: + query['secret_token'] = secret_token if info.get('downloadable', False): # We can build a direct link to the song - format_url = ( - 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format( - track_id, self._CLIENT_ID)) + format_url = update_url_query( + 'https://api.soundcloud.com/tracks/{0}/download'.format(track_id), query) formats.append({ 'format_id': 'download', 'ext': info.get('original_format', 'mp3'), @@ -176,10 +178,7 @@ class SoundcloudIE(InfoExtractor): # We have to retrieve the url format_dict = self._download_json( 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id, - track_id, 'Downloading track url', query={ - 'client_id': self._CLIENT_ID, - 'secret_token': secret_token, - }) + track_id, 'Downloading track url', query=query) for key, stream_url in format_dict.items(): abr = int_or_none(self._search_regex( @@ -216,7 +215,7 @@ class SoundcloudIE(InfoExtractor): # cannot be always used, sometimes it can give an HTTP 404 error formats.append({ 'format_id': 'fallback', - 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, + 'url': update_url_query(info['stream_url'], query), 'ext': ext, }) From d7c7100e3d920512a11bf7c6fee21e26da7ffa73 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 3 Sep 2017 16:18:24 +0700 Subject: [PATCH 0220/2417] [soundcloud] Simplify and add test (closes #14093) --- youtube_dl/extractor/soundcloud.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 23dcac803..1c6799d57 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -17,7 +17,8 @@ from ..utils import ( ExtractorError, int_or_none, unified_strdate, - update_url_query) + update_url_query, +) class SoundcloudIE(InfoExtractor): @@ -120,6 +121,21 @@ class SoundcloudIE(InfoExtractor): 'license': 'cc-by-sa', }, }, + # private link, downloadable format + { + 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd', + 'md5': '64a60b16e617d41d0bef032b7f55441e', + 'info_dict': { + 'id': '340344461', + 'ext': 'wav', + 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', + 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', + 'uploader': 'Ori Uplift Music', + 'upload_date': '20170831', + 'duration': 7449, + 'license': 'all-rights-reserved', + }, + }, ] _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg' @@ -166,7 +182,7 @@ class SoundcloudIE(InfoExtractor): if info.get('downloadable', False): # We can build a direct link to the song format_url = update_url_query( - 'https://api.soundcloud.com/tracks/{0}/download'.format(track_id), query) + 'https://api.soundcloud.com/tracks/%s/download' % track_id, query) formats.append({ 'format_id': 'download', 'ext': info.get('original_format', 'mp3'), From 0cbb841ba94c8d813ff81e817154c5491a796f20 Mon Sep 17 00:00:00 2001 From: Timendum <timedum@gmail.com> Date: Thu, 31 Aug 2017 12:56:37 +0200 Subject: [PATCH 0221/2417] [bpb] Fix extraction (closes #14043) --- youtube_dl/extractor/bpb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index 9661ade4f..14bc0f77d 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -33,13 +33,13 @@ class BpbIE(InfoExtractor): title = self._html_search_regex( r'<h2 class="white">(.*?)</h2>', webpage, 'title') video_info_dicts = re.findall( - r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage) + r"({\s*src\s*:\s*'https://film\.bpb\.de/[^}]+})", webpage) formats = [] for video_info in video_info_dicts: video_info = self._parse_json(video_info, video_id, transform_source=js_to_json) - quality = video_info['quality'] video_url = video_info['src'] + quality = 'high' if re.search(r'_high\.', video_url) else 'low' formats.append({ 'url': video_url, 'preference': 10 if quality == 'high' else 0, From c1c1585b316995ca47b59e8dc1e3b463beb1c54a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Sep 2017 16:38:43 +0700 Subject: [PATCH 0222/2417] [bpb] Improve (closes #14086) --- youtube_dl/extractor/bpb.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index 14bc0f77d..07833532e 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -33,13 +33,18 @@ class BpbIE(InfoExtractor): title = self._html_search_regex( r'<h2 class="white">(.*?)</h2>', webpage, 'title') video_info_dicts = re.findall( - r"({\s*src\s*:\s*'https://film\.bpb\.de/[^}]+})", webpage) + r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage) formats = [] for video_info in video_info_dicts: - video_info = self._parse_json(video_info, video_id, transform_source=js_to_json) - video_url = video_info['src'] - quality = 'high' if re.search(r'_high\.', video_url) else 'low' + video_info = self._parse_json( + video_info, video_id, transform_source=js_to_json, fatal=False) + if not video_info: + continue + video_url = video_info.get('src') + if not video_url: + continue + quality = 'high' if '_high' in video_url else 'low' formats.append({ 'url': video_url, 'preference': 10 if quality == 'high' else 0, From 0b4a8eb3ac823c26b037eb368c114ce6d976c5c3 Mon Sep 17 00:00:00 2001 From: theychx <boomingzooming@gmail.com> Date: Mon, 28 Aug 2017 21:35:57 +0200 Subject: [PATCH 0223/2417] [vidme:user] Relax _VALID_URLs --- youtube_dl/extractor/vidme.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index a7971d72e..39b65ed2f 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -263,29 +263,43 @@ class VidmeListBaseIE(InfoExtractor): class VidmeUserIE(VidmeListBaseIE): IE_NAME = 'vidme:user' - _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)' + _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)' _API_ITEM = 'list' _TITLE = 'Videos' - _TEST = { - 'url': 'https://vid.me/EFARCHIVE', + _TESTS = [{ + 'url': 'https://vid.me/MasakoX', 'info_dict': { - 'id': '3834632', - 'title': 'EFARCHIVE - %s' % _TITLE, + 'id': '16112341', + 'title': 'MasakoX - %s' % _TITLE, }, - 'playlist_mincount': 238, - } + 'playlist_mincount': 191, + }, { + 'url': 'https://vid.me/unsQuare_netWork', + 'info_dict': { + 'id': '16148757', + 'title': 'unsQuare_netWork - %s' % _TITLE, + }, + 'playlist_mincount': 73, + }] class VidmeUserLikesIE(VidmeListBaseIE): IE_NAME = 'vidme:user:likes' - _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes' + _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes' _API_ITEM = 'likes' _TITLE = 'Likes' - _TEST = { + _TESTS = [{ 'url': 'https://vid.me/ErinAlexis/likes', 'info_dict': { 'id': '6483530', 'title': 'ErinAlexis - %s' % _TITLE, }, 'playlist_mincount': 415, - } + }, { + 'url': 'https://vid.me/Kaleidoscope-Ish/likes', + 'info_dict': { + 'id': '16908594', + 'title': 'Kaleidoscope-Ish - %s' % _TITLE, + }, + 'playlist_mincount': 43, + }] From bc35f075370ed1e67fe71c544e6243a2fc4fa430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Sep 2017 17:02:11 +0700 Subject: [PATCH 0224/2417] [vidme:user] Make tests only matching (closes #14054) --- youtube_dl/extractor/vidme.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index 39b65ed2f..59adb2377 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -275,11 +275,7 @@ class VidmeUserIE(VidmeListBaseIE): 'playlist_mincount': 191, }, { 'url': 'https://vid.me/unsQuare_netWork', - 'info_dict': { - 'id': '16148757', - 'title': 'unsQuare_netWork - %s' % _TITLE, - }, - 'playlist_mincount': 73, + 'only_matching': True, }] @@ -297,9 +293,5 @@ class VidmeUserLikesIE(VidmeListBaseIE): 'playlist_mincount': 415, }, { 'url': 'https://vid.me/Kaleidoscope-Ish/likes', - 'info_dict': { - 'id': '16908594', - 'title': 'Kaleidoscope-Ish - %s' % _TITLE, - }, - 'playlist_mincount': 43, + 'only_matching': True, }] From e9b865267aaa90e3b9e1b0468d20a4df31e13393 Mon Sep 17 00:00:00 2001 From: John D <jdong1992@gmail.com> Date: Wed, 30 Aug 2017 00:14:43 -0700 Subject: [PATCH 0225/2417] [manyvids] Add support for preview videos (closes #14053) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/manyvids.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/manyvids.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d335f9fff..46a11f3ef 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -564,6 +564,7 @@ from .mangomolo import ( MangomoloVideoIE, MangomoloLiveIE, ) +from .manyvids import ManyVidsIE from .matchtv import MatchTVIE from .mdr import MDRIE from .mediaset import MediasetIE diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py new file mode 100644 index 000000000..ea739ce3f --- /dev/null +++ b/youtube_dl/extractor/manyvids.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class ManyVidsIE(InfoExtractor): + _VALID_URL = r'https?://www.manyvids\.com/Video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', + 'md5': '03f11bb21c52dd12a05be21a5c7dcc97', + 'info_dict': { + 'id': '133957', + 'ext': 'mp4', + 'title': 'everthing about me', + + } + } + + def _real_extract(self, url): + formats = [] + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_url = compat_urllib_parse_unquote(self._search_regex( + r'data-video-filepath=\"(.+?)\"', webpage, 'video URL', default='')) + + title = self._html_search_regex(r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title') + formats.append({ + 'url': video_url + }) + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } From efc57145c10bdf22da9d8571c35ccd0404e3b7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Sep 2017 17:30:02 +0700 Subject: [PATCH 0226/2417] [manyvids] Improve (closes #14059) --- youtube_dl/extractor/manyvids.py | 40 +++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py index ea739ce3f..b94b3c2ab 100644 --- a/youtube_dl/extractor/manyvids.py +++ b/youtube_dl/extractor/manyvids.py @@ -2,35 +2,47 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..utils import int_or_none class ManyVidsIE(InfoExtractor): - _VALID_URL = r'https?://www.manyvids\.com/Video/(?P<id>[0-9]+)' + _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)' _TEST = { 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', 'md5': '03f11bb21c52dd12a05be21a5c7dcc97', 'info_dict': { 'id': '133957', 'ext': 'mp4', - 'title': 'everthing about me', - - } + 'title': 'everthing about me (Preview)', + 'view_count': int, + 'like_count': int, + }, } def _real_extract(self, url): - formats = [] video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_url = compat_urllib_parse_unquote(self._search_regex( - r'data-video-filepath=\"(.+?)\"', webpage, 'video URL', default='')) - title = self._html_search_regex(r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title') - formats.append({ - 'url': video_url - }) + webpage = self._download_webpage(url, video_id) + + video_url = self._search_regex( + r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video URL', group='url') + + title = '%s (Preview)' % self._html_search_regex( + r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title') + + like_count = int_or_none(self._search_regex( + r'data-likes=["\'](\d+)', webpage, 'like count', default=None)) + view_count = int_or_none(self._html_search_regex( + r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage, + 'view count', default=None)) + return { 'id': video_id, 'title': title, - 'formats': formats, + 'view_count': view_count, + 'like_count': like_count, + 'formats': [{ + 'url': video_url, + }], } From 6348671c4a4e9f45cebc107ff4c148ef4970bb39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Sep 2017 23:08:07 +0700 Subject: [PATCH 0227/2417] [arte] Relax unavailability check (closes #14112) --- youtube_dl/extractor/arte.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 02613cf5d..5cde90c5b 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -82,7 +82,7 @@ class ArteTVBaseIE(InfoExtractor): vsr = player_info['VSR'] - if not vsr and not player_info.get('VRU'): + if not vsr: raise ExtractorError( 'Video %s is not available' % player_info.get('VID') or video_id, expected=True) From 880fa66f4ffa9afcfce91b5ce39f05909050da67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Sep 2017 22:45:07 +0700 Subject: [PATCH 0228/2417] [redtube] Fix formats extraction (closes #14122) --- youtube_dl/extractor/redtube.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index c367a6ae7..f70a75256 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -62,7 +63,23 @@ class RedTubeIE(InfoExtractor): 'format_id': format_id, 'height': int_or_none(format_id), }) - else: + medias = self._parse_json( + self._search_regex( + r'mediaDefinition\s*:\s*(\[.+?\])', webpage, + 'media definitions', default='{}'), + video_id, fatal=False) + if medias and isinstance(medias, list): + for media in medias: + format_url = media.get('videoUrl') + if not format_url or not isinstance(format_url, compat_str): + continue + format_id = media.get('quality') + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'height': int_or_none(format_id), + }) + if not formats: video_url = self._html_search_regex( r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') formats.append({'url': video_url}) @@ -73,7 +90,7 @@ class RedTubeIE(InfoExtractor): r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<', webpage, 'upload date', fatal=False)) duration = int_or_none(self._search_regex( - r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) + r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)', webpage, 'view count', fatal=False)) From c5c9bf0c120d2c481124a0c3913b981cf061fb95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Sep 2017 23:31:34 +0700 Subject: [PATCH 0229/2417] [YoutubeDL] Ensure dir existence for each requested format (closes #14116) --- youtube_dl/YoutubeDL.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5f4c93ea3..4f208f1e1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1710,12 +1710,17 @@ class YoutubeDL(object): if filename is None: return - try: - dn = os.path.dirname(sanitize_path(encodeFilename(filename))) - if dn and not os.path.exists(dn): - os.makedirs(dn) - except (OSError, IOError) as err: - self.report_error('unable to create directory ' + error_to_compat_str(err)) + def ensure_dir_exists(path): + try: + dn = os.path.dirname(path) + if dn and not os.path.exists(dn): + os.makedirs(dn) + return True + except (OSError, IOError) as err: + self.report_error('unable to create directory ' + error_to_compat_str(err)) + return False + + if not ensure_dir_exists(sanitize_path(encodeFilename(filename))): return if self.params.get('writedescription', False): @@ -1853,8 +1858,11 @@ class YoutubeDL(object): for f in requested_formats: new_info = dict(info_dict) new_info.update(f) - fname = self.prepare_filename(new_info) - fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext']) + fname = prepend_extension( + self.prepare_filename(new_info), + 'f%s' % f['format_id'], new_info['ext']) + if not ensure_dir_exists(fname): + return downloaded.append(fname) partial_success = dl(fname, new_info) success = success and partial_success From 66c9fa36c10860b380806b9de48f38d628289e03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 Sep 2017 00:48:37 +0700 Subject: [PATCH 0230/2417] [youtube] Separate methods for embeds extraction --- youtube_dl/extractor/generic.py | 33 ++++------------------------- youtube_dl/extractor/youtube.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c81efdc00..b83c18380 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2243,36 +2243,11 @@ class GenericIE(InfoExtractor): if vid_me_embed_url is not None: return self.url_result(vid_me_embed_url, 'Vidme') - # Look for embedded YouTube player - matches = re.findall(r'''(?x) - (?: - <iframe[^>]+?src=| - data-video-url=| - <embed[^>]+?src=| - embedSWF\(?:\s*| - <object[^>]+data=| - new\s+SWFObject\( - ) - (["\']) - (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ - (?:embed|v|p)/.+?) - \1''', webpage) - if matches: + # Look for YouTube embeds + youtube_urls = YoutubeIE._extract_urls(webpage) + if youtube_urls: return self.playlist_from_matches( - matches, video_id, video_title, lambda m: unescapeHTML(m[1])) - - # Look for lazyYT YouTube embed - matches = re.findall( - r'class="lazyYT" data-youtube-id="([^"]+)"', webpage) - if matches: - return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m)) - - # Look for Wordpress "YouTube Video Importer" plugin - matches = re.findall(r'''(?x)<div[^>]+ - class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ - data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) - if matches: - return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1]) + youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key()) matches = DailymotionIE._extract_urls(webpage) if matches: diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 953e38227..ad2e933ee 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor): playback_url, video_id, 'Marking watched', 'Unable to mark watched', fatal=False) + @staticmethod + def _extract_urls(webpage): + # Embedded YouTube player + entries = [ + unescapeHTML(mobj.group('url')) + for mobj in re.finditer(r'''(?x) + (?: + <iframe[^>]+?src=| + data-video-url=| + <embed[^>]+?src=| + embedSWF\(?:\s*| + <object[^>]+data=| + new\s+SWFObject\( + ) + (["\']) + (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ + (?:embed|v|p)/.+?) + \1''', webpage)] + + # lazyYT YouTube embed + entries.extend(list(map( + unescapeHTML, + re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))) + + # Wordpress "YouTube Video Importer" plugin + matches = re.findall(r'''(?x)<div[^>]+ + class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ + data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) + entries.extend(m[-1] for m in matches) + + return entries + + @staticmethod + def _extract_url(webpage): + urls = YoutubeIE._extract_urls(webpage) + return urls[0] if urls else None + @classmethod def extract_id(cls, url): mobj = re.match(cls._VALID_URL, url, re.VERBOSE) From 5113b6912467619bd463c5ebefe759d07078bea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 Sep 2017 00:50:25 +0700 Subject: [PATCH 0231/2417] [abcnews,chilloutsoze,cracked,vice,vk] Use dedicated YouTube embeds extraction routines --- youtube_dl/extractor/abcnews.py | 7 +++---- youtube_dl/extractor/chilloutzone.py | 9 ++++----- youtube_dl/extractor/cracked.py | 7 +++---- youtube_dl/extractor/vice.py | 7 +++---- youtube_dl/extractor/vk.py | 7 +++---- 5 files changed, 16 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index 74d54560c..f770fe901 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -7,6 +7,7 @@ import time from .amp import AMPIE from .common import InfoExtractor +from .youtube import YoutubeIE from ..compat import compat_urlparse @@ -108,9 +109,7 @@ class AbcNewsIE(InfoExtractor): r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL') full_video_url = compat_urlparse.urljoin(url, video_url) - youtube_url = self._html_search_regex( - r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"', - webpage, 'YouTube URL', default=None) + youtube_url = YoutubeIE._extract_url(webpage) timestamp = None date_str = self._html_search_regex( @@ -140,7 +139,7 @@ class AbcNewsIE(InfoExtractor): } if youtube_url: - entries = [entry, self.url_result(youtube_url, 'Youtube')] + entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())] return self.playlist_result(entries) return entry diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py index 0206d96db..d4769da75 100644 --- a/youtube_dl/extractor/chilloutzone.py +++ b/youtube_dl/extractor/chilloutzone.py @@ -5,6 +5,7 @@ import base64 import json from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( clean_html, ExtractorError @@ -70,11 +71,9 @@ class ChilloutzoneIE(InfoExtractor): # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed) if native_platform is None: - youtube_url = self._html_search_regex( - r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', - webpage, 'fallback video URL', default=None) - if youtube_url is not None: - return self.url_result(youtube_url, ie='Youtube') + youtube_url = YoutubeIE._extract_url(webpage) + if youtube_url: + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or # the own CDN diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py index 94d03ce2a..f77a68ece 100644 --- a/youtube_dl/extractor/cracked.py +++ b/youtube_dl/extractor/cracked.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( parse_iso8601, str_to_int, @@ -41,11 +42,9 @@ class CrackedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - youtube_url = self._search_regex( - r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', - webpage, 'youtube url', default=None) + youtube_url = YoutubeIE._extract_url(webpage) if youtube_url: - return self.url_result(youtube_url, 'Youtube') + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) video_url = self._html_search_regex( [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 54e207b39..b8b8bf979 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -7,6 +7,7 @@ import hashlib import json from .adobepass import AdobePassIE +from .youtube import YoutubeIE from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( @@ -261,11 +262,9 @@ class ViceArticleIE(InfoExtractor): if embed_code: return _url_res('ooyala:%s' % embed_code, 'Ooyala') - youtube_url = self._html_search_regex( - r'<iframe[^>]+src="(.*youtube\.com/.*)"', - body, 'YouTube URL', default=None) + youtube_url = YoutubeIE._extract_url(body) if youtube_url: - return _url_res(youtube_url, 'Youtube') + return _url_res(youtube_url, YoutubeIE.ie_key()) video_url = self._html_search_regex( r'data-video-url="([^"]+)"', diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index dc2719cf9..105e172d5 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -25,6 +25,7 @@ from ..utils import ( from .dailymotion import DailymotionIE from .pladform import PladformIE from .vimeo import VimeoIE +from .youtube import YoutubeIE class VKBaseIE(InfoExtractor): @@ -345,11 +346,9 @@ class VKIE(VKBaseIE): if re.search(error_re, info_page): raise ExtractorError(error_msg % video_id, expected=True) - youtube_url = self._search_regex( - r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"', - info_page, 'youtube iframe', default=None) + youtube_url = YoutubeIE._extract_url(info_page) if youtube_url: - return self.url_result(youtube_url, 'Youtube') + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) vimeo_url = VimeoIE._extract_url(url, info_page) if vimeo_url is not None: From 931edb2ada89db2bf3596ce1ad5c4d808914c7ab Mon Sep 17 00:00:00 2001 From: Olivier Bilodeau <olivier@bottomlesspit.org> Date: Fri, 8 Sep 2017 10:53:24 -0400 Subject: [PATCH 0232/2417] [radiocanada] Add fallback for title extraction --- youtube_dl/extractor/radiocanada.py | 45 ++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 6bbc2781c..b952e59b4 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -20,20 +20,37 @@ from ..utils import ( class RadioCanadaIE(InfoExtractor): IE_NAME = 'radiocanada' _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' - _TEST = { - 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', - 'info_dict': { - 'id': '7184272', - 'ext': 'mp4', - 'title': 'Le parcours du tireur capté sur vidéo', - 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', - 'upload_date': '20141023', + _TESTS = [ + { + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', + 'info_dict': { + 'id': '7184272', + 'ext': 'mp4', + 'title': 'Le parcours du tireur capté sur vidéo', + 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', + 'upload_date': '20141023', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } + { + # empty Title + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/', + 'info_dict': { + 'id': '7754998', + 'ext': 'mp4', + 'title': 'letelejournal22h', + 'description': 'INTEGRALE WEB 22H-TJ', + 'upload_date': '20170720', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + ] def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -145,7 +162,7 @@ class RadioCanadaIE(InfoExtractor): return { 'id': video_id, - 'title': get_meta('Title'), + 'title': get_meta('Title') or get_meta('AV-nomEmission'), 'description': get_meta('Description') or get_meta('ShortDescription'), 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), 'duration': int_or_none(get_meta('length')), From 51aee72d16eb844377a44c12e50dbb95cd4ced27 Mon Sep 17 00:00:00 2001 From: kayb94 <30302445+kayb94@users.noreply.github.com> Date: Fri, 8 Sep 2017 15:13:17 +0000 Subject: [PATCH 0233/2417] [README.md] Clarify how to run extractor specific test cases --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6f5d00df3..28ee63f40 100644 --- a/README.md +++ b/README.md @@ -936,6 +936,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file python test/test_download.py nosetests +See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. + If you want to create a build of youtube-dl yourself, you'll need * python @@ -1003,7 +1005,7 @@ After you have ensured this site is distributing its content legally, you can fo } ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). -6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. +6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: From debed8d759e74507371758d2344ce5afe5e237c2 Mon Sep 17 00:00:00 2001 From: luceatnobis <wehrmeyer.martin@web.de> Date: Tue, 4 Jul 2017 11:26:02 +0200 Subject: [PATCH 0234/2417] [rutube:playlist] Add extractor (closes #13534) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rutube.py | 84 ++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 46a11f3ef..aefadc56f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -899,6 +899,7 @@ from .rutube import ( RutubeEmbedIE, RutubeMovieIE, RutubePersonIE, + RutubePlaylistIE, ) from .rutv import RUTVIE from .ruutu import RuutuIE diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 889fa7628..a6b17c0ef 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -7,10 +7,14 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_str, + compat_parse_qs, + compat_urllib_parse_urlparse, ) from ..utils import ( determine_ext, unified_strdate, + try_get, + int_or_none, ) @@ -42,8 +46,24 @@ class RutubeIE(InfoExtractor): }, { 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, + }, { + 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + parts = compat_urllib_parse_urlparse(url) + params = compat_parse_qs(parts.query) + + # see if URL without parameters is OK + res = super(RutubeIE, cls).suitable(url) + + if params: # we only allow pl_id parameter in the url + res = res and 'pl_id' in params and len(params) == 1 + + return res + @staticmethod def _extract_urls(webpage): return [mobj.group('url') for mobj in re.finditer( @@ -193,3 +213,67 @@ class RutubePersonIE(RutubeChannelIE): }] _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' + + +class RutubePlaylistIE(InfoExtractor): + IE_NAME = 'rutube:playlist' + IE_DESC = 'Rutube playlists' + _TESTS = [{ + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', + 'info_dict': { + 'id': '4252', + }, + 'playlist_count': 25, + }] + + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?(?:.+)?pl_id=(?P<id>\d+)' + _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s' + + @staticmethod + def suitable(url): + params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + return params.get('pl_id') and int_or_none(params['pl_id'][0]) \ + and params.get('pl_type') + + def _real_extract(self, url): + playlist_id = self._match_id(url) + return self._extract_playlist(playlist_id) + + def _extract_playlist(self, playlist_id): + entries = [] + for pagenum in itertools.count(1): + page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum) + + # download_json will sent an accept: application/xml header + page = self._download_json(page_url, playlist_id, + "Downloading metadata for page %s" % pagenum, + headers={'Accept': 'application/json'}) + + if not page['results']: + break + + results = page['results'] + for result in results: + entry = self.url_result(result.get('video_url'), 'Rutube') + category = try_get(result, lambda x: x['category']['name']) + entry.update({ + 'id': result.get('id'), + 'uploader': try_get(result, lambda x: x['author']['name']), + 'uploader_id': try_get(result, lambda x: x['author']['id']), + 'upload_date': unified_strdate(result.get('created_ts')), + 'title': result.get('title'), + 'description': result.get('description'), + 'thumbnail': result.get('thumbnail_url'), + 'duration': int_or_none(result.get('duration')), + 'category': [category] if category else None, + 'age_limit': 18 if result.get('is_adult') else 0, + 'view_count': int_or_none(result.get('hits')), + 'is_live': result.get('is_livestream'), + 'webpage_url': result.get('video_url'), + }) + entries.append(entry) + + if page['has_next'] is False: + break + + return self.playlist_result(entries, playlist_id, page['name']) From 48b813748d91acc7e9efc15075079a03faea18ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 18:39:13 +0700 Subject: [PATCH 0235/2417] [rutube] Rework and generalize playlist extractors (closes #13565) --- youtube_dl/extractor/rutube.py | 216 ++++++++++++++++----------------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index a6b17c0ef..5a184879f 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -12,34 +12,60 @@ from ..compat import ( ) from ..utils import ( determine_ext, - unified_strdate, + unified_timestamp, try_get, int_or_none, ) -class RutubeIE(InfoExtractor): +class RutubeBaseIE(InfoExtractor): + def _extract_video(self, video, video_id=None, require_title=True): + title = video['title'] if require_title else video.get('title') + + age_limit = video.get('is_adult') + if age_limit is not None: + age_limit = 18 if age_limit is True else 0 + + uploader_id = try_get(video, lambda x: x['author']['id']) + category = try_get(video, lambda x: x['category']['name']) + + return { + 'id': video.get('id') or video_id, + 'title': title, + 'description': video.get('description'), + 'thumbnail': video.get('thumbnail_url'), + 'duration': int_or_none(video.get('duration')), + 'uploader': try_get(video, lambda x: x['author']['name']), + 'uploader_id': compat_str(uploader_id) if uploader_id else None, + 'timestamp': unified_timestamp(video.get('created_ts')), + 'category': [category] if category else None, + 'age_limit': age_limit, + 'view_count': int_or_none(video.get('hits')), + 'comment_count': int_or_none(video.get('comments_count')), + 'is_live': video.get('is_livestream'), + } + + +class RutubeIE(RutubeBaseIE): IE_NAME = 'rutube' IE_DESC = 'Rutube videos' _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})' _TESTS = [{ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', + 'md5': '79938ade01294ef7e27574890d0d3769', 'info_dict': { 'id': '3eac3b4561676c17df9132a9a1e62e3e', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Раненный кенгуру забежал в аптеку', 'description': 'http://www.ntdtv.ru ', 'duration': 80, 'uploader': 'NTDRussian', 'uploader_id': '29790', + 'timestamp': 1381943602, 'upload_date': '20131016', 'age_limit': 0, }, - 'params': { - # It requires ffmpeg (m3u8 download) - 'skip_download': True, - }, }, { 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, @@ -49,20 +75,14 @@ class RutubeIE(InfoExtractor): }, { 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', 'only_matching': True, + }, { + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', + 'only_matching': True, }] @classmethod def suitable(cls, url): - parts = compat_urllib_parse_urlparse(url) - params = compat_parse_qs(parts.query) - - # see if URL without parameters is OK - res = super(RutubeIE, cls).suitable(url) - - if params: # we only allow pl_id parameter in the url - res = res and 'pl_id' in params and len(params) == 1 - - return res + return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url) @staticmethod def _extract_urls(webpage): @@ -72,12 +92,12 @@ class RutubeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + video = self._download_json( 'http://rutube.ru/api/video/%s/?format=json' % video_id, video_id, 'Downloading video JSON') - # Some videos don't have the author field - author = video.get('author') or {} + info = self._extract_video(video, video_id) options = self._download_json( 'http://rutube.ru/api/play/options/%s/?format=json' % video_id, @@ -99,19 +119,8 @@ class RutubeIE(InfoExtractor): }) self._sort_formats(formats) - return { - 'id': video['id'], - 'title': video['title'], - 'description': video['description'], - 'duration': video['duration'], - 'view_count': video['hits'], - 'formats': formats, - 'thumbnail': video['thumbnail_url'], - 'uploader': author.get('name'), - 'uploader_id': compat_str(author['id']) if author else None, - 'upload_date': unified_strdate(video['created_ts']), - 'age_limit': 18 if video['is_adult'] else 0, - } + info['formats'] = formats + return info class RutubeEmbedIE(InfoExtractor): @@ -123,7 +132,8 @@ class RutubeEmbedIE(InfoExtractor): 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'info_dict': { 'id': 'a10e53b86e8f349080f718582ce4c661', - 'ext': 'mp4', + 'ext': 'flv', + 'timestamp': 1387830582, 'upload_date': '20131223', 'uploader_id': '297833', 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', @@ -131,7 +141,7 @@ class RutubeEmbedIE(InfoExtractor): 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', }, 'params': { - 'skip_download': 'Requires ffmpeg', + 'skip_download': True, }, }, { 'url': 'http://rutube.ru/play/embed/8083783', @@ -145,10 +155,51 @@ class RutubeEmbedIE(InfoExtractor): canonical_url = self._html_search_regex( r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, 'Canonical URL') - return self.url_result(canonical_url, 'Rutube') + return self.url_result(canonical_url, RutubeIE.ie_key()) -class RutubeChannelIE(InfoExtractor): +class RutubePlaylistBaseIE(RutubeBaseIE): + def _next_page_url(self, page_num, playlist_id, *args, **kwargs): + return self._PAGE_TEMPLATE % (playlist_id, page_num) + + def _entries(self, playlist_id, *args, **kwargs): + next_page_url = None + for pagenum in itertools.count(1): + page = self._download_json( + next_page_url or self._next_page_url( + pagenum, playlist_id, *args, **kwargs), + playlist_id, 'Downloading page %s' % pagenum) + + results = page.get('results') + if not results or not isinstance(results, list): + break + + for result in results: + video_url = result.get('video_url') + if not video_url or not isinstance(video_url, compat_str): + continue + entry = self._extract_video(result, require_title=False) + entry.update({ + '_type': 'url', + 'url': video_url, + 'ie_key': RutubeIE.ie_key(), + }) + yield entry + + next_page_url = page.get('next') + if not next_page_url or not page.get('has_next'): + break + + def _extract_playlist(self, playlist_id, *args, **kwargs): + return self.playlist_result( + self._entries(playlist_id, *args, **kwargs), + playlist_id, kwargs.get('playlist_name')) + + def _real_extract(self, url): + return self._extract_playlist(self._match_id(url)) + + +class RutubeChannelIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channels' _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)' @@ -162,27 +213,8 @@ class RutubeChannelIE(InfoExtractor): _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' - def _extract_videos(self, channel_id, channel_title=None): - entries = [] - for pagenum in itertools.count(1): - page = self._download_json( - self._PAGE_TEMPLATE % (channel_id, pagenum), - channel_id, 'Downloading page %s' % pagenum) - results = page['results'] - if not results: - break - entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results) - if not page['has_next']: - break - return self.playlist_result(entries, channel_id, channel_title) - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - return self._extract_videos(channel_id) - - -class RutubeMovieIE(RutubeChannelIE): +class RutubeMovieIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:movie' IE_DESC = 'Rutube movies' _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)' @@ -196,11 +228,11 @@ class RutubeMovieIE(RutubeChannelIE): movie = self._download_json( self._MOVIE_TEMPLATE % movie_id, movie_id, 'Downloading movie JSON') - movie_name = movie['name'] - return self._extract_videos(movie_id, movie_name) + return self._extract_playlist( + movie_id, playlist_name=movie.get('name')) -class RutubePersonIE(RutubeChannelIE): +class RutubePersonIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:person' IE_DESC = 'Rutube person videos' _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)' @@ -215,65 +247,33 @@ class RutubePersonIE(RutubeChannelIE): _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' -class RutubePlaylistIE(InfoExtractor): +class RutubePlaylistIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:playlist' IE_DESC = 'Rutube playlists' + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)' _TESTS = [{ - 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', + 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', 'info_dict': { - 'id': '4252', + 'id': '3097', }, - 'playlist_count': 25, + 'playlist_count': 27, + }, { + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', + 'only_matching': True, }] - _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?(?:.+)?pl_id=(?P<id>\d+)' - _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s' + _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' @staticmethod def suitable(url): params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - return params.get('pl_id') and int_or_none(params['pl_id'][0]) \ - and params.get('pl_type') + return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) + + def _next_page_url(self, page_num, playlist_id, item_kind): + return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) def _real_extract(self, url): - playlist_id = self._match_id(url) - return self._extract_playlist(playlist_id) - - def _extract_playlist(self, playlist_id): - entries = [] - for pagenum in itertools.count(1): - page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum) - - # download_json will sent an accept: application/xml header - page = self._download_json(page_url, playlist_id, - "Downloading metadata for page %s" % pagenum, - headers={'Accept': 'application/json'}) - - if not page['results']: - break - - results = page['results'] - for result in results: - entry = self.url_result(result.get('video_url'), 'Rutube') - category = try_get(result, lambda x: x['category']['name']) - entry.update({ - 'id': result.get('id'), - 'uploader': try_get(result, lambda x: x['author']['name']), - 'uploader_id': try_get(result, lambda x: x['author']['id']), - 'upload_date': unified_strdate(result.get('created_ts')), - 'title': result.get('title'), - 'description': result.get('description'), - 'thumbnail': result.get('thumbnail_url'), - 'duration': int_or_none(result.get('duration')), - 'category': [category] if category else None, - 'age_limit': 18 if result.get('is_adult') else 0, - 'view_count': int_or_none(result.get('hits')), - 'is_live': result.get('is_livestream'), - 'webpage_url': result.get('video_url'), - }) - entries.append(entry) - - if page['has_next'] is False: - break - - return self.playlist_result(entries, playlist_id, page['name']) + qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + playlist_kind = qs['pl_type'][0] + playlist_id = qs['pl_id'][0] + return self._extract_playlist(playlist_id, item_kind=playlist_kind) From c7e327c4d46a9b72f3f707710194dccf6eee50d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 19:08:39 +0700 Subject: [PATCH 0236/2417] [utils] Introduce bool_or_none --- youtube_dl/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2554a2abd..c42dd4c3a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1815,6 +1815,10 @@ def float_or_none(v, scale=1, invscale=1, default=None): return default +def bool_or_none(v, default=None): + return v if isinstance(v, bool) else default + + def strip_or_none(v): return None if v is None else v.strip() From c3dd44e08577c2ae0d08951037db5d1db7a321c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 19:09:27 +0700 Subject: [PATCH 0237/2417] [rutube] Use bool_or_none --- youtube_dl/extractor/rutube.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 5a184879f..828c03b48 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -12,9 +12,10 @@ from ..compat import ( ) from ..utils import ( determine_ext, - unified_timestamp, - try_get, + bool_or_none, int_or_none, + try_get, + unified_timestamp, ) @@ -42,7 +43,7 @@ class RutubeBaseIE(InfoExtractor): 'age_limit': age_limit, 'view_count': int_or_none(video.get('hits')), 'comment_count': int_or_none(video.get('comments_count')), - 'is_live': video.get('is_livestream'), + 'is_live': bool_or_none(video.get('is_livestream')), } From bf6ec2fea9087235c14df2a079620fcc2c17b5eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 22:08:32 +0700 Subject: [PATCH 0238/2417] [fox] Fix extraction (#14147) --- youtube_dl/extractor/fox.py | 121 ++++++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 159fdf9c4..facc665f6 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -3,56 +3,99 @@ from __future__ import unicode_literals from .adobepass import AdobePassIE from ..utils import ( - smuggle_url, - update_url_query, + int_or_none, + parse_age_limit, + parse_duration, + try_get, + unified_timestamp, ) class FOXIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.fox.com/watch/255180355939/7684182528', + _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)' + _TESTS = [{ + # clip + 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', 'md5': 'ebd296fcc41dd4b19f8115d8461a3165', 'info_dict': { - 'id': '255180355939', + 'id': '4b765a60490325103ea69888fb2bd4e8', 'ext': 'mp4', - 'title': 'Official Trailer: Gotham', - 'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.', - 'duration': 129, - 'timestamp': 1400020798, - 'upload_date': '20140513', - 'uploader': 'NEWA-FNG-FOXCOM', + 'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight', + 'description': 'md5:549cd9c70d413adb32ce2a779b53b486', + 'duration': 102, + 'timestamp': 1504291893, + 'upload_date': '20170901', + 'creator': 'FOX', + 'series': 'Gotham', }, - 'add_ie': ['ThePlatform'], - } + 'params': { + 'skip_download': True, + }, + }, { + # episode, geo-restricted + 'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/', + 'only_matching': True, + }, { + # episode, geo-restricted, tv provided required + 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - settings = self._parse_json(self._search_regex( - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', - webpage, 'drupal settings'), video_id) - fox_pdk_player = settings['fox_pdk_player'] - release_url = fox_pdk_player['release_url'] - query = { - 'mbr': 'true', - 'switch': 'http' - } - if fox_pdk_player.get('access') == 'locked': - ap_p = settings['foxAdobePassProvider'] - rating = ap_p.get('videoRating') - if rating == 'n/a': - rating = None - resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating) - query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource) + video = self._download_json( + 'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id, + video_id, headers={ + 'apikey': 'abdcbed02c124d393b39e818a4312055', + 'Content-Type': 'application/json', + 'Referer': url, + }) - info = self._search_json_ld(webpage, video_id, fatal=False) - info.update({ - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), + title = video['name'] + + m3u8_url = self._download_json( + video['videoRelease']['url'], video_id)['playURL'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + description = video.get('description') + duration = int_or_none(video.get('durationInSeconds')) or int_or_none( + video.get('duration')) or parse_duration(video.get('duration')) + timestamp = unified_timestamp(video.get('datePublished')) + age_limit = parse_age_limit(video.get('contentRating')) + + data = try_get( + video, lambda x: x['trackingData']['properties'], dict) or {} + + creator = data.get('brand') or data.get('network') or video.get('network') + + series = video.get('seriesName') or data.get( + 'seriesName') or data.get('show') + season_number = int_or_none(video.get('seasonNumber')) + episode = video.get('name') + episode_number = int_or_none(video.get('episodeNumber')) + release_year = int_or_none(video.get('releaseYear')) + + if data.get('authRequired'): + # TODO: AP + pass + + return { 'id': video_id, - }) - - return info + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'age_limit': age_limit, + 'creator': creator, + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + 'release_year': release_year, + 'formats': formats, + } From b98339b54b1de517def970a955cbbdda3e1d4874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 22:15:55 +0700 Subject: [PATCH 0239/2417] [ChangeLog] Actualize --- ChangeLog | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ChangeLog b/ChangeLog index c439c8ef9..86b36c37e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +version <unreleased> + +Core ++ [utils] Introduce bool_or_none +* [YoutubeDL] Ensure dir existence for each requested format (#14116) + +Extractors +* [fox] Fix extraction (#14147) +* [rutube] Use bool_or_none +* [rutube] Rework and generalize playlist extractors (#13565) ++ [rutube:playlist] Add support for playlists (#13534, #13565) ++ [radiocanada] Add fallback for title extraction (#14145) +* [vk] Use dedicated YouTube embeds extraction routine +* [vice] Use dedicated YouTube embeds extraction routine +* [cracked] Use dedicated YouTube embeds extraction routine +* [chilloutzone] Use dedicated YouTube embeds extraction routine +* [abcnews] Use dedicated YouTube embeds extraction routine +* [youtube] Separate methods for embeds extraction +* [redtube] Fix formats extraction (#14122) +* [arte] Relax unavailability check (#14112) ++ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059) +* [vidme:user] Relax URL regular expression (#14054) +* [bpb] Fix extraction (#14043, #14086) +* [soundcloud] Fix download URL with private tracks (#14093) +* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707) +* [viidea] Capture and output lecture error message (#14099) +* [radiocanada] Skip unsupported platforms (#14100) + + version 2017.09.02 Extractors From 806498cf2f35cc98cf0e6c5b46f58ca357a842de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 10 Sep 2017 22:16:55 +0700 Subject: [PATCH 0240/2417] release 2017.09.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 4 +++- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index bd9e21983..fb934d4ba 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.09.02 +[debug] youtube-dl version 2017.09.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a8091e7b5..333acee80 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file python test/test_download.py nosetests +See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. + If you want to create a build of youtube-dl yourself, you'll need * python @@ -149,7 +151,7 @@ After you have ensured this site is distributing its content legally, you can fo } ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). -6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. +6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: diff --git a/ChangeLog b/ChangeLog index 86b36c37e..99667e5e2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.09.10 Core + [utils] Introduce bool_or_none diff --git a/docs/supportedsites.md b/docs/supportedsites.md index dbec6c8dc..798a81d3c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -38,6 +38,7 @@ - **afreecatv**: afreecatv.com - **afreecatv:global**: afreecatv.com - **AirMozilla** + - **AliExpressLive** - **AlJazeera** - **Allocine** - **AlphaPorno** @@ -437,6 +438,7 @@ - **MakerTV** - **mangomolo:live** - **mangomolo:video** + - **ManyVids** - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** @@ -701,6 +703,7 @@ - **rutube:embed**: Rutube embedded videos - **rutube:movie**: Rutube movies - **rutube:person**: Rutube person videos + - **rutube:playlist**: Rutube playlists - **RUTV**: RUTV.RU - **Ruutu** - **Ruv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 60ed35de9..736f753df 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.09.02' +__version__ = '2017.09.10' From f12a6e88b2c2632b10c156eb94d91675327485f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 03:22:27 +0700 Subject: [PATCH 0241/2417] [rutube:playlist] Fix suitable (closes #14166) --- youtube_dl/extractor/rutube.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 828c03b48..89d89b65a 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -265,8 +265,10 @@ class RutubePlaylistIE(RutubePlaylistBaseIE): _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' - @staticmethod - def suitable(url): + @classmethod + def suitable(cls, url): + if not super(RutubePlaylistIE, cls).suitable(url): + return False params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) From 43df248f10548e3c43f0f02584a360136f1129d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 03:27:43 +0700 Subject: [PATCH 0242/2417] [ChangeLog] Actualize --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 99667e5e2..189276408 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [rutube:playlist] Fix suitable (#14166) + + version 2017.09.10 Core From 7dacceae75d3c513f442cfd20d778a31bb35d3d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 03:30:33 +0700 Subject: [PATCH 0243/2417] release 2017.09.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fb934d4ba..f40cb2c4e 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.11*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.09.10 +[debug] youtube-dl version 2017.09.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 189276408..c286da6c6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.09.11 Extractors * [rutube:playlist] Fix suitable (#14166) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 736f753df..cdcb32e06 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.09.10' +__version__ = '2017.09.11' From 2709d9fa28155f7abc84d3b57ce4491391d185ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 04:14:54 +0700 Subject: [PATCH 0244/2417] [animeondemand] Add support for flash videos (closes #9944) --- youtube_dl/extractor/animeondemand.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 9e28f2579..c22530778 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -46,6 +46,10 @@ class AnimeOnDemandIE(InfoExtractor): # Full length film, non-series, ger/jap, Dub/OmU, account required 'url': 'https://www.anime-on-demand.de/anime/185', 'only_matching': True, + }, { + # Flash videos + 'url': 'https://www.anime-on-demand.de/anime/12', + 'only_matching': True, }] def _login(self): @@ -120,10 +124,11 @@ class AnimeOnDemandIE(InfoExtractor): formats = [] for input_ in re.findall( - r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html): + r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html): attributes = extract_attributes(input_) + title = attributes.get('data-dialog-header') playlist_urls = [] - for playlist_key in ('data-playlist', 'data-otherplaylist'): + for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'): playlist_url = attributes.get(playlist_key) if isinstance(playlist_url, compat_str) and re.match( r'/?[\da-zA-Z]+', playlist_url): @@ -160,6 +165,23 @@ class AnimeOnDemandIE(InfoExtractor): fatal=False) if not playlist: continue + stream_url = playlist.get('streamurl') + if stream_url: + rtmp = re.search( + r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)', + stream_url) + if rtmp: + formats.append({ + 'url': rtmp.group('url'), + 'app': rtmp.group('app'), + 'play_path': rtmp.group('playpath'), + 'page_url': url, + 'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf', + 'rtmp_real_time': True, + 'format_id': 'rtmp', + 'ext': 'flv', + }) + continue start_video = playlist.get('startvideo', 0) playlist = playlist.get('playlist') if not playlist or not isinstance(playlist, list): From 018cc61549f417cf1e88af46ff68a17b75e62630 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 04:22:55 +0700 Subject: [PATCH 0245/2417] [animeondemand] Bypass geo restriction --- youtube_dl/extractor/animeondemand.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index c22530778..2a1cd6520 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -21,6 +21,8 @@ class AnimeOnDemandIE(InfoExtractor): _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in' _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply' _NETRC_MACHINE = 'animeondemand' + # German-speaking countries of Europe + _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU'] _TESTS = [{ # jap, OmU 'url': 'https://www.anime-on-demand.de/anime/161', From 2f483758bc6a6661f1215c38161ee626d90ab655 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 11 Sep 2017 04:32:35 +0700 Subject: [PATCH 0246/2417] [animeondemand] Improve and modernize --- youtube_dl/extractor/animeondemand.py | 34 +++++++++++++-------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 2a1cd6520..69d363311 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -3,16 +3,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_str, -) +from ..compat import compat_str from ..utils import ( determine_ext, extract_attributes, ExtractorError, - sanitized_Request, urlencode_postdata, + urljoin, ) @@ -78,14 +75,13 @@ class AnimeOnDemandIE(InfoExtractor): 'post url', default=self._LOGIN_URL, group='url') if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) - - request = sanitized_Request( - post_url, urlencode_postdata(login_form)) - request.add_header('Referer', self._LOGIN_URL) + post_url = urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( - request, None, 'Logging in as %s' % username) + post_url, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form), headers={ + 'Referer': self._LOGIN_URL, + }) if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')): error = self._search_regex( @@ -154,17 +150,19 @@ class AnimeOnDemandIE(InfoExtractor): format_id_list.append(compat_str(num)) format_id = '-'.join(format_id_list) format_note = ', '.join(filter(None, (kind, lang_note))) - request = sanitized_Request( - compat_urlparse.urljoin(url, playlist_url), + item_id_list = [] + if format_id: + item_id_list.append(format_id) + item_id_list.append('videomaterial') + playlist = self._download_json( + urljoin(url, playlist_url), video_id, + 'Downloading %s JSON' % ' '.join(item_id_list), headers={ 'X-Requested-With': 'XMLHttpRequest', 'X-CSRF-Token': csrf_token, 'Referer': url, 'Accept': 'application/json, text/javascript, */*; q=0.01', - }) - playlist = self._download_json( - request, video_id, 'Downloading %s playlist JSON' % format_id, - fatal=False) + }, fatal=False) if not playlist: continue stream_url = playlist.get('streamurl') @@ -246,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor): f.update({ 'id': '%s-%s' % (f['id'], m.group('kind').lower()), 'title': m.group('title'), - 'url': compat_urlparse.urljoin(url, m.group('href')), + 'url': urljoin(url, m.group('href')), }) entries.append(f) From e7c3e33456e155106ad08347d8ab9a2ecd0c8a11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 14 Sep 2017 23:19:53 +0700 Subject: [PATCH 0247/2417] [downloader/fragment] Restart inconsistent incomplete fragment downloads (#13731) --- youtube_dl/downloader/fragment.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index bccc8ecc1..6f6fb4a77 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -151,10 +151,15 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): self._read_ytdl_file(ctx) + if ctx['fragment_index'] > 0 and resume_len == 0: + self.report_error( + 'Inconsistent state of incomplete fragment download. ' + 'Restarting from the beginning...') + ctx['fragment_index'] = resume_len = 0 + self._write_ytdl_file(ctx) else: self._write_ytdl_file(ctx) - if ctx['fragment_index'] > 0: - assert resume_len > 0 + assert ctx['fragment_index'] == 0 dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) From 319fc70676fea19b71437aab4078d4d72cc1ba5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 14 Sep 2017 23:50:19 +0700 Subject: [PATCH 0248/2417] [tv4] Relax _VALID_URL (closes #14206) --- youtube_dl/extractor/tv4.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 7aeb2c620..6487039e5 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -18,7 +18,7 @@ class TV4IE(InfoExtractor): tv4\.se/(?:[^/]+)/klipp/(?:.*)-| tv4play\.se/ (?: - (?:program|barn)/(?:[^\?]+)\?video_id=| + (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)| iframe/video/| film/| sport/| @@ -63,6 +63,10 @@ class TV4IE(InfoExtractor): 'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412', 'only_matching': True, }, + { + 'url': ' http://www.tv4play.se/program/farang/3922081', + 'only_matching': True, + } ] def _real_extract(self, url): From 0732a90579091ad60b124fd693bdda8ee526e305 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 14 Sep 2017 20:37:46 +0200 Subject: [PATCH 0249/2417] [orf] Add new extractor for f4m stories --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/orf.py | 116 +++++++++++++++++++++++++++-- 2 files changed, 112 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index aefadc56f..a3a97e940 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -768,6 +768,7 @@ from .ora import OraTVIE from .orf import ( ORFTVthekIE, ORFFM4IE, + ORFFM4StoryIE, ORFOE1IE, ORFIPTVIE, ) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index cc296eabd..74fe8017e 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -6,14 +6,15 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - HEADRequest, - unified_strdate, - strip_jsonp, - int_or_none, - float_or_none, determine_ext, + float_or_none, + HEADRequest, + int_or_none, + orderedSet, remove_end, + strip_jsonp, unescapeHTML, + unified_strdate, ) @@ -307,3 +308,108 @@ class ORFIPTVIE(InfoExtractor): 'upload_date': upload_date, 'formats': formats, } + + +class ORFFM4StoryIE(InfoExtractor): + IE_NAME = 'orf:fm4:story' + IE_DESC = 'fm4.orf.at stories' + _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)' + + _TEST = { + 'url': 'http://fm4.orf.at/stories/2865738/', + 'playlist': [{ + 'md5': 'e1c2c706c45c7b34cf478bbf409907ca', + 'info_dict': { + 'id': '547792', + 'ext': 'flv', + 'title': 'Manu Delago und Inner Tongue live', + 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.', + 'duration': 1748.52, + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20170913', + }, + }, { + 'md5': 'c6dd2179731f86f4f55a7b49899d515f', + 'info_dict': { + 'id': '547798', + 'ext': 'flv', + 'title': 'Manu Delago und Inner Tongue live (2)', + 'duration': 1504.08, + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20170913', + 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.', + }, + }], + } + + def _real_extract(self, url): + story_id = self._match_id(url) + webpage = self._download_webpage(url, story_id) + + entries = [] + all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage)) + for idx, video_id in enumerate(all_ids): + data = self._download_json( + 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, + video_id)[0] + + duration = float_or_none(data['duration'], 1000) + + video = data['sources']['q8c'] + load_balancer_url = video['loadBalancerUrl'] + abr = int_or_none(video.get('audioBitrate')) + vbr = int_or_none(video.get('bitrate')) + fps = int_or_none(video.get('videoFps')) + width = int_or_none(video.get('videoWidth')) + height = int_or_none(video.get('videoHeight')) + thumbnail = video.get('preview') + + rendition = self._download_json( + load_balancer_url, video_id, transform_source=strip_jsonp) + + f = { + 'abr': abr, + 'vbr': vbr, + 'fps': fps, + 'width': width, + 'height': height, + } + + formats = [] + for format_id, format_url in rendition['redirect'].items(): + if format_id == 'rtmp': + ff = f.copy() + ff.update({ + 'url': format_url, + 'format_id': format_id, + }) + formats.append(ff) + elif determine_ext(format_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id)) + elif determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id)) + else: + continue + self._sort_formats(formats) + + title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at') + if idx >= 1: + # Titles are duplicates, make them unique + title += ' (' + str(idx + 1) + ')' + description = self._og_search_description(webpage) + upload_date = unified_strdate(self._html_search_meta( + 'dc.date', webpage, 'upload date')) + + entries.append({ + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'formats': formats, + }) + + return self.playlist_result(entries) From fad9fc537d8ab5141f402f6ccdf60161a8d0302a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 14 Sep 2017 20:47:23 +0200 Subject: [PATCH 0250/2417] [tv4] fix a test URL --- youtube_dl/extractor/tv4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 6487039e5..cfcce020a 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -64,7 +64,7 @@ class TV4IE(InfoExtractor): 'only_matching': True, }, { - 'url': ' http://www.tv4play.se/program/farang/3922081', + 'url': 'http://www.tv4play.se/program/farang/3922081', 'only_matching': True, } ] From c46680fb2a0ee61faa25863d3c10b7b098cdbe67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 15 Sep 2017 01:59:47 +0700 Subject: [PATCH 0251/2417] [condenast] Fix extraction (closes #14196, closes #14207) --- youtube_dl/extractor/condenast.py | 54 ++++++++++++++++++------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 0c3f0c0e4..ed278fefc 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -116,16 +116,16 @@ class CondeNastIE(InfoExtractor): entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] return self.playlist_result(entries, playlist_title=title) - def _extract_video_params(self, webpage): - query = {} - params = self._search_regex( - r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None) - if params: - query.update({ - 'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'), - 'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'), - 'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'), - }) + def _extract_video_params(self, webpage, display_id): + query = self._parse_json( + self._search_regex( + r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params', + default='{}'), + display_id, transform_source=js_to_json, fatal=False) + if query: + query['videoId'] = self._search_regex( + r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)', + webpage, 'video id', default=None) else: params = extract_attributes(self._search_regex( r'(<[^>]+data-js="video-player"[^>]+>)', @@ -141,17 +141,27 @@ class CondeNastIE(InfoExtractor): video_id = params['videoId'] video_info = None - if params.get('playerId'): - info_page = self._download_json( - 'http://player.cnevids.com/player/video.js', - video_id, 'Downloading video info', fatal=False, query=params) - if info_page: - video_info = info_page.get('video') - if not video_info: - info_page = self._download_webpage( - 'http://player.cnevids.com/player/loader.js', - video_id, 'Downloading loader info', query=params) - else: + + # New API path + query = params.copy() + query['embedType'] = 'inline' + info_page = self._download_json( + 'http://player.cnevids.com/embed-api.json', video_id, + 'Downloading embed info', fatal=False, query=query) + + # Old fallbacks + if not info_page: + if params.get('playerId'): + info_page = self._download_json( + 'http://player.cnevids.com/player/video.js', video_id, + 'Downloading video info', fatal=False, query=params) + if info_page: + video_info = info_page.get('video') + if not video_info: + info_page = self._download_webpage( + 'http://player.cnevids.com/player/loader.js', + video_id, 'Downloading loader info', query=params) + if not video_info: info_page = self._download_webpage( 'https://player.cnevids.com/inline/video/%s.js' % video_id, video_id, 'Downloading inline info', query={ @@ -215,7 +225,7 @@ class CondeNastIE(InfoExtractor): if url_type == 'series': return self._extract_series(url, webpage) else: - params = self._extract_video_params(webpage) + params = self._extract_video_params(webpage, display_id) info = self._search_json_ld( webpage, display_id, fatal=False) info.update(self._extract_video(params)) From 86e55e317cb70f07792cfe543186ad520cbe3230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 15 Sep 2017 21:45:18 +0700 Subject: [PATCH 0252/2417] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index c286da6c6..38d511362 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version <unreleased> + +Core +* [downloader/fragment] Restart inconsistent incomplete fragment downloads + (#13731) +* [YoutubeDL] Download raw subtitles files (#12909, #14191) + +Extractors +* [condenast] Fix extraction (#14196, #14207) ++ [orf] Add support for f4m stories +* [tv4] Relax URL regular expression (#14206) +* [animeondemand] Bypass geo restriction ++ [animeondemand] Add support for flash videos (#9944) + + version 2017.09.11 Extractors From 159d304a9fa2b91d91a60fe3bdf2211a59bcf346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 15 Sep 2017 21:48:06 +0700 Subject: [PATCH 0253/2417] release 2017.09.15 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f40cb2c4e..98ab5b6ca 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.11*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.09.11 +[debug] youtube-dl version 2017.09.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 38d511362..041dfd7b9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.09.15 Core * [downloader/fragment] Restart inconsistent incomplete fragment downloads diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 798a81d3c..6b01dc910 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -593,6 +593,7 @@ - **Openload** - **OraTV** - **orf:fm4**: radio FM4 + - **orf:fm4:story**: fm4.orf.at stories - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cdcb32e06..8399c04fe 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.09.11' +__version__ = '2017.09.15' From cbf85239bbb835162725cd4c8758831ca1003445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 15 Sep 2017 22:13:30 +0700 Subject: [PATCH 0254/2417] [vgtv] Relax _VALID_URL (closes #14223) --- youtube_dl/extractor/vgtv.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 0f8c156a7..c21a09c01 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -42,7 +42,7 @@ class VGTVIE(XstreamIE): ) /? (?: - \#!/(?:video|live)/| + (?:\#!/)?(?:video|live)/| embed?.*id=| articles/ )| @@ -146,7 +146,11 @@ class VGTVIE(XstreamIE): { 'url': 'abtv:140026', 'only_matching': True, - } + }, + { + 'url': 'http://www.vgtv.no/video/84196/hevnen-er-soet-episode-10-abu', + 'only_matching': True, + }, ] def _real_extract(self, url): From b763e1d68c6becc414a802a452f5aa819c5de920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 15 Sep 2017 22:18:38 +0700 Subject: [PATCH 0255/2417] [twitch] Add support for go.twitch.tv URLs (closes #14215) --- youtube_dl/extractor/twitch.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 2daf9dfac..c926c99a9 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -28,7 +28,7 @@ from ..utils import ( class TwitchBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv' + _VALID_URL_BASE = r'https?://(?:(?:www|go)\.)?twitch\.tv' _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'https://usher.ttvnw.net' @@ -217,7 +217,7 @@ class TwitchVodIE(TwitchItemBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/| + (?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/| player\.twitch\.tv/\?.*?\bvideo=v ) (?P<id>\d+) @@ -458,7 +458,7 @@ class TwitchStreamIE(TwitchBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?twitch\.tv/| + (?:(?:www|go)\.)?twitch\.tv/| player\.twitch\.tv/\?.*?\bchannel= ) (?P<id>[^/#?]+) @@ -489,6 +489,9 @@ class TwitchStreamIE(TwitchBaseIE): }, { 'url': 'https://player.twitch.tv/?channel=lotsofs', 'only_matching': True, + }, { + 'url': 'https://go.twitch.tv/food', + 'only_matching': True, }] @classmethod From 6be44a50edfe2e75e31553e7a128ce1849301958 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 15 Sep 2017 22:25:38 +0700 Subject: [PATCH 0256/2417] [dailymotion:playlist] Relax _VALID_URL (closes #14219) --- youtube_dl/extractor/dailymotion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 74e991331..e9d0dd19c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -325,7 +325,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): IE_NAME = 'dailymotion:playlist' - _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)' _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"' _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' _TESTS = [{ From a4245acef85ac2414e77cf2cda4cb39adb617241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 15 Sep 2017 23:12:19 +0700 Subject: [PATCH 0257/2417] [noovo] Fix extraction (closes #14214) --- youtube_dl/extractor/noovo.py | 61 ++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/noovo.py b/youtube_dl/extractor/noovo.py index f7fa098a5..974de3c3e 100644 --- a/youtube_dl/extractor/noovo.py +++ b/youtube_dl/extractor/noovo.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, + js_to_json, smuggle_url, try_get, ) @@ -24,8 +25,6 @@ class NoovoIE(InfoExtractor): 'timestamp': 1491399228, 'upload_date': '20170405', 'uploader_id': '618566855001', - 'creator': 'vtele', - 'view_count': int, 'series': 'RPM+', }, 'params': { @@ -37,13 +36,11 @@ class NoovoIE(InfoExtractor): 'info_dict': { 'id': '5395865725001', 'title': 'Épisode 13 : Les retrouvailles', - 'description': 'md5:336d5ebc5436534e61d16e63ddfca327', + 'description': 'md5:888c3330f0c1b4476c5bc99a1c040473', 'ext': 'mp4', 'timestamp': 1492019320, 'upload_date': '20170412', 'uploader_id': '618566855001', - 'creator': 'vtele', - 'view_count': int, 'series': "L'amour est dans le pré", 'season_number': 5, 'episode': 'Épisode 13', @@ -58,40 +55,46 @@ class NoovoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - data = self._download_json( - 'http://api.noovo.ca/api/v1/pages/single-episode/%s' % video_id, - video_id)['data'] + webpage = self._download_webpage(url, video_id) - content = try_get(data, lambda x: x['contents'][0]) + bc_url = BrightcoveNewIE._extract_url(self, webpage) - brightcove_id = data.get('brightcoveId') or content['brightcoveId'] + data = self._parse_json( + self._search_regex( + r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data', + default='{}'), + video_id, transform_source=js_to_json, fatal=False) + + title = try_get( + data, lambda x: x['video']['nom'], + compat_str) or self._html_search_meta( + 'dcterms.Title', webpage, 'title', fatal=True) + + description = self._html_search_meta( + ('dcterms.Description', 'description'), webpage, 'description') series = try_get( - data, ( - lambda x: x['show']['title'], - lambda x: x['season']['show']['title']), - compat_str) + data, lambda x: x['emission']['nom']) or self._search_regex( + r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)', + webpage, 'series', default=None) - episode = None - og = data.get('og') - if isinstance(og, dict) and og.get('type') == 'video.episode': - episode = og.get('title') + season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {} + season = try_get(season_el, lambda x: x['nom'], compat_str) + season_number = int_or_none(try_get(season_el, lambda x: x['numero'])) - video = content or data + episode_el = try_get(season_el, lambda x: x['episode'], dict) or {} + episode = try_get(episode_el, lambda x: x['nom'], compat_str) + episode_number = int_or_none(try_get(episode_el, lambda x: x['numero'])) return { '_type': 'url_transparent', 'ie_key': BrightcoveNewIE.ie_key(), - 'url': smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, - {'geo_countries': ['CA']}), - 'id': brightcove_id, - 'title': video.get('title'), - 'creator': video.get('source'), - 'view_count': int_or_none(video.get('viewsCount')), + 'url': smuggle_url(bc_url, {'geo_countries': ['CA']}), + 'title': title, + 'description': description, 'series': series, - 'season_number': int_or_none(try_get( - data, lambda x: x['season']['seasonNumber'])), + 'season': season, + 'season_number': season_number, 'episode': episode, - 'episode_number': int_or_none(data.get('episodeNumber')), + 'episode_number': episode_number, } From 68d43a61b552007a718894967b869c0f1d8ff00f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 16 Sep 2017 12:14:48 +0800 Subject: [PATCH 0258/2417] Ignore TTML subtitles --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a5b585f43..fbf7cecb2 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ cover/ updates_key.pem *.egg-info *.srt +*.ttml *.sbv *.vtt *.flv From 3869028ffb6be6ab719e5cf1004276dfdfd1216d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 16 Sep 2017 12:18:38 +0800 Subject: [PATCH 0259/2417] [utils] Use bytes-like objects in dfxp2srt This fixes handling of non-UTF8 TTML subtitles Closes #14191 --- ChangeLog | 6 ++++++ test/test_utils.py | 26 +++++++++++++++++++++++--- youtube_dl/postprocessor/ffmpeg.py | 2 +- youtube_dl/utils.py | 18 +++++++++++------- 4 files changed, 41 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 041dfd7b9..ba9260e3e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Core +* [utils] Fix handling raw TTML subtitles (#14191) + + version 2017.09.15 Core diff --git a/test/test_utils.py b/test/test_utils.py index e50f3764e..efa73d0f4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1064,7 +1064,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') <p begin="3" dur="-1">Ignored, three</p> </div> </body> - </tt>''' + </tt>'''.encode('utf-8') srt_data = '''1 00:00:00,000 --> 00:00:01,000 The following line contains Chinese characters and special symbols @@ -1089,7 +1089,7 @@ Line <p begin="0" end="1">The first line</p> </div> </body> - </tt>''' + </tt>'''.encode('utf-8') srt_data = '''1 00:00:00,000 --> 00:00:01,000 The first line @@ -1115,7 +1115,7 @@ The first line <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p> </div> </body> -</tt>''' +</tt>'''.encode('utf-8') srt_data = '''1 00:00:02,080 --> 00:00:05,839 <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> @@ -1138,6 +1138,26 @@ part 3</font></u> ''' self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data) + dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?> + <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> + <body> + <div xml:lang="en"> + <p begin="0" end="1">Line 1</p> + <p begin="1" end="2">第二行</p> + </div> + </body> + </tt>'''.encode('utf-16') + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +Line 1 + +2 +00:00:01,000 --> 00:00:02,000 +第二行 + +''' + self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data) + def test_cli_option(self): self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 51256a3fb..f71d413b5 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -585,7 +585,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): dfxp_file = old_file srt_file = subtitles_filename(filename, lang, 'srt') - with io.open(dfxp_file, 'rt', encoding='utf-8') as f: + with open(dfxp_file, 'rb') as f: srt_data = dfxp2srt(f.read()) with io.open(srt_file, 'wt', encoding='utf-8') as f: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9e4492d40..b724e0b70 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2572,14 +2572,18 @@ def srt_subtitles_timecode(seconds): def dfxp2srt(dfxp_data): + ''' + @param dfxp_data A bytes-like object containing DFXP data + @returns A unicode object containing converted SRT data + ''' LEGACY_NAMESPACES = ( - ('http://www.w3.org/ns/ttml', [ - 'http://www.w3.org/2004/11/ttaf1', - 'http://www.w3.org/2006/04/ttaf1', - 'http://www.w3.org/2006/10/ttaf1', + (b'http://www.w3.org/ns/ttml', [ + b'http://www.w3.org/2004/11/ttaf1', + b'http://www.w3.org/2006/04/ttaf1', + b'http://www.w3.org/2006/10/ttaf1', ]), - ('http://www.w3.org/ns/ttml#styling', [ - 'http://www.w3.org/ns/ttml#style', + (b'http://www.w3.org/ns/ttml#styling', [ + b'http://www.w3.org/ns/ttml#style', ]), ) @@ -2674,7 +2678,7 @@ def dfxp2srt(dfxp_data): for ns in v: dfxp_data = dfxp_data.replace(ns, k) - dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) + dfxp = compat_etree_fromstring(dfxp_data) out = [] paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') From 790d379e4df2f85ece7cab02e805643234bb5c16 Mon Sep 17 00:00:00 2001 From: Windom <windom@users.noreply.github.com> Date: Sat, 16 Sep 2017 18:39:46 +0300 Subject: [PATCH 0260/2417] [morningstar] Relax _VALID_URL --- youtube_dl/extractor/morningstar.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/morningstar.py b/youtube_dl/extractor/morningstar.py index 320d27bdd..0093bcd6c 100644 --- a/youtube_dl/extractor/morningstar.py +++ b/youtube_dl/extractor/morningstar.py @@ -8,8 +8,8 @@ from .common import InfoExtractor class MorningstarIE(InfoExtractor): IE_DESC = 'morningstar.com' - _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869', 'md5': '6c0acface7a787aadc8391e4bbf7b0f5', 'info_dict': { @@ -19,7 +19,10 @@ class MorningstarIE(InfoExtractor): 'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.", 'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$' } - } + }, { + 'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 8251af63a12cd73cf2578c81dbb869232da2592c Mon Sep 17 00:00:00 2001 From: Vijay Singh <sudovijay@users.noreply.github.com> Date: Sat, 16 Sep 2017 21:15:23 +0530 Subject: [PATCH 0261/2417] [viki] Update app data (closes #14181) --- youtube_dl/extractor/viki.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index e9c8bf824..853e5c75f 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -23,9 +23,9 @@ class VikiBaseIE(InfoExtractor): _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com' _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s' - _APP = '65535a' + _APP = '100005a' _APP_VERSION = '2.2.5.1428709186' - _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)' + _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad' _GEO_BYPASS = False _NETRC_MACHINE = 'viki' From 4ed2d7b7d1f67e499a46e507d957616e364565ca Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 17 Sep 2017 13:53:04 +0800 Subject: [PATCH 0262/2417] Fix flake8 issues after #14225 --- youtube_dl/extractor/common.py | 2 +- youtube_dl/utils.py | 33 +++++++++++++++++---------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 317a9a76f..2bbbf8f4d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2452,7 +2452,7 @@ class InfoExtractor(object): def _set_cookie(self, domain, name, value, expire_time=None, port=None, path='/', secure=False, discard=False, rest={}, **kwargs): cookie = compat_cookiejar.Cookie( - 0, name, value, port, not port is None, domain, True, + 0, name, value, port, port is not None, domain, True, domain.startswith('.'), path, True, secure, expire_time, discard, None, None, rest) self._downloader.cookiejar.set_cookie(cookie) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b724e0b70..acc4f987b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3830,23 +3830,23 @@ def cookie_to_dict(cookie): cookie_dict = { 'name': cookie.name, 'value': cookie.value, - }; + } if cookie.port_specified: cookie_dict['port'] = cookie.port if cookie.domain_specified: cookie_dict['domain'] = cookie.domain if cookie.path_specified: cookie_dict['path'] = cookie.path - if not cookie.expires is None: + if cookie.expires is not None: cookie_dict['expires'] = cookie.expires - if not cookie.secure is None: + if cookie.secure is not None: cookie_dict['secure'] = cookie.secure - if not cookie.discard is None: + if cookie.discard is not None: cookie_dict['discard'] = cookie.discard try: if (cookie.has_nonstandard_attr('httpOnly') or - cookie.has_nonstandard_attr('httponly') or - cookie.has_nonstandard_attr('HttpOnly')): + cookie.has_nonstandard_attr('httponly') or + cookie.has_nonstandard_attr('HttpOnly')): cookie_dict['httponly'] = True except TypeError: pass @@ -3957,7 +3957,7 @@ class PhantomJSwrapper(object): cookies = json.loads(f.read().decode('utf-8')) for cookie in cookies: if cookie['httponly'] is True: - cookie['rest'] = { 'httpOnly': None } + cookie['rest'] = {'httpOnly': None} if 'expiry' in cookie: cookie['expire_time'] = cookie['expiry'] self.extractor._set_cookie(**cookie) @@ -3965,7 +3965,7 @@ class PhantomJSwrapper(object): def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): """ Downloads webpage (if needed) and executes JS - + Params: url: website url html: optional, html code of website @@ -3974,11 +3974,11 @@ class PhantomJSwrapper(object): note2: optional, displayed when executing JS headers: custom http headers jscode: code to be executed when page is loaded - + Returns tuple with: * downloaded website (after JS execution) * anything you print with `console.log` (but not inside `page.execute`!) - + In most cases you don't need to add any `jscode`. It is executed in `page.onLoadFinished`. `saveAndExit();` is mandatory, use it instead of `phantom.exit()` @@ -3992,7 +3992,7 @@ class PhantomJSwrapper(object): else window.setTimeout(check, 500); } - + page.evaluate(function(){ document.querySelector('#a').click(); }); @@ -4024,13 +4024,14 @@ class PhantomJSwrapper(object): else: self.extractor.to_screen('%s: %s' % (video_id, note2)) - p = subprocess.Popen([self.exe, '--ssl-protocol=any', - self._TMP_FILES['script'].name], stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + p = subprocess.Popen([ + self.exe, '--ssl-protocol=any', + self._TMP_FILES['script'].name + ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() if p.returncode != 0: - raise ExtractorError('Executing JS failed\n:' - + encodeArgument(err)) + raise ExtractorError( + 'Executing JS failed\n:' + encodeArgument(err)) with open(self._TMP_FILES['html'].name, 'rb') as f: html = f.read().decode('utf-8') From 9c2a17f2ce7b2b9dc45b603be413a943f6637498 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 17 Sep 2017 22:19:57 +0700 Subject: [PATCH 0263/2417] [popcorntv] Add extractor (closes #5914, closes #14211) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/popcorntv.py | 78 ++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 youtube_dl/extractor/popcorntv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a3a97e940..ab95c8575 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -808,6 +808,7 @@ from .polskieradio import ( PolskieRadioIE, PolskieRadioCategoryIE, ) +from .popcorntv import PopcornTVIE from .porn91 import Porn91IE from .porncom import PornComIE from .pornflip import PornFlipIE diff --git a/youtube_dl/extractor/popcorntv.py b/youtube_dl/extractor/popcorntv.py new file mode 100644 index 000000000..ac901f426 --- /dev/null +++ b/youtube_dl/extractor/popcorntv.py @@ -0,0 +1,78 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, + int_or_none, + unified_timestamp, +) + + +class PopcornTVIE(InfoExtractor): + _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183', + 'md5': '47d65a48d147caf692ab8562fe630b45', + 'info_dict': { + 'id': '9183', + 'display_id': 'food-wars-battaglie-culinarie-episodio-01', + 'ext': 'mp4', + 'title': 'Food Wars, Battaglie Culinarie | Episodio 01', + 'description': 'md5:b8bea378faae4651d3b34c6e112463d0', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1497610857, + 'upload_date': '20170616', + 'duration': 1440, + 'view_count': int, + }, + }, { + 'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id, video_id = mobj.group('display_id', 'id') + + webpage = self._download_webpage(url, display_id) + + m3u8_url = extract_attributes( + self._search_regex( + r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)', + webpage, 'content' + ))['href'] + + formats = self._extract_m3u8_formats( + m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + title = self._search_regex( + r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage, + 'title', default=None) or self._og_search_title(webpage) + + description = self._html_search_regex( + r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>', + webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) + timestamp = unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp')) + print(self._html_search_meta( + 'duration', webpage)) + duration = int_or_none(self._html_search_meta( + 'duration', webpage), invscale=60) + view_count = int_or_none(self._html_search_meta( + 'interactionCount', webpage, 'view count')) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + } From 4d8c4b46d5668387cc685123f80fb64bbc7c5aff Mon Sep 17 00:00:00 2001 From: kayb94 <30302445+kayb94@users.noreply.github.com> Date: Sun, 17 Sep 2017 15:46:52 +0000 Subject: [PATCH 0264/2417] [heise] Add support for YouTube embeds --- youtube_dl/extractor/heise.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 382f32771..495ffb7dc 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( determine_ext, int_or_none, @@ -25,6 +26,22 @@ class HeiseIE(InfoExtractor): 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20', 'thumbnail': r're:^https?://.*/gallery/$', } + }, { + # YouTube embed + 'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html', + 'md5': 'e403d2b43fea8e405e88e3f8623909f1', + 'info_dict': { + 'id': '6kmWbXleKW4', + 'ext': 'mp4', + 'title': 'NEU IM SEPTEMBER | Netflix', + 'description': 'md5:2131f3c7525e540d5fd841de938bd452', + 'upload_date': '20170830', + 'uploader': 'Netflix Deutschland, Österreich und Schweiz', + 'uploader_id': 'netflixdach', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', 'only_matching': True, @@ -40,6 +57,16 @@ class HeiseIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + title = self._html_search_meta('fulltitle', webpage, default=None) + if not title or title == "c't": + title = self._search_regex( + r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', + webpage, 'title') + + yt_urls = YoutubeIE._extract_urls(webpage) + if yt_urls: + return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key()) + container_id = self._search_regex( r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', webpage, 'container ID') @@ -47,12 +74,6 @@ class HeiseIE(InfoExtractor): r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"', webpage, 'sequenz ID') - title = self._html_search_meta('fulltitle', webpage, default=None) - if not title or title == "c't": - title = self._search_regex( - r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', - webpage, 'title') - doc = self._download_xml( 'http://www.heise.de/videout/feed', video_id, query={ 'container': container_id, From 8a1a60d17397721620e75d83f2aad3a353286f15 Mon Sep 17 00:00:00 2001 From: Kareem Moussa <itstehkman@gmail.com> Date: Tue, 19 Sep 2017 08:51:20 -0700 Subject: [PATCH 0265/2417] [devscripts/check-porn] Fix gettestcases import --- devscripts/check-porn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py index 7a219ebe9..72b2ee422 100644 --- a/devscripts/check-porn.py +++ b/devscripts/check-porn.py @@ -14,7 +14,7 @@ import os import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import get_testcases +from test.helper import gettestcases from youtube_dl.utils import compat_urllib_parse_urlparse from youtube_dl.utils import compat_urllib_request @@ -24,7 +24,7 @@ if len(sys.argv) > 1: else: METHOD = 'EURISTIC' -for test in get_testcases(): +for test in gettestcases(): if METHOD == 'EURISTIC': try: webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() From dc76eef092dc10f7e3f599fa7d85a04de8d84b8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 19 Sep 2017 23:59:36 +0700 Subject: [PATCH 0266/2417] [tvplay] Bypass geo restriction --- youtube_dl/extractor/tvplay.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 99ff82a5d..46132eda1 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -15,7 +15,9 @@ from ..utils import ( int_or_none, parse_iso8601, qualities, + smuggle_url, try_get, + unsmuggle_url, update_url_query, ) @@ -224,6 +226,9 @@ class TVPlayIE(InfoExtractor): ] def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) + video_id = self._match_id(url) geo_country = self._search_regex( r'https?://[^/]+\.([a-z]{2})', url, @@ -426,4 +431,9 @@ class ViafreeIE(InfoExtractor): r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})', webpage, 'video id') - return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key()) + return self.url_result( + smuggle_url( + 'mtg:%s' % video_id, + {'geo_countries': [ + compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]]}), + ie=TVPlayIE.ie_key(), video_id=video_id) From 3b65a6fbf31256030ff35210a7be2b50369a6c4f Mon Sep 17 00:00:00 2001 From: capital-G <dennis.scheiba@gmx.de> Date: Tue, 19 Sep 2017 22:58:06 +0200 Subject: [PATCH 0267/2417] [twitter] Fix duration extraction --- youtube_dl/extractor/twitter.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 6eaf360a6..7399cf538 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -229,7 +229,7 @@ class TwitterCardIE(TwitterBaseIE): title = self._search_regex(r'<title>([^<]+)', webpage, 'title') thumbnail = config.get('posterImageUrl') or config.get('image_src') - duration = float_or_none(config.get('duration')) or duration + duration = float_or_none(config.get('duration'), scale=1000) or duration return { 'id': video_id, @@ -255,6 +255,7 @@ class TwitterIE(InfoExtractor): 'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"', 'uploader': 'FREE THE NIPPLE', 'uploader_id': 'freethenipple', + 'duration': 12.922, }, 'params': { 'skip_download': True, # requires ffmpeg @@ -305,11 +306,12 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'Donte - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'Donte on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'あかさ - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'あかさ on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'Donte', + 'uploader': 'あかさ', 'uploader_id': 'jaydingeer', + 'duration': 30.0, }, 'params': { 'skip_download': True, # requires ffmpeg @@ -337,6 +339,7 @@ class TwitterIE(InfoExtractor): 'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"', 'uploader_id': 'captainamerica', 'uploader': 'Captain America', + 'duration': 3.17, }, 'params': { 'skip_download': True, # requires ffmpeg @@ -364,6 +367,7 @@ class TwitterIE(InfoExtractor): 'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"', 'uploader': 'عالم الأخبار', 'uploader_id': 'news_al3alm', + 'duration': 277.4, }, 'params': { 'format': 'best[format_id^=http-]', From 12ea5c79fb0bfa878d62d130cf67057fc230dfa7 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Wed, 20 Sep 2017 14:53:06 -0500 Subject: [PATCH 0268/2417] [nbcsports:vplayer] Correct theplatform URL (closes #13873) --- youtube_dl/extractor/nbc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 62db70b43..836a41f06 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -109,10 +109,10 @@ class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P[0-9a-zA-Z_]+)' _TESTS = [{ - 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', + 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI', 'info_dict': { 'id': '9CsDKds0kvHI', - 'ext': 'flv', + 'ext': 'mp4', 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', 'timestamp': 1426270238, @@ -120,7 +120,7 @@ class NBCSportsVPlayerIE(InfoExtractor): 'uploader': 'NBCU-SPORTS', } }, { - 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z', + 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z', 'only_matching': True, }] @@ -134,7 +134,8 @@ class NBCSportsVPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - theplatform_url = self._og_search_video_url(webpage) + theplatform_url = self._og_search_video_url(webpage).replace( + 'vplayer.nbcsports.com', 'player.theplatform.com') return self.url_result(theplatform_url, 'ThePlatform') From f6ff52b473c9ed969fadb3e3d50852c4a27ba17e Mon Sep 17 00:00:00 2001 From: Giuseppe Fabiano Date: Wed, 20 Sep 2017 23:05:33 +0200 Subject: [PATCH 0269/2417] [beeg] Fix extraction (closes #14275) --- youtube_dl/extractor/beeg.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index d5c5822f2..bbeae4bac 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -9,6 +9,7 @@ from ..compat import ( from ..utils import ( int_or_none, parse_iso8601, + urljoin, ) @@ -36,9 +37,11 @@ class BeegIE(InfoExtractor): webpage = self._download_webpage(url, video_id) cpl_url = self._search_regex( - r']+src=(["\'])(?P(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1', + r']+src=(["\'])(?P(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1', webpage, 'cpl', default=None, group='url') + cpl_url = urljoin(url, cpl_url) + beeg_version, beeg_salt = [None] * 2 if cpl_url: @@ -54,7 +57,7 @@ class BeegIE(InfoExtractor): r'beeg_salt\s*=\s*(["\'])(?P.+?)\1', cpl, 'beeg salt', default=None, group='beeg_salt') - beeg_version = beeg_version or '2000' + beeg_version = beeg_version or '2185' beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H' video = self._download_json( From 8c6919e4331e1cd44f50e700e8fc4e630d913a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Sep 2017 23:00:35 +0700 Subject: [PATCH 0270/2417] [lynda] Add support for educourse.ga (closes #14286) --- youtube_dl/extractor/lynda.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index d2f75296a..1b6f5091d 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor): class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' - _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P\d+)|player/embed)/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P\d+)|player/embed)/(?P\d+)' _TIMECODE_REGEX = r'\[(?P\d+:\d+:\d+[\.,]\d+)\]' @@ -110,6 +110,9 @@ class LyndaIE(LyndaBaseIE): }, { 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0', 'only_matching': True, + }, { + 'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', + 'only_matching': True, }] def _raise_unavailable(self, video_id): @@ -253,7 +256,7 @@ class LyndaCourseIE(LyndaBaseIE): # Course link equals to welcome/introduction video link of same course # We will recognize it as course link - _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P[^/]+/[^/]+/(?P\d+))-\d\.html' + _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P[^/]+/[^/]+/(?P\d+))-\d\.html' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 8c2895305dc09920055611c8120f5a65fcd2614f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 23 Sep 2017 02:30:03 +0800 Subject: [PATCH 0271/2417] [options] Accept lrc as a subtitle conversion target format (closes #14292) --- ChangeLog | 1 + youtube_dl/__init__.py | 2 +- youtube_dl/options.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index ba9260e3e..42ba879b1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core ++ [options] Accept lrc as a subtitle conversion target format (#14292) * [utils] Fix handling raw TTML subtitles (#14191) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c4589411e..ba684a075 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -206,7 +206,7 @@ def _real_main(argv=None): if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']: parser.error('invalid video recode format specified') if opts.convertsubtitles is not None: - if opts.convertsubtitles not in ['srt', 'vtt', 'ass']: + if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']: parser.error('invalid subtitle format specified') if opts.date is not None: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 38439c971..4c0455044 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -847,7 +847,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--convert-subs', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, - help='Convert the subtitles to other format (currently supported: srt|ass|vtt)') + help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)') parser.add_option_group(general) parser.add_option_group(network) From 2384f5a64e501d7abb844e8d31fe340b34d8d4e7 Mon Sep 17 00:00:00 2001 From: Tatsuyuki Ishi Date: Wed, 6 Sep 2017 11:24:34 +0900 Subject: [PATCH 0272/2417] [mixcloud] Fix extraction (closes #14088) --- youtube_dl/compat.py | 10 +- youtube_dl/extractor/mixcloud.py | 157 ++++++++++++++++++++----------- 2 files changed, 109 insertions(+), 58 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 9e4e13bcf..2a62248ef 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -6,6 +6,7 @@ import collections import email import getpass import io +import itertools import optparse import os import re @@ -15,7 +16,6 @@ import socket import struct import subprocess import sys -import itertools import xml.etree.ElementTree @@ -2898,6 +2898,13 @@ else: compat_struct_pack = struct.pack compat_struct_unpack = struct.unpack +try: + from future_builtins import zip as compat_zip +except ImportError: # not 2.6+ or is 3.x + try: + from itertools import izip as compat_zip # < 2.5 or 3.x + except ImportError: + compat_zip = zip __all__ = [ 'compat_HTMLParseError', @@ -2948,5 +2955,6 @@ __all__ = [ 'compat_urlretrieve', 'compat_xml_parse_error', 'compat_xpath', + 'compat_zip', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index f6360cce6..481182380 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -9,16 +9,16 @@ from .common import InfoExtractor from ..compat import ( compat_chr, compat_ord, - compat_str, compat_urllib_parse_unquote, compat_urlparse, + compat_zip ) from ..utils import ( clean_html, ExtractorError, OnDemandPagedList, str_to_int, -) + try_get) class MixcloudIE(InfoExtractor): @@ -54,27 +54,19 @@ class MixcloudIE(InfoExtractor): 'only_matching': True, }] - _keys = [ - 'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };', - 'pleasedontdownloadourmusictheartistswontgetpaid', - 'window.addEventListener = window.addEventListener || function() {};', - '(function() { return new Date().toLocaleDateString(); })()' - ] - _current_key = None + @staticmethod + def _decrypt_xor_cipher(key, ciphertext): + """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR.""" + return ''.join([ + compat_chr(compat_ord(ch) ^ compat_ord(k)) + for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) - # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js - def _decrypt_play_info(self, play_info, video_id): - play_info = base64.b64decode(play_info.encode('ascii')) - for num, key in enumerate(self._keys, start=1): - try: - return self._parse_json( - ''.join([ - compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)])) - for idx, ch in enumerate(play_info)]), - video_id) - except ExtractorError: - if num == len(self._keys): - raise + @staticmethod + def _decrypt_and_extend(stream_info, url_key, getter, key, formats): + maybe_url = stream_info.get(url_key) + if maybe_url is not None: + decrypted = MixcloudIE._decrypt_xor_cipher(key, base64.b64decode(maybe_url)) + formats.extend(getter(decrypted)) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -84,54 +76,105 @@ class MixcloudIE(InfoExtractor): webpage = self._download_webpage(url, track_id) - if not self._current_key: - js_url = self._search_regex( - r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', - webpage, 'js url', default=None) - if js_url: - js = self._download_webpage(js_url, track_id, fatal=False) - if js: - KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' - for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'): - key = self._search_regex( - KEY_RE_TEMPLATE % key_name, js, 'key', - default=None, group='key') - if key and isinstance(key, compat_str): - self._keys.insert(0, key) - self._current_key = key + # Legacy path + encrypted_play_info = self._search_regex( + r'm-play-info="([^"]+)"', webpage, 'play info', default=None) + + if encrypted_play_info is not None: + # Decode + encrypted_play_info = base64.b64decode(encrypted_play_info) + else: + # New path + full_info_json = self._parse_json(self._html_search_regex( + r'', webpage, 'play info'), 'play info') + for item in full_info_json: + item_data = try_get(item, lambda x: x['cloudcast']['data']['cloudcastLookup']) + if try_get(item_data, lambda x: x['streamInfo']['url']): + info_json = item_data + break + else: + raise ExtractorError('Failed to extract matching stream info') message = self._html_search_regex( r'(?s)]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', webpage, 'error message', default=None) - encrypted_play_info = self._search_regex( - r'm-play-info="([^"]+)"', webpage, 'play info') + js_url = self._search_regex( + r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', + webpage, 'js url', default=None) + if js_url is None: + js_url = self._search_regex( + r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js/www\.[^>]+\.js)', + webpage, 'js url') + js = self._download_webpage(js_url, track_id) + # Known plaintext attack + if encrypted_play_info: + kps = ['{"stream_url":'] + kpa_target = encrypted_play_info + else: + kps = ['https://', 'http://'] + kpa_target = base64.b64decode(info_json['streamInfo']['url']) + for kp in kps: + partial_key = self._decrypt_xor_cipher(kpa_target, kp) + for quote in ["'", '"']: + key = self._search_regex(r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)), js, + "encryption key", default=None) + if key is not None: + break + else: + continue + break + else: + raise ExtractorError('Failed to extract encryption key') - play_info = self._decrypt_play_info(encrypted_play_info, track_id) + if encrypted_play_info is not None: + play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info') + if message and 'stream_url' not in play_info: + raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) + song_url = play_info['stream_url'] + formats = [{ + 'format_id': 'normal', + 'url': song_url + }] - if message and 'stream_url' not in play_info: - raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) + title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') + thumbnail = self._proto_relative_url(self._html_search_regex( + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) + uploader = self._html_search_regex( + r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) + uploader_id = self._search_regex( + r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) + description = self._og_search_description(webpage) + view_count = str_to_int(self._search_regex( + [r'([0-9,.]+)', + r'(?:m|data)-tooltip=["\']([\d,.]+) plays'], + webpage, 'play count', default=None)) - song_url = play_info['stream_url'] + else: + title = info_json['name'] + thumbnail = try_get(info_json, + lambda x: 'https://thumbnailer.mixcloud.com/unsafe/600x600/' + x['picture']['urlRoot']) + uploader = try_get(info_json, lambda x: x['owner']['displayName']) + uploader_id = try_get(info_json, lambda x: x['owner']['username']) + description = try_get(info_json, lambda x: x['description']) + view_count = try_get(info_json, lambda x: x['plays']) - title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') - thumbnail = self._proto_relative_url(self._html_search_regex( - r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) - uploader = self._html_search_regex( - r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) - uploader_id = self._search_regex( - r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) - description = self._og_search_description(webpage) - view_count = str_to_int(self._search_regex( - [r'([0-9,.]+)', - r'(?:m|data)-tooltip=["\']([\d,.]+) plays'], - webpage, 'play count', default=None)) + stream_info = info_json['streamInfo'] + formats = [] + self._decrypt_and_extend(stream_info, 'url', lambda x: [{ + 'format_id': 'normal', + 'url': x + }], key, formats) + self._decrypt_and_extend(stream_info, 'hlsUrl', lambda x: self._extract_m3u8_formats(x, title), key, + formats) + self._decrypt_and_extend(stream_info, 'dashUrl', lambda x: self._extract_mpd_formats(x, title), key, + formats) return { 'id': track_id, 'title': title, - 'url': song_url, + 'formats': formats, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, From 095774e59130c999ed8ce132f80a7164c5ee39a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 05:35:55 +0700 Subject: [PATCH 0273/2417] [mixcloud] Improve and simplify (closes #14132) --- youtube_dl/extractor/mixcloud.py | 71 ++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 481182380..f331db890 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..compat import ( compat_chr, compat_ord, + compat_str, compat_urllib_parse_unquote, compat_urlparse, compat_zip @@ -16,9 +17,12 @@ from ..compat import ( from ..utils import ( clean_html, ExtractorError, + int_or_none, OnDemandPagedList, str_to_int, - try_get) + try_get, + urljoin, +) class MixcloudIE(InfoExtractor): @@ -61,13 +65,6 @@ class MixcloudIE(InfoExtractor): compat_chr(compat_ord(ch) ^ compat_ord(k)) for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) - @staticmethod - def _decrypt_and_extend(stream_info, url_key, getter, key, formats): - maybe_url = stream_info.get(url_key) - if maybe_url is not None: - decrypted = MixcloudIE._decrypt_xor_cipher(key, base64.b64decode(maybe_url)) - formats.extend(getter(decrypted)) - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group(1) @@ -86,9 +83,12 @@ class MixcloudIE(InfoExtractor): else: # New path full_info_json = self._parse_json(self._html_search_regex( - r'', webpage, 'play info'), 'play info') + r'', + webpage, 'play info'), 'play info') for item in full_info_json: - item_data = try_get(item, lambda x: x['cloudcast']['data']['cloudcastLookup']) + item_data = try_get( + item, lambda x: x['cloudcast']['data']['cloudcastLookup'], + dict) if try_get(item_data, lambda x: x['streamInfo']['url']): info_json = item_data break @@ -100,13 +100,9 @@ class MixcloudIE(InfoExtractor): webpage, 'error message', default=None) js_url = self._search_regex( - r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', - webpage, 'js url', default=None) - if js_url is None: - js_url = self._search_regex( - r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js/www\.[^>]+\.js)', - webpage, 'js url') - js = self._download_webpage(js_url, track_id) + r']+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)', + webpage, 'js url') + js = self._download_webpage(js_url, track_id, 'Downloading JS') # Known plaintext attack if encrypted_play_info: kps = ['{"stream_url":'] @@ -117,8 +113,9 @@ class MixcloudIE(InfoExtractor): for kp in kps: partial_key = self._decrypt_xor_cipher(kpa_target, kp) for quote in ["'", '"']: - key = self._search_regex(r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)), js, - "encryption key", default=None) + key = self._search_regex( + r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)), + js, 'encryption key', default=None) if key is not None: break else: @@ -153,23 +150,37 @@ class MixcloudIE(InfoExtractor): else: title = info_json['name'] - thumbnail = try_get(info_json, - lambda x: 'https://thumbnailer.mixcloud.com/unsafe/600x600/' + x['picture']['urlRoot']) + thumbnail = urljoin( + 'https://thumbnailer.mixcloud.com/unsafe/600x600/', + try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str)) uploader = try_get(info_json, lambda x: x['owner']['displayName']) uploader_id = try_get(info_json, lambda x: x['owner']['username']) description = try_get(info_json, lambda x: x['description']) - view_count = try_get(info_json, lambda x: x['plays']) + view_count = int_or_none(try_get(info_json, lambda x: x['plays'])) stream_info = info_json['streamInfo'] formats = [] - self._decrypt_and_extend(stream_info, 'url', lambda x: [{ - 'format_id': 'normal', - 'url': x - }], key, formats) - self._decrypt_and_extend(stream_info, 'hlsUrl', lambda x: self._extract_m3u8_formats(x, title), key, - formats) - self._decrypt_and_extend(stream_info, 'dashUrl', lambda x: self._extract_mpd_formats(x, title), key, - formats) + + for url_key in ('url', 'hlsUrl', 'dashUrl'): + format_url = stream_info.get(url_key) + if not format_url: + continue + decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url)) + if not decrypted: + continue + if url_key == 'hlsUrl': + formats.extend(self._extract_m3u8_formats( + decrypted, track_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif url_key == 'dashUrl': + formats.extend(self._extract_mpd_formats( + decrypted, track_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'format_id': 'http', + 'url': decrypted, + }) + self._sort_formats(formats) return { 'id': track_id, From 9ce1ac404648142139f8b231c674d434ad4f9ffe Mon Sep 17 00:00:00 2001 From: kayb94 <30302445+kayb94@users.noreply.github.com> Date: Fri, 22 Sep 2017 22:49:48 +0000 Subject: [PATCH 0274/2417] [generic] Fix support for multiple HTML5 videos on one page (closes #14080) --- youtube_dl/extractor/generic.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b83c18380..7d0edf09c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1879,6 +1879,15 @@ class GenericIE(InfoExtractor): 'title': 'Building A Business Online: Principal Chairs Q & A', }, }, + { + # multiple HTML5 videos on one page + 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html', + 'info_dict': { + 'id': 'keyscenarios', + 'title': 'Rescue Kit 14 Free Edition - Getting started', + }, + 'playlist_count': 4, + } # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2849,13 +2858,20 @@ class GenericIE(InfoExtractor): # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: - for entry in entries: - entry.update({ + if len(entries) == 1: + entries[0].update({ 'id': video_id, 'title': video_title, }) + else: + for num, entry in enumerate(entries, start=1): + entry.update({ + 'id': '%s-%s' % (video_id, num), + 'title': '%s (%d)' % (video_title, num), + }) + for entry in entries: self._sort_formats(entry['formats']) - return self.playlist_result(entries) + return self.playlist_result(entries, video_id, video_title) jwplayer_data = self._find_jwplayer_data( webpage, video_id, transform_source=js_to_json) From 13de91c9e92bd831fee38fddbdabce7f6e82ef91 Mon Sep 17 00:00:00 2001 From: Dan Weber Date: Tue, 12 Sep 2017 22:52:54 -0400 Subject: [PATCH 0275/2417] [americastestkitchen] Add extractor (closes #10764) --- youtube_dl/extractor/americastestkitchen.py | 85 +++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 86 insertions(+) create mode 100755 youtube_dl/extractor/americastestkitchen.py diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py new file mode 100755 index 000000000..f231e7f6e --- /dev/null +++ b/youtube_dl/extractor/americastestkitchen.py @@ -0,0 +1,85 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class AmericasTestKitchenIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/episode/(?P\d+)' + _TESTS = [{ + 'url': + 'https://www.americastestkitchen.com/episode/548-summer-dinner-party', + 'md5': 'b861c3e365ac38ad319cfd509c30577f', + 'info_dict': { + 'id': '1_5g5zua6e', + 'title': 'atk_s17_e24.mp4', + 'ext': 'mp4', + 'description': '

    Host Julia Collin Davison goes into the test kitchen with test cook Dan Souza to learn how to make the ultimate Grill-Roasted Beef Tenderloin. Next, equipment expert Adam Ried reviews gas grills in the Equipment Corner. Then, gadget guru Lisa McManus uncovers the best quirky gadgets. Finally, test cook Erin McMurrer shows host Bridget Lancaster how to make an elegant Pear-Walnut Upside-Down Cake.

    ', + 'timestamp': 1497285541, + 'upload_date': '20170612', + 'uploader_id': 'roger.metcalf@americastestkitchen.com', + 'release_date': '2017-06-17', + 'thumbnail': 'http://d3cizcpymoenau.cloudfront.net/images/35973/e24-tenderloin-16.jpg', + 'episode_number': 24, + 'episode': 'Summer Dinner Party', + 'episode_id': '548-summer-dinner-party', + 'season_number': 17 + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': + 'https://www.americastestkitchen.com/episode/546-a-spanish-affair', + 'only_matching': + True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + partner_id = self._search_regex( + r'partner_id/(?P\d+)', + webpage, + 'partner_id', + group='partner_id') + + video_data = self._parse_json( + self._search_regex( + r'window\.__INITIAL_STATE__\s*=\s*({.+?});\s*', + webpage, 'initial context'), + video_id) + + episode_data = video_data['episodeDetail']['content']['data'] + episode_content_meta = episode_data['full_video'] + external_id = episode_content_meta['external_id'] + + # photo data + photo_data = episode_content_meta.get('photo') + thumbnail = photo_data.get('image_url') if photo_data else None + + # meta + release_date = episode_data.get('aired_at') + description = episode_content_meta.get('description') + episode_number = int(episode_content_meta.get('episode_number')) + episode = episode_content_meta.get('title') + episode_id = episode_content_meta.get('episode_slug') + season_number = int(episode_content_meta.get('season_number')) + + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, external_id), + 'ie_key': 'Kaltura', + 'id': video_id, + 'release_date': release_date, + 'thumbnail': thumbnail, + 'description': description, + 'episode_number': episode_number, + 'episode': episode, + 'episode_id': episode_id, + 'season_number': season_number + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ab95c8575..585300500 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -39,6 +39,7 @@ from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE from .amcnetworks import AMCNetworksIE +from .americastestkitchen import AmericasTestKitchenIE from .animeondemand import AnimeOnDemandIE from .anitube import AnitubeIE from .anvato import AnvatoIE From 4bb58fa118a8c75b2ecf05f7b29a0ae27eef6239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 06:28:46 +0700 Subject: [PATCH 0276/2417] [americastestkitchen] Improve (closes #13996) --- youtube_dl/extractor/americastestkitchen.py | 82 ++++++++++----------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py index f231e7f6e..01736872d 100755 --- a/youtube_dl/extractor/americastestkitchen.py +++ b/youtube_dl/extractor/americastestkitchen.py @@ -1,85 +1,85 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + try_get, + unified_strdate, +) class AmericasTestKitchenIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/episode/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P\d+)' _TESTS = [{ - 'url': - 'https://www.americastestkitchen.com/episode/548-summer-dinner-party', + 'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party', 'md5': 'b861c3e365ac38ad319cfd509c30577f', 'info_dict': { 'id': '1_5g5zua6e', - 'title': 'atk_s17_e24.mp4', + 'title': 'Summer Dinner Party', 'ext': 'mp4', - 'description': '

    Host Julia Collin Davison goes into the test kitchen with test cook Dan Souza to learn how to make the ultimate Grill-Roasted Beef Tenderloin. Next, equipment expert Adam Ried reviews gas grills in the Equipment Corner. Then, gadget guru Lisa McManus uncovers the best quirky gadgets. Finally, test cook Erin McMurrer shows host Bridget Lancaster how to make an elegant Pear-Walnut Upside-Down Cake.

    ', + 'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1497285541, 'upload_date': '20170612', 'uploader_id': 'roger.metcalf@americastestkitchen.com', - 'release_date': '2017-06-17', - 'thumbnail': 'http://d3cizcpymoenau.cloudfront.net/images/35973/e24-tenderloin-16.jpg', - 'episode_number': 24, + 'release_date': '20170617', + 'series': "America's Test Kitchen", + 'season_number': 17, 'episode': 'Summer Dinner Party', - 'episode_id': '548-summer-dinner-party', - 'season_number': 17 + 'episode_number': 24, }, 'params': { - # m3u8 download 'skip_download': True, }, }, { - 'url': - 'https://www.americastestkitchen.com/episode/546-a-spanish-affair', - 'only_matching': - True, + 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) partner_id = self._search_regex( - r'partner_id/(?P\d+)', - webpage, - 'partner_id', - group='partner_id') + r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', + webpage, 'kaltura partner id') video_data = self._parse_json( self._search_regex( - r'window\.__INITIAL_STATE__\s*=\s*({.+?});\s*', + r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*', webpage, 'initial context'), video_id) - episode_data = video_data['episodeDetail']['content']['data'] - episode_content_meta = episode_data['full_video'] - external_id = episode_content_meta['external_id'] + ep_data = try_get( + video_data, + (lambda x: x['episodeDetail']['content']['data'], + lambda x: x['videoDetail']['content']['data']), dict) + ep_meta = ep_data.get('full_video', {}) + external_id = ep_data.get('external_id') or ep_meta['external_id'] - # photo data - photo_data = episode_content_meta.get('photo') - thumbnail = photo_data.get('image_url') if photo_data else None + title = ep_data.get('title') or ep_meta.get('title') + description = clean_html(ep_meta.get('episode_description') or ep_data.get( + 'description') or ep_meta.get('description')) + thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url']) + release_date = unified_strdate(ep_data.get('aired_at')) - # meta - release_date = episode_data.get('aired_at') - description = episode_content_meta.get('description') - episode_number = int(episode_content_meta.get('episode_number')) - episode = episode_content_meta.get('title') - episode_id = episode_content_meta.get('episode_slug') - season_number = int(episode_content_meta.get('season_number')) + season_number = int_or_none(ep_meta.get('season_number')) + episode = ep_meta.get('title') + episode_number = int_or_none(ep_meta.get('episode_number')) return { '_type': 'url_transparent', 'url': 'kaltura:%s:%s' % (partner_id, external_id), 'ie_key': 'Kaltura', - 'id': video_id, - 'release_date': release_date, - 'thumbnail': thumbnail, + 'title': title, 'description': description, - 'episode_number': episode_number, + 'thumbnail': thumbnail, + 'release_date': release_date, + 'series': "America's Test Kitchen", + 'season_number': season_number, 'episode': episode, - 'episode_id': episode_id, - 'season_number': season_number + 'episode_number': episode_number, } From 5c1452e8f1e744db14be1baef840e9f531e8f144 Mon Sep 17 00:00:00 2001 From: Giuseppe Fabiano Date: Sat, 23 Sep 2017 01:38:09 +0200 Subject: [PATCH 0277/2417] [twitter] Add support for user_id-less URLs (closes #14270) --- youtube_dl/extractor/twitter.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 7399cf538..0df3ad7c7 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -242,8 +242,9 @@ class TwitterCardIE(TwitterBaseIE): class TwitterIE(InfoExtractor): IE_NAME = 'twitter' - _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P[^/]+)/status/(?P\d+)' + _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P[^/]+))/status/(?P\d+)' _TEMPLATE_URL = 'https://twitter.com/%s/status/%s' + _TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s' _TESTS = [{ 'url': 'https://twitter.com/freethenipple/status/643211948184596480', @@ -322,9 +323,9 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': 'MIOxnrUteUd', 'ext': 'mp4', - 'title': 'FilmDrunk - Vine of the day', - 'description': 'FilmDrunk on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"', - 'uploader': 'FilmDrunk', + 'title': 'Vince Mancini - Vine of the day', + 'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"', + 'uploader': 'Vince Mancini', 'uploader_id': 'Filmdrunk', 'timestamp': 1402826626, 'upload_date': '20140615', @@ -372,6 +373,21 @@ class TwitterIE(InfoExtractor): 'params': { 'format': 'best[format_id^=http-]', }, + }, { + 'url': 'https://twitter.com/i/web/status/910031516746514432', + 'info_dict': { + 'id': '910031516746514432', + 'ext': 'mp4', + 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"', + 'uploader': 'Préfet de Guadeloupe', + 'uploader_id': 'Prefet971', + 'duration': 47.48, + }, + 'params': { + 'skip_download': True, # requires ffmpeg + }, }] def _real_extract(self, url): @@ -380,11 +396,15 @@ class TwitterIE(InfoExtractor): twid = mobj.group('id') webpage, urlh = self._download_webpage_handle( - self._TEMPLATE_URL % (user_id, twid), twid) + self._TEMPLATE_STATUSES_URL % twid, twid) if 'twitter.com/account/suspended' in urlh.geturl(): raise ExtractorError('Account suspended by Twitter.', expected=True) + if user_id is None: + mobj = re.match(self._VALID_URL, urlh.geturl()) + user_id = mobj.group('user_id') + username = remove_end(self._og_search_title(webpage), ' on Twitter') title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”') From 1c22d7a7f30917abfd2b7495f7bd02d51cb8528a Mon Sep 17 00:00:00 2001 From: Namnamseo <0201ssw+github@gmail.com> Date: Thu, 24 Aug 2017 11:32:24 +0900 Subject: [PATCH 0278/2417] [kakao] Add extractor (closes #12298) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/kakao.py | 140 +++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 youtube_dl/extractor/kakao.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 585300500..4232a4fef 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -483,6 +483,7 @@ from .jove import JoveIE from .joj import JojIE from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE +from .kakao import KakaoIE from .kaltura import KalturaIE from .kamcord import KamcordIE from .kanalplay import KanalPlayIE diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py new file mode 100644 index 000000000..0caa41e9e --- /dev/null +++ b/youtube_dl/extractor/kakao.py @@ -0,0 +1,140 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + compat_str, + unified_timestamp, +) + + +class KakaoIE(InfoExtractor): + _VALID_URL = r'https?://tv.kakao.com/channel/(?P\d+)/cliplink/(?P\d+)' + IE_NAME = 'kakao.com' + + _TESTS = [{ + 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', + 'md5': '702b2fbdeb51ad82f5c904e8c0766340', + 'info_dict': { + 'id': '301965083', + 'ext': 'mp4', + 'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』', + 'uploader_id': 2671005, + 'uploader': '그랑그랑이', + 'timestamp': 1488160199, + 'upload_date': '20170227', + } + }, { + 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180', + 'md5': 'a8917742069a4dd442516b86e7d66529', + 'info_dict': { + 'id': '300103180', + 'ext': 'mp4', + 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', + 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', + 'uploader_id': 2653210, + 'uploader': '쇼 음악중심', + 'timestamp': 1485684628, + 'upload_date': '20170129', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + player_url = 'http://tv.kakao.com/embed/player/cliplink/' + video_id + \ + '?service=kakao_tv&autoplay=1&profile=HIGH&wmode=transparent' + player_header = {'Referer': player_url} + + impress = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/impress' % video_id, + video_id, 'Downloading video info', + query={ + 'player': 'monet_html5', + 'referer': url, + 'uuid': '', + 'service': 'kakao_tv', + 'section': '', + 'dteType': 'PC', + 'fields': 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' + }, headers=player_header) + + clipLink = impress['clipLink'] + clip = clipLink['clip'] + + video_info = { + 'id': video_id, + 'title': clip['title'], + 'description': clip.get('description'), + 'uploader': clipLink.get('channel', {}).get('name'), + 'uploader_id': clipLink.get('channelId'), + 'duration': int_or_none(clip.get('duration')), + 'view_count': int_or_none(clip.get('playCount')), + 'like_count': int_or_none(clip.get('likeCount')), + 'comment_count': int_or_none(clip.get('commentCount')), + } + + tid = impress.get('tid', '') + raw = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw' % video_id, + video_id, 'Downloading video formats info', + query={ + 'player': 'monet_html5', + 'referer': url, + 'uuid': '', + 'service': 'kakao_tv', + 'section': '', + 'tid': tid, + 'profile': 'HIGH', + 'dteType': 'PC', + }, headers=player_header, fatal=False) + + formats = [] + for fmt in raw.get('outputList', []): + try: + profile_name = fmt['profile'] + fmt_url_json = self._download_json( + 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw/videolocation' % video_id, + video_id, 'Downloading video URL for profile %s' % profile_name, + query={ + 'service': 'kakao_tv', + 'section': '', + 'tid': tid, + 'profile': profile_name + }, headers=player_header, fatal=False) + + if fmt_url_json is None: + continue + + fmt_url = fmt_url_json['url'] + formats.append({ + 'url': fmt_url, + 'format_id': profile_name, + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + 'format_note': fmt.get('label'), + 'filesize': int_or_none(fmt.get('filesize')) + }) + except KeyError: + pass + + self._sort_formats(formats) + video_info['formats'] = formats + + top_thumbnail = clip.get('thumbnailUrl') + thumbs = [] + for thumb in clip.get('clipChapterThumbnailList', []): + thumbs.append({ + 'url': thumb.get('thumbnailUrl'), + 'id': compat_str(thumb.get('timeInSec')), + 'preference': -1 if thumb.get('isDefault') else 0 + }) + video_info['thumbnail'] = top_thumbnail + video_info['thumbnails'] = thumbs + + upload_date = unified_timestamp(clipLink.get('createTime')) + video_info['timestamp'] = upload_date + + return video_info From f70ddd4aebbfb0bdf2f63c1eba5b5614d2cfb70f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 07:25:15 +0700 Subject: [PATCH 0279/2417] [kakao] Improve (closes #14007) --- youtube_dl/extractor/kakao.py | 117 ++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 54 deletions(-) diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 0caa41e9e..c9b438efc 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -3,16 +3,17 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, - compat_str, unified_timestamp, + update_url_query, ) class KakaoIE(InfoExtractor): _VALID_URL = r'https?://tv.kakao.com/channel/(?P\d+)/cliplink/(?P\d+)' - IE_NAME = 'kakao.com' + _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks' _TESTS = [{ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', @@ -44,60 +45,57 @@ class KakaoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - player_url = 'http://tv.kakao.com/embed/player/cliplink/' + video_id + \ - '?service=kakao_tv&autoplay=1&profile=HIGH&wmode=transparent' - player_header = {'Referer': player_url} - - impress = self._download_json( - 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/impress' % video_id, - video_id, 'Downloading video info', - query={ - 'player': 'monet_html5', - 'referer': url, - 'uuid': '', - 'service': 'kakao_tv', - 'section': '', - 'dteType': 'PC', - 'fields': 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' - }, headers=player_header) - - clipLink = impress['clipLink'] - clip = clipLink['clip'] - - video_info = { - 'id': video_id, - 'title': clip['title'], - 'description': clip.get('description'), - 'uploader': clipLink.get('channel', {}).get('name'), - 'uploader_id': clipLink.get('channelId'), - 'duration': int_or_none(clip.get('duration')), - 'view_count': int_or_none(clip.get('playCount')), - 'like_count': int_or_none(clip.get('likeCount')), - 'comment_count': int_or_none(clip.get('commentCount')), + player_header = { + 'Referer': update_url_query( + 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, { + 'service': 'kakao_tv', + 'autoplay': '1', + 'profile': 'HIGH', + 'wmode': 'transparent', + }) } + QUERY_COMMON = { + 'player': 'monet_html5', + 'referer': url, + 'uuid': '', + 'service': 'kakao_tv', + 'section': '', + 'dteType': 'PC', + } + + query = QUERY_COMMON.copy() + query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' + impress = self._download_json( + '%s/%s/impress' % (self._API_BASE, video_id), + video_id, 'Downloading video info', + query=query, headers=player_header) + + clip_link = impress['clipLink'] + clip = clip_link['clip'] + + title = clip.get('title') or clip_link.get('displayTitle') + tid = impress.get('tid', '') + + query = QUERY_COMMON.copy() + query.update({ + 'tid': tid, + 'profile': 'HIGH', + }) raw = self._download_json( - 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw' % video_id, + '%s/%s/raw' % (self._API_BASE, video_id), video_id, 'Downloading video formats info', - query={ - 'player': 'monet_html5', - 'referer': url, - 'uuid': '', - 'service': 'kakao_tv', - 'section': '', - 'tid': tid, - 'profile': 'HIGH', - 'dteType': 'PC', - }, headers=player_header, fatal=False) + query=query, headers=player_header) formats = [] for fmt in raw.get('outputList', []): try: profile_name = fmt['profile'] fmt_url_json = self._download_json( - 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/raw/videolocation' % video_id, - video_id, 'Downloading video URL for profile %s' % profile_name, + '%s/%s/raw/videolocation' % (self._API_BASE, video_id), + video_id, + 'Downloading video URL for profile %s' % profile_name, query={ 'service': 'kakao_tv', 'section': '', @@ -119,11 +117,8 @@ class KakaoIE(InfoExtractor): }) except KeyError: pass - self._sort_formats(formats) - video_info['formats'] = formats - top_thumbnail = clip.get('thumbnailUrl') thumbs = [] for thumb in clip.get('clipChapterThumbnailList', []): thumbs.append({ @@ -131,10 +126,24 @@ class KakaoIE(InfoExtractor): 'id': compat_str(thumb.get('timeInSec')), 'preference': -1 if thumb.get('isDefault') else 0 }) - video_info['thumbnail'] = top_thumbnail - video_info['thumbnails'] = thumbs + top_thumbnail = clip.get('thumbnailUrl') + if top_thumbnail: + thumbs.append({ + 'url': top_thumbnail, + 'preference': 10, + }) - upload_date = unified_timestamp(clipLink.get('createTime')) - video_info['timestamp'] = upload_date - - return video_info + return { + 'id': video_id, + 'title': title, + 'description': clip.get('description'), + 'uploader': clip_link.get('channel', {}).get('name'), + 'uploader_id': clip_link.get('channelId'), + 'thumbnails': thumbs, + 'timestamp': unified_timestamp(clip_link.get('createTime')), + 'duration': int_or_none(clip.get('duration')), + 'view_count': int_or_none(clip.get('playCount')), + 'like_count': int_or_none(clip.get('likeCount')), + 'comment_count': int_or_none(clip.get('commentCount')), + 'formats': formats, + } From 7f4921b38d10c17fe354bab20b741b362c5ae0aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 07:26:40 +0700 Subject: [PATCH 0280/2417] [heise] PEP 8 --- youtube_dl/extractor/heise.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 495ffb7dc..82e11a7d8 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -60,8 +60,8 @@ class HeiseIE(InfoExtractor): title = self._html_search_meta('fulltitle', webpage, default=None) if not title or title == "c't": title = self._search_regex( - r']+class="videoplayerjw"[^>]+data-title="([^"]+)"', - webpage, 'title') + r']+class="videoplayerjw"[^>]+data-title="([^"]+)"', + webpage, 'title') yt_urls = YoutubeIE._extract_urls(webpage) if yt_urls: From 136507b39a2b48cb775249e9724eeeedb56baed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 07:41:22 +0700 Subject: [PATCH 0281/2417] [24video] Add support for 24video.adult (closes #14295) --- youtube_dl/extractor/twentyfourvideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 7af11659f..cc51ca0ae 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -14,7 +14,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?P(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' + _VALID_URL = r'https?://(?P(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', From e3440d824a7326d0ba609d8f0896203208ecc558 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Sep 2017 07:42:17 +0700 Subject: [PATCH 0282/2417] [24video] Fix timestamp extraction and make non fatal (#14295) --- youtube_dl/extractor/twentyfourvideo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index cc51ca0ae..96e0b96e3 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -60,8 +60,8 @@ class TwentyFourVideoIE(InfoExtractor): duration = int_or_none(self._og_search_property( 'duration', webpage, 'duration', fatal=False)) timestamp = parse_iso8601(self._search_regex( - r'

    ', webpage, 'title', default=None) or self._og_search_title( - webpage)).strip() + webpage, default=None)) video_id = self._html_search_regex( - r'data-video=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', group='id') - - data = self._download_json( - 'https://mediazone.vrt.be/api/v1/%s/assets/%s' - % (site_id, video_id), display_id) - - formats = [] - for target in data['targetUrls']: - format_url, format_type = target.get('url'), target.get('type') - if not format_url or not format_type: - continue - if format_type == 'HLS': - formats.extend(self._extract_m3u8_formats( - format_url, display_id, entry_protocol='m3u8_native', - ext='mp4', preference=0, fatal=False, m3u8_id=format_type)) - elif format_type == 'HDS': - formats.extend(self._extract_f4m_formats( - format_url, display_id, f4m_id=format_type, fatal=False)) - elif format_type == 'MPEG_DASH': - formats.extend(self._extract_mpd_formats( - format_url, display_id, mpd_id=format_type, fatal=False)) - else: - formats.append({ - 'format_id': format_type, - 'url': format_url, - }) - self._sort_formats(formats) - - subtitles = {} - subtitle_urls = data.get('subtitleUrls') - if isinstance(subtitle_urls, list): - for subtitle in subtitle_urls: - subtitle_url = subtitle.get('url') - if subtitle_url and subtitle.get('type') == 'CLOSED': - subtitles.setdefault('nl', []).append({'url': subtitle_url}) + r'data-video=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', + group='id') return { + '_type': 'url_transparent', + 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id), + 'ie_key': CanvasIE.ie_key(), 'id': video_id, 'display_id': display_id, 'title': title, 'description': self._og_search_description(webpage), - 'formats': formats, - 'duration': float_or_none(data.get('duration'), 1000), - 'thumbnail': data.get('posterImageUrl'), - 'subtitles': subtitles, } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4232a4fef..24e9acda6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -150,7 +150,10 @@ from .camdemy import ( from .camwithher import CamWithHerIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE -from .canvas import CanvasIE +from .canvas import ( + CanvasIE, + CanvasEenIE, +) from .carambatv import ( CarambaTVIE, CarambaTVPageIE, From 544ffb7790ea1ac7bbc081d2e486101c0382b900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Oct 2017 04:15:12 +0700 Subject: [PATCH 0312/2417] [ketnet] Add support for videos without direct sources (closes #14377) --- youtube_dl/extractor/ketnet.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py index fb9c2dbd4..93a98e1e0 100644 --- a/youtube_dl/extractor/ketnet.py +++ b/youtube_dl/extractor/ketnet.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +from .canvas import CanvasIE from .common import InfoExtractor @@ -7,7 +8,7 @@ class KetnetIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes', - 'md5': 'd907f7b1814ef0fa285c0475d9994ed7', + 'md5': '6bdeb65998930251bbd1c510750edba9', 'info_dict': { 'id': 'zomerse-filmpjes', 'ext': 'mp4', @@ -15,6 +16,20 @@ class KetnetIE(InfoExtractor): 'description': 'Gluur mee met Ghost Rockers op de filmset', 'thumbnail': r're:^https?://.*\.jpg$', } + }, { + # mzid in playerConfig instead of sources + 'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook', + 'md5': '90139b746a0a9bd7bb631283f6e2a64e', + 'info_dict': { + 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'ext': 'flv', + 'title': 'Nachtwacht: De Greystook', + 'description': 'md5:1db3f5dc4c7109c821261e7512975be7', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1468.03, + }, + 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], }, { 'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016', 'only_matching': True, @@ -38,6 +53,12 @@ class KetnetIE(InfoExtractor): 'player config'), video_id) + mzid = config.get('mzid') + if mzid: + return self.url_result( + 'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid, + CanvasIE.ie_key(), video_id=mzid) + title = config['title'] formats = [] From d2ae7e24e5c574fa45621771a134e58b21443e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Oct 2017 04:43:25 +0700 Subject: [PATCH 0313/2417] [postprocessor/ffmpeg] Convert to opus using libopus (closes #14381) --- youtube_dl/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index f71d413b5..3ea1afcf3 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -44,7 +44,7 @@ ACODECS = { 'aac': 'aac', 'flac': 'flac', 'm4a': 'aac', - 'opus': 'opus', + 'opus': 'libopus', 'vorbis': 'libvorbis', 'wav': None, } From b7e14f06a4a4fbaafc593c2f118e4b0f5d8d7937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ku=CC=88ch?= Date: Tue, 3 Oct 2017 15:17:28 +0200 Subject: [PATCH 0314/2417] Fix for JSON meta data download Added fixes according to #13651 and user @remitamine --- youtube_dl/extractor/nbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 836a41f06..35151f527 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -15,7 +15,7 @@ from ..utils import ( class NBCIE(AdobePassIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?Pn?\d+))' + _VALID_URL = r'https?(?P://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?Pn?\d+))' _TESTS = [ { @@ -72,6 +72,7 @@ class NBCIE(AdobePassIE): def _real_extract(self, url): permalink, video_id = re.match(self._VALID_URL, url).groups() + permalink = 'http' + permalink video_data = self._download_json( 'https://api.nbc.com/v3/videos', video_id, query={ 'filter[permalink]': permalink, From 3e4cedf9e8cd3157df2457df7274d0c842421945 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Tue, 3 Oct 2017 18:28:13 +0200 Subject: [PATCH 0315/2417] [tvn24] Relax _VALID_URL --- youtube_dl/extractor/tvn24.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 12ed6039c..6590e1fd0 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -9,7 +9,7 @@ from ..utils import ( class TVN24IE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P[^/]+)\.html' + _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P[^/]+)' _TESTS = [{ 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', 'md5': 'fbdec753d7bc29d96036808275f2130c', @@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor): 'ext': 'mp4', 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', - 'thumbnail': 're:http://.*[.]jpeg', + 'thumbnail': 're:https?://.*[.]jpeg', } }, { 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html', @@ -29,6 +29,9 @@ class TVN24IE(InfoExtractor): }, { 'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html', 'only_matching': True, + }, { + 'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158', + 'only_matching': True, }] def _real_extract(self, url): From 9524dca3accc1e60cddd141f06924ed7404db9bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Oct 2017 02:53:20 +0700 Subject: [PATCH 0316/2417] [README.md] Use revision bound link to YoutubeDL options (closes #14401) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7818e58df..2879aad24 100644 --- a/README.md +++ b/README.md @@ -1167,7 +1167,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From c110944fa2f21af733b4f3168764e1b008e11514 Mon Sep 17 00:00:00 2001 From: "M.K" Date: Tue, 3 Oct 2017 22:50:27 +0200 Subject: [PATCH 0317/2417] [extractor/common] Fix typo in _parse_mpd_formats --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2bbbf8f4d..a878550c2 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1920,7 +1920,7 @@ class InfoExtractor(object): # can't be used at the same time if '%(Number' in media_template and 's' not in representation_ms_info: segment_duration = None - if 'total_number' not in representation_ms_info and 'segment_duration': + if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info: segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['fragments'] = [{ From 6e736d86e7d03d14279c403202fe2a632e6e5023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Oct 2017 04:27:42 +0700 Subject: [PATCH 0318/2417] [beeg] Fix extraction (closes #14403) --- youtube_dl/extractor/beeg.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index bbeae4bac..bf22a41b7 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -60,9 +60,13 @@ class BeegIE(InfoExtractor): beeg_version = beeg_version or '2185' beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H' - video = self._download_json( - 'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id), - video_id) + for api_path in ('', 'api.'): + video = self._download_json( + 'https://%sbeeg.com/api/v6/%s/video/%s' + % (api_path, beeg_version, video_id), video_id, + fatal=api_path == 'api.') + if video: + break def split(o, e): def cut(s, x): From 6b46285e850f8bc8155a93adf13920752537e55d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 4 Oct 2017 07:45:13 +0200 Subject: [PATCH 0319/2417] [comedycentral] new shortcut :theopposition for "The Opposition" show --- youtube_dl/extractor/comedycentral.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 4cac29415..d08b909a6 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor): class ComedyCentralShortnameIE(InfoExtractor): - _VALID_URL = r'^:(?Ptds|thedailyshow)$' + _VALID_URL = r'^:(?Ptds|thedailyshow|theopposition)$' _TESTS = [{ 'url': ':tds', 'only_matching': True, }, { 'url': ':thedailyshow', 'only_matching': True, + }, { + 'url': ':theopposition', + 'only_matching': True, }] def _real_extract(self, url): @@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor): shortcut_map = { 'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', 'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes', } return self.url_result(shortcut_map[video_id]) From cf5f6ed5be8bab252f3ded345b6b1fdf31426661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Oct 2017 00:27:24 +0700 Subject: [PATCH 0320/2417] [xvideos] Add support for embed URLs and improve extraction (closes #14409) --- youtube_dl/extractor/xvideos.py | 42 ++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index eca603028..085c8d4f3 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -14,8 +14,16 @@ from ..utils import ( class XVideosIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P[0-9]+)(?:.*)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?xvideos\.com/video| + flashservice\.xvideos\.com/embedframe/| + static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= + ) + (?P[0-9]+) + ''' + _TESTS = [{ 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', 'md5': '14cea69fcb84db54293b1e971466c2e1', 'info_dict': { @@ -25,21 +33,33 @@ class XVideosIE(InfoExtractor): 'duration': 108, 'age_limit': 18, } - } + }, { + 'url': 'https://flashservice.xvideos.com/embedframe/4588838', + 'only_matching': True, + }, { + 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + webpage = self._download_webpage( + 'http://www.xvideos.com/video%s/' % video_id, video_id) mobj = re.search(r'

    (.+?)

    ', webpage) if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - video_title = self._html_search_regex( - r'(.*?)\s+-\s+XVID', webpage, 'title') - video_thumbnail = self._search_regex( + title = self._html_search_regex( + (r'<title>(?P<title>.+?)\s+-\s+XVID', + r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), + webpage, 'title', default=None, + group='title') or self._og_search_title(webpage) + + thumbnail = self._search_regex( r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) - video_duration = int_or_none(self._og_search_property( + duration = int_or_none(self._og_search_property( 'duration', webpage, default=None)) or parse_duration( self._search_regex( r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)', @@ -74,8 +94,8 @@ class XVideosIE(InfoExtractor): return { 'id': video_id, 'formats': formats, - 'title': video_title, - 'duration': video_duration, - 'thumbnail': video_thumbnail, + 'title': title, + 'duration': duration, + 'thumbnail': thumbnail, 'age_limit': 18, } From 6be08ce60205a65a6739667783eead56ccc34456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:13:53 +0700 Subject: [PATCH 0321/2417] [utils] Use in OnDemandPagedList by default Not using cache results in redundant network I/O due to downloading the same pages while using --playlist-items n-m --- youtube_dl/extractor/mixcloud.py | 2 +- youtube_dl/extractor/nba.py | 2 +- youtube_dl/utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index f331db890..7b2bb6e20 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -291,7 +291,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): functools.partial( self._tracks_page_func, '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type), - self._PAGE_SIZE, use_cache=True) + self._PAGE_SIZE) return self.playlist_result( entries, video_id, '%s (%s)' % (username, list_type), description) diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 53561961c..be295a7a3 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -122,7 +122,7 @@ class NBAIE(TurnerBaseIE): playlist_title = self._og_search_title(webpage, fatal=False) entries = OnDemandPagedList( functools.partial(self._fetch_page, team, video_id), - self._PAGE_SIZE, use_cache=True) + self._PAGE_SIZE) return self.playlist_result(entries, team, playlist_title) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 92b22e639..59fb33435 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1933,7 +1933,7 @@ class PagedList(object): class OnDemandPagedList(PagedList): - def __init__(self, pagefunc, pagesize, use_cache=False): + def __init__(self, pagefunc, pagesize, use_cache=True): self._pagefunc = pagefunc self._pagesize = pagesize self._use_cache = use_cache From 7e85e8729f1e2ed9817466acef30fa6dc7e03e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:34:46 +0700 Subject: [PATCH 0322/2417] [YoutubeDL] Fix out of range --playlist-items for iterable playlists and reduce code duplication (closes #14425) --- youtube_dl/YoutubeDL.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 474d6c95f..9036f0f94 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -911,12 +911,22 @@ class YoutubeDL(object): playlistitems = iter_playlistitems(playlistitems_str) ie_entries = ie_result['entries'] + + def make_playlistitems_entries(list_ie_entries): + num_entries = len(list_ie_entries) + return [ + list_ie_entries[i - 1] for i in playlistitems + if -num_entries <= i - 1 < num_entries] + + def report_download(num_entries): + self.to_screen( + '[%s] playlist %s: Downloading %d videos' % + (ie_result['extractor'], playlist, num_entries)) + if isinstance(ie_entries, list): n_all_entries = len(ie_entries) if playlistitems: - entries = [ - ie_entries[i - 1] for i in playlistitems - if -n_all_entries <= i - 1 < n_all_entries] + entries = make_playlistitems_entries(ie_entries) else: entries = ie_entries[playliststart:playlistend] n_entries = len(entries) @@ -934,20 +944,15 @@ class YoutubeDL(object): entries = ie_entries.getslice( playliststart, playlistend) n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, n_entries)) + report_download(n_entries) else: # iterable if playlistitems: - entry_list = list(ie_entries) - entries = [entry_list[i - 1] for i in playlistitems] + entries = make_playlistitems_entries(list(ie_entries)) else: entries = list(itertools.islice( ie_entries, playliststart, playlistend)) n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, n_entries)) + report_download(n_entries) if self.params.get('playlistreverse', False): entries = entries[::-1] From 86a15ed64b410336b2145f4a6b8e8a22cbf24f51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:41:28 +0700 Subject: [PATCH 0323/2417] [test_YoutubeDL] Add test for #14425 --- test/test_YoutubeDL.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e70cbcd37..5ac34e663 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -770,6 +770,9 @@ class TestYoutubeDL(unittest.TestCase): result = get_ids({'playlist_items': '10'}) self.assertEqual(result, []) + result = get_ids({'playlist_items': '3-10'}) + self.assertEqual(result, [3, 4]) + def test_urlopen_no_file_protocol(self): # see https://github.com/rg3/youtube-dl/issues/8227 ydl = YDL() From cd6fc19ed76af6687fb2cb5f87d9ed4c3071c203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:46:57 +0700 Subject: [PATCH 0324/2417] [YoutubeDL] Ignore duplicates in --playlist-items E.g. '--playlist-items 2-4,3-4,3' should result in '[2,3,4]', not '[2,3,4,3,4,3]' --- test/test_YoutubeDL.py | 3 +++ youtube_dl/YoutubeDL.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 5ac34e663..db936bfb8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -773,6 +773,9 @@ class TestYoutubeDL(unittest.TestCase): result = get_ids({'playlist_items': '3-10'}) self.assertEqual(result, [3, 4]) + result = get_ids({'playlist_items': '2-4,3-4,3'}) + self.assertEqual(result, [2, 3, 4]) + def test_urlopen_no_file_protocol(self): # see https://github.com/rg3/youtube-dl/issues/8227 ydl = YDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9036f0f94..855d6b8e5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -65,6 +65,7 @@ from .utils import ( locked_file, make_HTTPS_handler, MaxDownloadsReached, + orderedSet, PagedList, parse_filesize, PerRequestProxyHandler, @@ -908,7 +909,7 @@ class YoutubeDL(object): yield int(item) else: yield int(string_segment) - playlistitems = iter_playlistitems(playlistitems_str) + playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) ie_entries = ie_result['entries'] From ac93c09ab2c4d099a628c1ed59670bef008db89c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:53:32 +0700 Subject: [PATCH 0325/2417] [xtube] Add support for embedded URLs (closes #14417) --- youtube_dl/extractor/xtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index bea9b87ad..c6c0b3291 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -18,7 +18,7 @@ class XTubeIE(InfoExtractor): _VALID_URL = r'''(?x) (?: xtube:| - https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-) + https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-) ) (?P<id>[^/?&#]+) ''' @@ -64,6 +64,9 @@ class XTubeIE(InfoExtractor): }, { 'url': 'xtube:kVTUy_G222_', 'only_matching': True, + }, { + 'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big', + 'only_matching': True, }] def _real_extract(self, url): From 2e2a8e97d53d7759d663be2a1dc3f4108342ea40 Mon Sep 17 00:00:00 2001 From: Jalaz Kumar <jaykay12@users.noreply.github.com> Date: Fri, 6 Oct 2017 22:26:31 +0530 Subject: [PATCH 0326/2417] [pornflip] Extend _VALID_URL (closes #14405) --- youtube_dl/extractor/pornflip.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py index a4a5d390e..ee04936e1 100644 --- a/youtube_dl/extractor/pornflip.py +++ b/youtube_dl/extractor/pornflip.py @@ -14,7 +14,7 @@ from ..utils import ( class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})' + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})' _TESTS = [{ 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', 'md5': '98c46639849145ae1fd77af532a9278c', @@ -34,6 +34,12 @@ class PornFlipIE(InfoExtractor): }, { 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep', 'only_matching': True, + }, { + 'url': 'https://www.pornflip.com/v/EkRD6-vS2-s', + 'only_matching': True, + }, { + 'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s', + 'only_matching': True, }] def _real_extract(self, url): From b1a7bf44b95dee2803e8638b13c4467a253b5b8f Mon Sep 17 00:00:00 2001 From: remis <r-pankevicius@users.noreply.github.com> Date: Fri, 6 Oct 2017 19:59:09 +0300 Subject: [PATCH 0327/2417] [lnkgo] Relax _VALID_URL --- youtube_dl/extractor/lnkgo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py index 068378c9c..cfec0d3d0 100644 --- a/youtube_dl/extractor/lnkgo.py +++ b/youtube_dl/extractor/lnkgo.py @@ -11,7 +11,7 @@ from ..utils import ( class LnkGoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' _TESTS = [{ 'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162', 'info_dict': { @@ -42,6 +42,9 @@ class LnkGoIE(InfoExtractor): 'params': { 'skip_download': True, # HLS download }, + }, { + 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai', + 'only_matching': True, }] _AGE_LIMITS = { 'N-7': 7, From e95284754192ea4b5b4e32f1a2274e5767d980b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 00:57:09 +0700 Subject: [PATCH 0328/2417] [PULL_REQUEST_TEMPLATE.md] Add explicit entry on flake8 --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 46fa26f02..ba4ca7553 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -9,6 +9,7 @@ ### Before submitting a *pull request* make sure you have: - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) From 665f42d8c14626404372b349624632b8d39f3c0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 01:40:00 +0700 Subject: [PATCH 0329/2417] [reddit] Sort formats (closes #14430) --- youtube_dl/extractor/reddit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py index 01c85ee01..4d44b9d74 100644 --- a/youtube_dl/extractor/reddit.py +++ b/youtube_dl/extractor/reddit.py @@ -35,6 +35,8 @@ class RedditIE(InfoExtractor): 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + return { 'id': video_id, 'title': video_id, From 3fc8f5b7c239ddd366012d2f0da754fcfe247297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 05:01:38 +0700 Subject: [PATCH 0330/2417] [ChangeLog] Actualize --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 80299d522..760d09047 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +version <unreleased> + +Core +* [YoutubeDL] Ignore duplicates in --playlist-items +* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and + reduce code duplication (#14425) ++ [utils] Use cache in OnDemandPagedList by default +* [postprocessor/ffmpeg] Convert to opus using libopus (#14381) + +Extractors +* [reddit] Sort formats (#14430) +* [lnkgo] Relax URL regular expression (#14423) +* [pornflip] Extend URL regular expression (#14405, #14406) ++ [xtube] Add support for embed URLs (#14417) ++ [xvideos] Add support for embed URLs and improve extraction (#14409) +* [beeg] Fix extraction (#14403) +* [tvn24] Relax URL regular expression (#14395) +* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378, + #14392, #14414, #14419, #14431) ++ [ketnet] Add support for videos without direct sources (#14377) +* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een ++ [afreecatv] Add support for adult videos (#14376) + + version 2017.10.01 Core From 8e751a185c53a81cd35900010118894fd8201b75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 05:02:53 +0700 Subject: [PATCH 0331/2417] release 2017.10.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3be306203..9fc425ca8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.10.01 +[debug] youtube-dl version 2017.10.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 760d09047..2feb3dc2d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.10.07 Core * [YoutubeDL] Ignore duplicates in --playlist-items diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d36a07cf6..be9df7e31 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -130,7 +130,8 @@ - **CamWithHer** - **canalc2.tv** - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - - **Canvas**: canvas.be and een.be + - **Canvas** + - **CanvasEen**: canvas.be and een.be - **CarambaTV** - **CarambaTVPage** - **CartoonNetwork** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 49fa02d43..705fcade6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.10.01' +__version__ = '2017.10.07' From 8b561bfc9d15bf487be0e0bb5bb1d7248fd321d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 21:59:04 +0700 Subject: [PATCH 0332/2417] [youtube] Add support for hooktube.com (closes #14437) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ad2e933ee..edd8713b7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -332,6 +332,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| (?:www\.)?deturl\.com/www\.youtube\.com/| (?:www\.)?pwnyoutube\.com/| + (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains From a22ccac1f02b5957f1f083a54e1f83d22a7e6f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Oct 2017 01:34:17 +0700 Subject: [PATCH 0333/2417] [fox] Delegate to uplynk:preplay (#14147) --- youtube_dl/extractor/fox.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index facc665f6..5f98d017b 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .adobepass import AdobePassIE +from .uplynk import UplynkPreplayIE +from ..compat import compat_str from ..utils import ( + HEADRequest, int_or_none, parse_age_limit, parse_duration, @@ -53,14 +56,7 @@ class FOXIE(AdobePassIE): }) title = video['name'] - - m3u8_url = self._download_json( - video['videoRelease']['url'], video_id)['playURL'] - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) + release_url = video['videoRelease']['url'] description = video.get('description') duration = int_or_none(video.get('durationInSeconds')) or int_or_none( @@ -84,7 +80,7 @@ class FOXIE(AdobePassIE): # TODO: AP pass - return { + info = { 'id': video_id, 'title': title, 'description': description, @@ -97,5 +93,22 @@ class FOXIE(AdobePassIE): 'episode': episode, 'episode_number': episode_number, 'release_year': release_year, - 'formats': formats, } + + urlh = self._request_webpage(HEADRequest(release_url), video_id) + video_url = compat_str(urlh.geturl()) + + if UplynkPreplayIE.suitable(video_url): + info.update({ + '_type': 'url_transparent', + 'url': video_url, + 'ie_key': UplynkPreplayIE.ie_key(), + }) + else: + m3u8_url = self._download_json(release_url, video_id)['playURL'] + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + info['formats'] = formats + return info From b0dde6686c7110c9c2515a808d803239a81e6505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksandar=20Topuzovi=C4=87?= <aleksandar.topuzovic@gmail.com> Date: Sat, 7 Oct 2017 23:40:08 +0100 Subject: [PATCH 0334/2417] [hrti] Relax _VALID_URL --- youtube_dl/extractor/hrti.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 656ce6d05..4f0369433 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -104,7 +104,7 @@ class HRTiIE(HRTiBaseIE): (?: hrti:(?P<short_id>[0-9]+)| https?:// - hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)? + hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)? ) ''' _TESTS = [{ @@ -129,6 +129,9 @@ class HRTiIE(HRTiBaseIE): }, { 'url': 'hrti:2181385', 'only_matching': True, + }, { + 'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14', + 'only_matching': True, }] def _real_extract(self, url): From 89923316210f8e17bb1a085278940e1c56fcff48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Oct 2017 21:36:50 +0700 Subject: [PATCH 0335/2417] [wdr] Relax media link regex (closes #14447) --- youtube_dl/extractor/wdr.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 8bb7362bb..621de1e1e 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -22,8 +22,13 @@ class WDRBaseIE(InfoExtractor): # for wdrmaus, in a tag with the class "videoButton" (previously a link # to the page in a multiline "videoLink"-tag) json_metadata = self._html_search_regex( - r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', - webpage, 'media link', default=None, flags=re.MULTILINE) + r'''(?sx)class= + (?: + (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+| + (["\'])videoLink\b.*?\2[\s]*>\n[^\n]* + )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3 + ''', + webpage, 'media link', default=None, group='data') if not json_metadata: return From 197224b7a4e37a6581bf1a0da18d0f67ea61a476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Oct 2017 23:50:53 +0700 Subject: [PATCH 0336/2417] Fix some regexes --- youtube_dl/extractor/aenetworks.py | 2 +- youtube_dl/extractor/appletrailers.py | 4 ++-- youtube_dl/extractor/ard.py | 2 +- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/dailymotion.py | 2 +- youtube_dl/extractor/deezer.py | 2 +- youtube_dl/extractor/freespeech.py | 2 +- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/googleplus.py | 2 +- youtube_dl/extractor/hrti.py | 2 +- youtube_dl/extractor/ign.py | 2 +- youtube_dl/extractor/infoq.py | 6 +++--- youtube_dl/extractor/jeuxvideo.py | 2 +- youtube_dl/extractor/livestream.py | 2 +- youtube_dl/extractor/makertv.py | 2 +- youtube_dl/extractor/mangomolo.py | 2 +- youtube_dl/extractor/meipai.py | 2 +- youtube_dl/extractor/mtv.py | 2 +- youtube_dl/extractor/myvideo.py | 2 +- youtube_dl/extractor/nationalgeographic.py | 2 +- youtube_dl/extractor/naver.py | 2 +- youtube_dl/extractor/npo.py | 2 +- youtube_dl/extractor/ruhd.py | 2 +- youtube_dl/extractor/stanfordoc.py | 4 ++-- youtube_dl/extractor/theplatform.py | 2 +- youtube_dl/extractor/thisav.py | 4 ++-- youtube_dl/extractor/twitter.py | 2 +- youtube_dl/extractor/vice.py | 2 +- youtube_dl/extractor/videopremium.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 30 files changed, 35 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 2dcdba9d2..da1b566c2 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE): r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'], webpage, 'video url', group='url') theplatform_metadata = self._download_theplatform_metadata(self._search_regex( - r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) + r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) info = self._parse_theplatform_metadata(theplatform_metadata) if theplatform_metadata.get('AETN$isBehindWall'): requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index b45b431e1..a9ef733e0 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor): continue formats.append({ 'format_id': '%s-%s' % (version, size), - 'url': re.sub(r'_(\d+p.mov)', r'_h\1', src), + 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), 'width': int_or_none(size_data.get('width')), 'height': int_or_none(size_data.get('height')), 'language': version[:2], @@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor): formats = [] for format in settings['metadata']['sizes']: # The src is a file pointing to the real video file - format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) + format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) formats.append({ 'url': format_url, 'format': format['type'], diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 3f248b147..915f8862e 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor): title = self._html_search_regex( [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', - r'<meta name="dcterms.title" content="(.*?)"/>', + r'<meta name="dcterms\.title" content="(.*?)"/>', r'<h4 class="headline">(.*?)</h4>'], webpage, 'title') description = self._html_search_meta( diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 8b20c03d6..5525f7c9b 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor): m3u8_id=format_id, fatal=False)) if re.search(self._USP_RE, href): usp_formats = self._extract_m3u8_formats( - re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href), + re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href), programme_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) for f in usp_formats: diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index e9d0dd19c..21a2d0239 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): # vevo embed vevo_id = self._search_regex( - r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)', + r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)', webpage, 'vevo embed', default=None) if vevo_id: return self.url_result('vevo:%s' % vevo_id, 'Vevo') diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py index ec87b94db..a38b2683d 100644 --- a/youtube_dl/extractor/deezer.py +++ b/youtube_dl/extractor/deezer.py @@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor): 'id': '176747451', 'title': 'Best!', 'uploader': 'Anonymous', - 'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$', + 'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$', }, 'playlist_count': 30, 'skip': 'Only available in .de', diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 0a70ca763..7fa271b51 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') webpage = self._download_webpage(url, title) - info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') + info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info') info = json.loads(info_json) return { diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1721a3dbd..68b633839 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2206,7 +2206,7 @@ class GenericIE(InfoExtractor): # And then there are the jokers who advertise that they use RTA, # but actually don't. AGE_LIMIT_MARKERS = [ - r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>', + r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>', ] if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS): age_limit = 18 diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 427499b11..6b927bb44 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor): 'width': int(width), 'height': int(height), } for width, height, video_url in re.findall( - r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)] + r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)] self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 4f0369433..7cef5f6ce 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -173,7 +173,7 @@ class HRTiIE(HRTiBaseIE): class HRTiPlaylistIE(HRTiBaseIE): - _VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' + _VALID_URL = r'https?://hrti\.hrt\.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' _TESTS = [{ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', 'info_dict': { diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index c1367cf51..a96ea8010 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -203,7 +203,7 @@ class PCMagIE(IGNIE): _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)' IE_NAME = 'pcmag' - _EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]' + _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]' _TESTS = [{ 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data', diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index fe425e786..57c9b0cc4 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -69,9 +69,9 @@ class InfoQIE(BokeCCBaseIE): }] def _extract_cookies(self, webpage): - policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') - signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') - key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') + policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') + signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') + key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( policy, signature, key_pair_id) diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 1a4227f6b..e9f4ed738 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor): webpage = self._download_webpage(url, title) title = self._html_search_meta('name', webpage) or self._og_search_title(webpage) config_url = self._html_search_regex( - r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"', + r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"', webpage, 'config URL') config_url = 'http://www.jeuxvideo.com' + config_url diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 7f946c6ed..317ebbc4e 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor): info = { 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), - 'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), + 'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), } video_data = self._download_json(stream_url, content_id) is_live = video_data.get('isLive') diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py index 3c34d4604..8eda69cfc 100644 --- a/youtube_dl/extractor/makertv.py +++ b/youtube_dl/extractor/makertv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class MakerTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' + _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' _TEST = { 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 1885ac7df..dbd761a67 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor): format_url = self._html_search_regex( [ - r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)', + r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', r'<a[^>]+href="(rtsp://[^"]+)"' ], webpage, 'format url') formats = self._extract_wowza_formats( diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py index c8eacb4f4..2445b8b39 100644 --- a/youtube_dl/extractor/meipai.py +++ b/youtube_dl/extractor/meipai.py @@ -11,7 +11,7 @@ from ..utils import ( class MeipaiIE(InfoExtractor): IE_DESC = '美拍' - _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)' _TESTS = [{ # regular uploaded video 'url': 'http://www.meipai.com/media/531697625', diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 25af5ddfd..1154a3536 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid is None or ':' not in mgid: mgid = self._search_regex( - [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], + [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'], webpage, 'mgid', default=None) if not mgid: diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 6bb64eb63..367e811db 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor): else: video_playpath = '' - video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') + video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj') video_swfobj = compat_urllib_parse_unquote(video_swfobj) video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index b91d86528..9e8d28f48 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE): release_url = self._search_regex( r'video_auth_playlist_url\s*=\s*"([^"]+)"', webpage, 'release url') - theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path') + theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path') video_id = theplatform_path.split('/')[-1] query = { 'mbr': 'true', diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index e8131333f..2047d4402 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -43,7 +43,7 @@ class NaverIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', + m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"', webpage) if m_id is None: error = self._html_search_regex( diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index fa4ef20c5..b8fe24407 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE): class HetKlokhuisIE(NPODataMidEmbedIE): IE_NAME = 'hetklokhuis' - _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' _TEST = { 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 2b830cf47..3c8053a26 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor): video_url = self._html_search_regex( r'<param name="src" value="([^"]+)"', webpage, 'video url') title = self._html_search_regex( - r'<title>([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!', + r'([^<]+)   RUHD\.ru - Видео Высокого качества №1 в России!', webpage, 'title') description = self._html_search_regex( r'(?s)
    (.+?)', diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py index cce65fb10..ae3dd1380 100644 --- a/youtube_dl/extractor/stanfordoc.py +++ b/youtube_dl/extractor/stanfordoc.py @@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor): r'(?s)([^<]+)', coursepage, 'description', fatal=False) - links = orderedSet(re.findall(r'', coursepage)) + links = orderedSet(re.findall(r'', coursepage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] @@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor): rootpage = self._download_webpage(rootURL, info['id'], errnote='Unable to download course info page') - links = orderedSet(re.findall(r'', rootpage)) + links = orderedSet(re.findall(r'', rootpage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index de236bbba..b1a985ff6 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): def hex_to_bytes(hex): return binascii.a2b_hex(hex.encode('ascii')) - relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) + relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1) clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path)) checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() sig = flags + expiration_date + checksum + str_to_hex(sig_secret) diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 33683b139..dc3dd03c8 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor): info_dict = self._extract_jwplayer_data( webpage, video_id, require_title=False) uploader = self._html_search_regex( - r': ([^<]+)', + r': ([^<]+)', webpage, 'uploader name', fatal=False) uploader_id = self._html_search_regex( - r': (?:[^<]+)', + r': (?:[^<]+)', webpage, 'uploader id', fatal=False) info_dict.update({ diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 0df3ad7c7..1b0b96371 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -174,7 +174,7 @@ class TwitterCardIE(TwitterBaseIE): webpage = self._download_webpage(url, video_id) iframe_url = self._html_search_regex( - r']+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', + r']+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', webpage, 'video iframe', default=None) if iframe_url: return self.url_result(iframe_url) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index b8b8bf979..bcc28693a 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -198,7 +198,7 @@ class ViceShowIE(InfoExtractor): class ViceArticleIE(InfoExtractor): IE_NAME = 'vice:article' - _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P[^?#]+)' + _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P[^?#]+)' _TESTS = [{ 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py index 5de8273c3..cf690d7b0 100644 --- a/youtube_dl/extractor/videopremium.py +++ b/youtube_dl/extractor/videopremium.py @@ -26,7 +26,7 @@ class VideoPremiumIE(InfoExtractor): webpage_url = 'http://videopremium.tv/' + video_id webpage = self._download_webpage(webpage_url, video_id) - if re.match(r'^]*>window.location\s*=', webpage): + if re.match(r'^]*>window\.location\s*=', webpage): # Download again, we need a cookie webpage = self._download_webpage( webpage_url, video_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index edd8713b7..54f5d7279 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1683,7 +1683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_uploader_id = None video_uploader_url = None mobj = re.search( - r'', + r'', video_webpage) if mobj is not None: video_uploader_id = mobj.group('uploader_id') From ae5af89079558e0e128dc4d7f033459e68ad81e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 9 Oct 2017 23:52:39 +0700 Subject: [PATCH 0337/2417] [hrti:playlist] Relax _VALID_URL --- youtube_dl/extractor/hrti.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 7cef5f6ce..6424d34ac 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -173,7 +173,7 @@ class HRTiIE(HRTiBaseIE): class HRTiPlaylistIE(HRTiBaseIE): - _VALID_URL = r'https?://hrti\.hrt\.hr/#/video/list/category/(?P[0-9]+)/(?P[^/]+)?' + _VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P[0-9]+)/(?P[^/]+)?' _TESTS = [{ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', 'info_dict': { @@ -185,6 +185,9 @@ class HRTiPlaylistIE(HRTiBaseIE): }, { 'url': 'https://hrti.hrt.hr/#/video/list/category/212/', 'only_matching': True, + }, { + 'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena', + 'only_matching': True, }] def _real_extract(self, url): From 9e71f88105e546573b8615d49f9d0c064496d6e6 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Mon, 9 Oct 2017 22:48:26 +0200 Subject: [PATCH 0338/2417] [vvvvid] Fix typo --- youtube_dl/extractor/vvvvid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py index d44ec85fd..656a4b9e5 100644 --- a/youtube_dl/extractor/vvvvid.py +++ b/youtube_dl/extractor/vvvvid.py @@ -133,7 +133,7 @@ class VVVVIDIE(InfoExtractor): 'season_id': season_id, 'season_number': video_data.get('season_number'), 'episode_id': str_or_none(video_data.get('id')), - 'epidode_number': int_or_none(video_data.get('number')), + 'episode_number': int_or_none(video_data.get('number')), 'episode_title': video_data['title'], 'view_count': int_or_none(video_data.get('views')), 'like_count': int_or_none(video_data.get('video_likes')), From 01c742ecd09be734ca4f3db08aba73424683ac1b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 10 Oct 2017 23:20:38 +0800 Subject: [PATCH 0339/2417] [facebook] Support thumbnails (closes #14416) --- ChangeLog | 6 ++++++ youtube_dl/extractor/facebook.py | 18 ++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2feb3dc2d..2684cbcdd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [facebook] Support thumbnails (#14416) + + version 2017.10.07 Core diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 4b3f6cc86..220ada3a6 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -67,9 +67,9 @@ class FacebookIE(InfoExtractor): 'uploader': 'Tennis on Facebook', 'upload_date': '20140908', 'timestamp': 1410199200, - } + }, + 'skip': 'Requires logging in', }, { - 'note': 'Video without discernible title', 'url': 'https://www.facebook.com/video.php?v=274175099429670', 'info_dict': { 'id': '274175099429670', @@ -78,6 +78,7 @@ class FacebookIE(InfoExtractor): 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', 'timestamp': 1399398998, + 'thumbnail': r're:^https?://.*', }, 'expected_warnings': [ 'title' @@ -94,6 +95,7 @@ class FacebookIE(InfoExtractor): 'upload_date': '20160110', 'timestamp': 1452431627, }, + 'skip': 'Requires logging in', }, { 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6', @@ -121,7 +123,11 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '10153664894881749', 'ext': 'mp4', - 'title': 'Facebook video #10153664894881749', + 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...', + 'thumbnail': r're:^https?://.*', + 'timestamp': 1456259628, + 'upload_date': '20160223', + 'uploader': 'Barack Obama', }, }, { # have 1080P, but only up to 720p in swf params @@ -130,10 +136,11 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '10155529876156509', 'ext': 'mp4', - 'title': 'Holocaust survivor becomes US citizen', + 'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...', 'timestamp': 1477818095, 'upload_date': '20161030', 'uploader': 'CNN', + 'thumbnail': r're:^https?://.*', }, }, { # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall @@ -158,6 +165,7 @@ class FacebookIE(InfoExtractor): 'timestamp': 1477305000, 'upload_date': '20161024', 'uploader': 'La Guía Del Varón', + 'thumbnail': r're:^https?://.*', }, 'params': { 'skip_download': True, @@ -376,6 +384,7 @@ class FacebookIE(InfoExtractor): timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) + thumbnail = self._og_search_thumbnail(webpage) info_dict = { 'id': video_id, @@ -383,6 +392,7 @@ class FacebookIE(InfoExtractor): 'formats': formats, 'uploader': uploader, 'timestamp': timestamp, + 'thumbnail': thumbnail, } return webpage, info_dict From d0f2d6411406a35c9593bbb85375c2d7f8300c77 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Tue, 10 Oct 2017 18:45:10 +0200 Subject: [PATCH 0340/2417] [slideslive] Add extractor (closes #2680) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/slideslive.py | 34 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 youtube_dl/extractor/slideslive.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24e9acda6..d0f71aecd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -943,6 +943,7 @@ from .skynewsarabia import ( ) from .skysports import SkySportsIE from .slideshare import SlideshareIE +from .slideslive import SlidesLiveIE from .slutload import SlutloadIE from .smotri import ( SmotriIE, diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py new file mode 100644 index 000000000..104576033 --- /dev/null +++ b/youtube_dl/extractor/slideslive.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class SlidesLiveIE(InfoExtractor): + _VALID_URL = r'https?://slideslive\.com/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', + 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f', + 'info_dict': { + 'id': 'LMtgR8ba0b0', + 'ext': 'mp4', + 'title': '38902413: external video', + 'description': '3890241320170925-9-1yd6ech.mp4', + 'uploader': 'SlidesLive Administrator', + 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', + 'upload_date': '20170925', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + url, video_id, headers={'Accept': 'application/json'}) + service_name = video_data['video_service_name'] + if service_name == 'YOUTUBE': + yt_video_id = video_data['video_service_id'] + return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) + else: + raise ExtractorError( + 'Unsupported service name: {0}'.format(service_name), expected=True) From 04af3aca049588b6b3d4d4b57ee47224fdeee90f Mon Sep 17 00:00:00 2001 From: Khang Nguyen Date: Thu, 5 Oct 2017 21:37:18 +0700 Subject: [PATCH 0341/2417] Remove YoutubeSharedVideoIE https://github.com/rg3/youtube-dl/issues/14303 --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/youtube.py | 33 ------------------------------ 2 files changed, 34 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d0f71aecd..d96eafbc3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1346,7 +1346,6 @@ from .youtube import ( YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE, - YoutubeSharedVideoIE, YoutubeShowIE, YoutubeSubscriptionsIE, YoutubeTruncatedIDIE, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 54f5d7279..6e2d57d6a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2040,39 +2040,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } -class YoutubeSharedVideoIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P[0-9A-Za-z_-]{11})' - IE_NAME = 'youtube:shared' - - _TEST = { - 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU', - 'info_dict': { - 'id': 'uPDB5I9wfp8', - 'ext': 'webm', - 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3', - 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d', - 'upload_date': '20160219', - 'uploader': 'Pocoyo - Português (BR)', - 'uploader_id': 'PocoyoBrazil', - }, - 'add_ie': ['Youtube'], - 'params': { - # There are already too many Youtube downloads - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - real_video_id = self._html_search_meta( - 'videoId', webpage, 'YouTube video id', fatal=True) - - return self.url_result(real_video_id, YoutubeIE.ie_key()) - - class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: From dfc80bdd2e4ef3d30f161a93f99f3050537944ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 11 Oct 2017 02:03:00 +0800 Subject: [PATCH 0342/2417] [ChangeLog] Update after #14420 --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 2684cbcdd..c541c4f62 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +- [youtube:shared] Removed extractor (#14420) + [facebook] Support thumbnails (#14416) From cdab1df91242fb617b09a31c023822ef31ea37b8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 10:04:46 +0000 Subject: [PATCH 0343/2417] [afreecatv] remove AfreecaTVGlobalIE the website now show this message > Global AfreecaTV will be merged and integrated on July 20th, 2017. Every user around the world are now able to interact with one another on www.afreecatv.com! --- youtube_dl/extractor/afreecatv.py | 104 ----------------------------- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 1 insertion(+), 108 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 2c58f4617..e6513c7a4 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -271,107 +271,3 @@ class AfreecaTVIE(InfoExtractor): }) return info - - -class AfreecaTVGlobalIE(AfreecaTVIE): - IE_NAME = 'afreecatv:global' - _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P\d+)(?:/v/(?P\d+))?' - _TESTS = [{ - 'url': 'http://afreeca.tv/36853014/v/58301', - 'info_dict': { - 'id': '58301', - 'title': 'tryhard top100', - 'uploader_id': '36853014', - 'uploader': 'makgi Hearthstone Live!', - }, - 'playlist_count': 3, - }] - - def _real_extract(self, url): - channel_id, video_id = re.match(self._VALID_URL, url).groups() - video_type = 'video' if video_id else 'live' - query = { - 'pt': 'view', - 'bid': channel_id, - } - if video_id: - query['vno'] = video_id - video_data = self._download_json( - 'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type), - video_id or channel_id, query=query)['channel'] - - if video_data.get('result') != 1: - raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg'])) - - title = video_data['title'] - - info = { - 'thumbnail': video_data.get('thumb'), - 'view_count': int_or_none(video_data.get('vcnt')), - 'age_limit': int_or_none(video_data.get('grade')), - 'uploader_id': channel_id, - 'uploader': video_data.get('cname'), - } - - if video_id: - entries = [] - for i, f in enumerate(video_data.get('flist', [])): - video_key = self.parse_video_key(f.get('key', '')) - f_url = f.get('file') - if not video_key or not f_url: - continue - entries.append({ - 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), - 'title': title, - 'upload_date': video_key.get('upload_date'), - 'duration': int_or_none(f.get('length')), - 'url': f_url, - 'protocol': 'm3u8_native', - 'ext': 'mp4', - }) - - info.update({ - 'id': video_id, - 'title': title, - 'duration': int_or_none(video_data.get('length')), - }) - if len(entries) > 1: - info['_type'] = 'multi_video' - info['entries'] = entries - elif len(entries) == 1: - i = entries[0].copy() - i.update(info) - info = i - else: - formats = [] - for s in video_data.get('strm', []): - s_url = s.get('purl') - if not s_url: - continue - stype = s.get('stype') - if stype == 'HLS': - formats.extend(self._extract_m3u8_formats( - s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False)) - elif stype == 'RTMP': - format_id = [stype] - label = s.get('label') - if label: - format_id.append(label) - formats.append({ - 'format_id': '-'.join(format_id), - 'url': s_url, - 'tbr': int_or_none(s.get('bps')), - 'height': int_or_none(s.get('brt')), - 'ext': 'flv', - 'rtmp_live': True, - }) - self._sort_formats(formats) - - info.update({ - 'id': channel_id, - 'title': self._live_title(title), - 'is_live': True, - 'formats': formats, - }) - - return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d96eafbc3..a363d95bf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -31,10 +31,7 @@ from .aenetworks import ( AENetworksIE, HistoryTopicIE, ) -from .afreecatv import ( - AfreecaTVIE, - AfreecaTVGlobalIE, -) +from .afreecatv import AfreecaTVIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE From 4fe4bda287f4b506850f0baa6ff86445423751e7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 11:36:05 +0000 Subject: [PATCH 0344/2417] [tubitv] add support for new url format(fixes #14460) --- youtube_dl/extractor/tubitv.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index c44018aec..36f6c1673 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -13,11 +13,11 @@ from ..utils import ( class TubiTvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P[0-9]+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' _GEO_COUNTRIES = ['US'] - _TEST = { + _TESTS = [{ 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'md5': '43ac06be9326f41912dc64ccf7a80320', 'info_dict': { @@ -27,7 +27,13 @@ class TubiTvIE(InfoExtractor): 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.', 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434', }, - } + }, { + 'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories', + 'only_matching': True, + }, { + 'url': 'http://tubitv.com/movies/383676/tracker', + 'only_matching': True, + }] def _login(self): (username, password) = self._get_login_info() From 5fe75f976f6c8da76bfea68e073e5f35c4300442 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 14:14:51 +0000 Subject: [PATCH 0345/2417] [tva] fix extraction(fixes #14328) --- youtube_dl/extractor/tva.py | 48 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py index 3ced098f9..b57abeaa4 100644 --- a/youtube_dl/extractor/tva.py +++ b/youtube_dl/extractor/tva.py @@ -3,52 +3,50 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - int_or_none, - parse_iso8601, + float_or_none, smuggle_url, ) class TVAIE(InfoExtractor): - _VALID_URL = r'https?://videos\.tva\.ca/episode/(?P\d+)' + _VALID_URL = r'https?://videos\.tva\.ca/details/_(?P\d+)' _TEST = { - 'url': 'http://videos.tva.ca/episode/85538', + 'url': 'https://videos.tva.ca/details/_5596811470001', 'info_dict': { - 'id': '85538', + 'id': '5596811470001', 'ext': 'mp4', - 'title': 'Épisode du 25 janvier 2017', - 'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98', - 'upload_date': '20170126', - 'timestamp': 1485442329, + 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !', + 'uploader_id': '5481942443001', + 'upload_date': '20171003', + 'timestamp': 1507064617, }, 'params': { # m3u8 download 'skip_download': True, } } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s' def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - "https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id, - video_id, query={ - '$expand': 'Metadata,CustomId', - '$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName', - '$format': 'json', + 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={ + 'Accept': 'application/json', }) - metadata = video_data.get('Metadata', {}) + + def get_attribute(key): + for attribute in video_data.get('attributes', []): + if attribute.get('key') == key: + return attribute.get('value') + return None return { '_type': 'url_transparent', 'id': video_id, - 'title': video_data['Title'], - 'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}), - 'description': video_data.get('LongDescription') or video_data.get('ShortDescription'), - 'series': video_data.get('ShowName'), - 'episode': metadata.get('EpisodeTitle'), - 'episode_number': int_or_none(metadata.get('EpisodeNumber')), - 'categories': video_data.get('Categories'), - 'average_rating': video_data.get('AverageUserRating'), - 'timestamp': parse_iso8601(video_data.get('CreatedDate')), - 'ie_key': 'Ooyala', + 'title': get_attribute('title'), + 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}), + 'description': get_attribute('description'), + 'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'), + 'duration': float_or_none(get_attribute('video-duration'), 1000), + 'ie_key': 'BrightcoveNew', } From 26bae2d96509b5c5ec80c27c1ea754b53c53818c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Oct 2017 21:59:30 +0700 Subject: [PATCH 0346/2417] [generic] Add support for channel9 embeds (closes #14469) --- youtube_dl/extractor/channel9.py | 6 ++++++ youtube_dl/extractor/generic.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index e92894246..81108e704 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor): _RSS_URL = 'http://channel9.msdn.com/%s/RSS' + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b', + webpage) + def _extract_list(self, video_id, rss_url=None): if not rss_url: rss_url = self._RSS_URL % video_id diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 68b633839..6dab4c7f4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -101,6 +101,7 @@ from .mediaset import MediasetIE from .joj import JojIE from .megaphone import MegaphoneIE from .vzaar import VzaarIE +from .channel9 import Channel9IE class GenericIE(InfoExtractor): @@ -2871,6 +2872,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) + channel9_urls = Channel9IE._extract_urls(webpage) + if channel9_urls: + return self.playlist_from_matches( + channel9_urls, video_id, video_title, ie=Channel9IE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): From 782195a9d417aab21cff4abe35ad9d705f4d8d83 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 15:48:32 +0000 Subject: [PATCH 0347/2417] [once] add support for vmap urls --- youtube_dl/extractor/gamespot.py | 2 +- youtube_dl/extractor/once.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 00d311158..02804d297 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -105,7 +105,7 @@ class GameSpotIE(OnceIE): onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') if onceux_url: formats.extend(self._extract_once_formats(re.sub( - r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', ''))) + r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url))) if not formats: for quality in ['sd', 'hd']: diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index 1bf96ea56..a637c8ecf 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class OnceIE(InfoExtractor): - _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P[^/]+)/(?P[^/]+)/(?:[^/]+/)?(?P[^/]+)/content\.(?:once|m3u8|mp4)' + _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P[^/]+)/(?P[^/]+)/(?:[^/]+/)?(?P[^/]+)/content\.(?:once|m3u8|mp4)' ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' From 9e38dbb19ca6874c7350647ea2883d5bbb3b50a1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 15:50:00 +0000 Subject: [PATCH 0348/2417] [voxmedia] add support for recode.net(fixes #14173) --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/voxmedia.py | 66 ++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a363d95bf..5629c7623 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1244,7 +1244,10 @@ from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE from .voot import VootIE -from .voxmedia import VoxMediaIE +from .voxmedia import ( + VoxMediaVolumeIE, + VoxMediaIE, +) from .vporn import VpornIE from .vrt import VRTIE from .vrak import VrakIE diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index f8e331493..c7a0a88fe 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -2,11 +2,44 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .once import OnceIE from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError + + +class VoxMediaVolumeIE(OnceIE): + _VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P[0-9a-f]{9})' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_data = self._parse_json(self._search_regex( + r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id) + for provider_video_type in ('ooyala', 'youtube', 'brightcove'): + provider_video_id = video_data.get('%s_id' % provider_video_type) + if not provider_video_id: + continue + info = { + 'id': video_id, + 'title': video_data.get('title_short'), + 'description': video_data.get('description_long') or video_data.get('description_short'), + 'thumbnail': video_data.get('brightcove_thumbnail') + } + if provider_video_type == 'brightcove': + info['formats'] = self._extract_once_formats(provider_video_id) + self._sort_formats(info['formats']) + else: + info.update({ + '_type': 'url_transparent', + 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), + 'ie_key': provider_video_type.capitalize(), + }) + return info + raise ExtractorError('Unable to find provider video id') class VoxMediaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P[^/?]+)' _TESTS = [{ 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', 'info_dict': { @@ -31,6 +64,7 @@ class VoxMediaIE(InfoExtractor): 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', }, 'add_ie': ['Ooyala'], + 'skip': 'Video Not Found', }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', @@ -84,6 +118,17 @@ class VoxMediaIE(InfoExtractor): 'description': 'md5:e02d56b026d51aa32c010676765a690d', }, }], + }, { + # volume embed, Brightcove Once + 'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya', + 'md5': '01571a896281f77dc06e084138987ea2', + 'info_dict': { + 'id': '1231c973d', + 'ext': 'mp4', + 'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella', + 'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.', + }, + 'add_ie': ['VoxMediaVolume'], }] def _real_extract(self, url): @@ -91,9 +136,14 @@ class VoxMediaIE(InfoExtractor): webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id)) def create_entry(provider_video_id, provider_video_type, title=None, description=None): + video_url = { + 'youtube': '%s', + 'ooyala': 'ooyala:%s', + 'volume': 'http://volume.vox-cdn.com/embed/%s', + }[provider_video_type] % provider_video_id return { '_type': 'url_transparent', - 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), + 'url': video_url, 'title': title or self._og_search_title(webpage), 'description': description or self._og_search_description(webpage), } @@ -124,17 +174,7 @@ class VoxMediaIE(InfoExtractor): volume_uuid = self._search_regex( r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None) if volume_uuid: - volume_webpage = self._download_webpage( - 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) - video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) - for provider_video_type in ('ooyala', 'youtube'): - provider_video_id = video_data.get('%s_id' % provider_video_type) - if provider_video_id: - description = video_data.get('description_long') or video_data.get('description_short') - entries.append(create_entry( - provider_video_id, provider_video_type, video_data.get('title_short'), description)) - break + entries.append(create_entry(volume_uuid, 'volume')) if len(entries) == 1: return entries[0] From af0f74288dc1b46147bc8f6b5692d2a21c6e178b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Oct 2017 23:45:03 +0700 Subject: [PATCH 0349/2417] [YoutubeDL] Improve _default_format_spec (closes #14461) --- test/test_YoutubeDL.py | 10 ++++++++-- youtube_dl/YoutubeDL.py | 31 ++++++++++++++++++------------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index db936bfb8..4af92fbd4 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + ydl = YDL({'is_live': True}) + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + + ydl = YDL({'simulate': True, 'is_live': True}) + self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + ydl = YDL({'outtmpl': '-'}) - self.assertEqual(ydl._default_format_spec({}), 'best') + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') ydl = YDL({}) self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best') - self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') class TestYoutubeDL(unittest.TestCase): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 855d6b8e5..342d6b47c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1078,22 +1078,27 @@ class YoutubeDL(object): return _filter def _default_format_spec(self, info_dict, download=True): - req_format_list = [] - def can_have_partial_formats(): - if self.params.get('simulate', False): - return True - if not download: - return True - if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': - return False - if info_dict.get('is_live'): - return False + def can_merge(): merger = FFmpegMergerPP(self) return merger.available and merger.can_merge() - if can_have_partial_formats(): - req_format_list.append('bestvideo+bestaudio') - req_format_list.append('best') + + def prefer_best(): + if self.params.get('simulate', False): + return False + if not download: + return False + if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': + return True + if info_dict.get('is_live'): + return True + if not can_merge(): + return True + return False + + req_format_list = ['bestvideo+bestaudio', 'best'] + if prefer_best(): + req_format_list.reverse() return '/'.join(req_format_list) def build_format_selector(self, format_spec): From 694b61545cc3fa37c31d5f62d26101fb2620a01d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Oct 2017 00:41:20 +0700 Subject: [PATCH 0350/2417] [nexx] Add support for shortcuts and relax domain id extraction --- youtube_dl/extractor/nexx.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index d0235fdfe..071879ba4 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -18,7 +18,13 @@ from ..utils import ( class NexxIE(InfoExtractor): - _VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P\d+)/videos/byid/(?P\d+)' + _VALID_URL = r'''(?x) + (?: + https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P\d+)/videos/byid/| + nexx:(?P\d+): + ) + (?P\d+) + ''' _TESTS = [{ # movie 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907', @@ -62,8 +68,18 @@ class NexxIE(InfoExtractor): }, { 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907', 'only_matching': True, + }, { + 'url': 'nexx:748:128907', + 'only_matching': True, }] + @staticmethod + def _extract_domain_id(webpage): + mobj = re.search( + r']+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P\d+)', + webpage) + return mobj.group('id') if mobj else None + @staticmethod def _extract_urls(webpage): # Reference: @@ -72,11 +88,8 @@ class NexxIE(InfoExtractor): entries = [] # JavaScript Integration - mobj = re.search( - r']+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P\d+)', - webpage) - if mobj: - domain_id = mobj.group('id') + domain_id = NexxIE._extract_domain_id(webpage) + if domain_id: for video_id in re.findall( r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)', webpage): @@ -112,7 +125,8 @@ class NexxIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - domain_id, video_id = mobj.group('domain_id', 'id') + domain_id = mobj.group('domain_id') or mobj.group('domain_id_s') + video_id = mobj.group('id') # Reverse engineered from JS code (see getDeviceID function) device_id = '%d:%d:%d%d' % ( From ff3f1a62f087332fa6409b5cbc39871d49e74f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Oct 2017 00:44:13 +0700 Subject: [PATCH 0351/2417] [funk] Add extractor (closes #14464) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/funk.py | 43 ++++++++++++++++++++++++++++++ youtube_dl/extractor/generic.py | 16 ----------- 3 files changed, 44 insertions(+), 16 deletions(-) create mode 100644 youtube_dl/extractor/funk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5629c7623..ecb33bc9e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -381,6 +381,7 @@ from .freesound import FreesoundIE from .freespeech import FreespeechIE from .freshlive import FreshLiveIE from .funimation import FunimationIE +from .funk import FunkIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py new file mode 100644 index 000000000..ce5c67fbb --- /dev/null +++ b/youtube_dl/extractor/funk.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .nexx import NexxIE +from ..utils import extract_attributes + + +class FunkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P[^?/#]+)' + _TESTS = [{ + 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/', + 'md5': '4d40974481fa3475f8bccfd20c5361f8', + 'info_dict': { + 'id': '716599', + 'ext': 'mp4', + 'title': 'Neue Rechte Welle', + 'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69', + 'timestamp': 1501337639, + 'upload_date': '20170729', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, { + 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + domain_id = NexxIE._extract_domain_id(webpage) or '741' + nexx_id = extract_attributes(self._search_regex( + r'(]id=["\']mediaplayer-funk[^>]+>)', + webpage, 'media player'))['data-id'] + + return self.url_result( + 'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(), + video_id=nexx_id) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6dab4c7f4..39630b6f6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1613,22 +1613,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['BrightcoveLegacy'], }, - # Nexx embed - { - 'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503', - 'info_dict': { - 'id': '247746', - 'ext': 'mp4', - 'title': "Yesterday's Jam (OV)", - 'description': 'md5:09bc0984723fed34e2581624a84e05f0', - 'timestamp': 1492594816, - 'upload_date': '20170419', - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, # Facebook ', webpage, 'embed url')) + r'', + webpage, 'embed url')) + if VKIE.suitable(embed_url): + return self.url_result(embed_url, VKIE.ie_key(), video_id) + + self._request_webpage( + HEADRequest(embed_url), video_id, headers={'Referer': url}) + video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A') + item = self._download_json( + 'https://api.vk.com/method/video.get', video_id, + headers={'User-Agent': 'okhttp/3.4.1'}, query={ + 'access_token': access_token, + 'sig': sig, + 'v': 5.44, + 'videos': video_id, + })['response']['items'][0] + title = item['title'] + + formats = [] + for f_id, f_url in item.get('files', {}).items(): + if f_id == 'external': + return self.url_result(f_url) + ext, height = f_id.split('_') + formats.append({ + 'format_id': height + 'p', + 'url': f_url, + 'height': int_or_none(height), + 'ext': ext, + }) + self._sort_formats(formats) + + thumbnails = [] + for k, v in item.items(): + if k.startswith('photo_') and v: + width = k.replace('photo_', '') + thumbnails.append({ + 'id': width, + 'url': v, + 'width': int_or_none(width), + }) return { - '_type': 'url_transparent', - 'url': embed_url, + 'id': video_id, + 'title': title, + 'formats': formats, + 'comment_count': int_or_none(item.get('comments')), + 'description': item.get('description'), + 'duration': int_or_none(item.get('duration')), + 'thumbnails': thumbnails, + 'timestamp': int_or_none(item.get('date')), + 'uploader': item.get('owner_id'), + 'view_count': int_or_none(item.get('views')), } From 68867668cf273c96d4a3d7429fb294f036f56437 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 12 Mar 2019 16:52:28 +0100 Subject: [PATCH 1762/2417] [npr] fix extraction(closes #10793)(closes #13440) --- youtube_dl/extractor/npr.py | 76 +++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py index 1777aa10b..a5e8baa7e 100644 --- a/youtube_dl/extractor/npr.py +++ b/youtube_dl/extractor/npr.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( int_or_none, qualities, @@ -9,16 +8,16 @@ from ..utils import ( class NprIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P\d+)' _TESTS = [{ - 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205', + 'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more', 'info_dict': { 'id': '449974205', 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More' }, 'playlist_count': 7, }, { - 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?action=1&t=1&islist=false&id=446928052&m=446929930&live=1', + 'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz', 'info_dict': { 'id': '446928052', 'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'" @@ -32,30 +31,46 @@ class NprIE(InfoExtractor): 'duration': 402, }, }], + }, { + # mutlimedia, not media title + 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert', + 'info_dict': { + 'id': '533198237', + 'title': 'Tigers Jaw: Tiny Desk Concert', + }, + 'playlist': [{ + 'md5': '12fa60cb2d3ed932f53609d4aeceabf1', + 'info_dict': { + 'id': '533201718', + 'ext': 'mp4', + 'title': 'Tigers Jaw: Tiny Desk Concert', + 'duration': 402, + }, + }], + 'expected_warnings': ['Failed to download m3u8 information'], }] def _real_extract(self, url): playlist_id = self._match_id(url) - config = self._download_json( - 'http://api.npr.org/query?%s' % compat_urllib_parse_urlencode({ + story = self._download_json( + 'http://api.npr.org/query', playlist_id, query={ 'id': playlist_id, - 'fields': 'titles,audio,show', + 'fields': 'audio,multimedia,title', 'format': 'json', 'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010', - }), playlist_id) + })['list']['story'][0] + playlist_title = story.get('title', {}).get('$text') - story = config['list']['story'][0] - - KNOWN_FORMATS = ('threegp', 'mp4', 'mp3') + KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3') quality = qualities(KNOWN_FORMATS) entries = [] - for audio in story.get('audio', []): - title = audio.get('title', {}).get('$text') - duration = int_or_none(audio.get('duration', {}).get('$text')) + for media in story.get('audio', []) + story.get('multimedia', []): + media_id = media['id'] + formats = [] - for format_id, formats_entry in audio.get('format', {}).items(): + for format_id, formats_entry in media.get('format', {}).items(): if not formats_entry: continue if isinstance(formats_entry, list): @@ -64,19 +79,30 @@ class NprIE(InfoExtractor): if not format_url: continue if format_id in KNOWN_FORMATS: - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'ext': formats_entry.get('type'), - 'quality': quality(format_id), - }) + if format_id == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, media_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif format_id == 'smil': + smil_formats = self._extract_smil_formats( + format_url, media_id, transform_source=lambda s: s.replace( + 'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/')) + self._check_formats(smil_formats, media_id) + formats.extend(smil_formats) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'quality': quality(format_id), + }) self._sort_formats(formats) + entries.append({ - 'id': audio['id'], - 'title': title, - 'duration': duration, + 'id': media_id, + 'title': media.get('title', {}).get('$text') or playlist_title, + 'thumbnail': media.get('altImageUrl', {}).get('$text'), + 'duration': int_or_none(media.get('duration', {}).get('$text')), 'formats': formats, }) - playlist_title = story.get('title', {}).get('$text') return self.playlist_result(entries, playlist_id, playlist_title) From 47f9792620e06c3b9ac24d402951bce01ae5c038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 12 Mar 2019 22:55:32 +0700 Subject: [PATCH 1763/2417] [ruleporn] Remove extractor (closes #15344, closes #20324) Covered by generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/ruleporn.py | 44 ------------------------------ 2 files changed, 45 deletions(-) delete mode 100644 youtube_dl/extractor/ruleporn.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 687994045..884d25f50 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -977,7 +977,6 @@ from .rtvnh import RTVNHIE from .rtvs import RTVSIE from .rudo import RudoIE from .ruhd import RUHDIE -from .ruleporn import RulePornIE from .rutube import ( RutubeIE, RutubeChannelIE, diff --git a/youtube_dl/extractor/ruleporn.py b/youtube_dl/extractor/ruleporn.py deleted file mode 100644 index ebf9808d5..000000000 --- a/youtube_dl/extractor/ruleporn.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import unicode_literals - -from .nuevo import NuevoBaseIE - - -class RulePornIE(NuevoBaseIE): - _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P[^/?#&]+)' - _TEST = { - 'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/', - 'md5': '86861ebc624a1097c7c10eaf06d7d505', - 'info_dict': { - 'id': '48212', - 'display_id': 'brunette-nympho-chick-takes-her-boyfriend-in-every-angle', - 'ext': 'mp4', - 'title': 'Brunette Nympho Chick Takes Her Boyfriend In Every Angle', - 'description': 'md5:6d28be231b981fff1981deaaa03a04d5', - 'age_limit': 18, - 'duration': 635.1, - } - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - video_id = self._search_regex( - r'lovehomeporn\.com/embed/(\d+)', webpage, 'video id') - - title = self._search_regex( - r']+title=(["\'])(?P.+?)\1', - webpage, 'title', group='url') - description = self._html_search_meta('description', webpage) - - info = self._extract_nuevo( - 'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, - video_id) - info.update({ - 'display_id': display_id, - 'title': title, - 'description': description, - 'age_limit': 18 - }) - return info From 6db03a29d139aa7594b24117b912b589c7683a96 Mon Sep 17 00:00:00 2001 From: charon2019 Date: Wed, 13 Mar 2019 10:47:55 +0100 Subject: [PATCH 1764/2417] [anitube] Remove extractor site no longer exists --- youtube_dl/extractor/anitube.py | 30 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 31 deletions(-) delete mode 100644 youtube_dl/extractor/anitube.py diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py deleted file mode 100644 index 2fd912da4..000000000 --- a/youtube_dl/extractor/anitube.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import unicode_literals - -from .nuevo import NuevoBaseIE - - -class AnitubeIE(NuevoBaseIE): - IE_NAME = 'anitube.se' - _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P\d+)' - - _TEST = { - 'url': 'http://www.anitube.se/video/36621', - 'md5': '59d0eeae28ea0bc8c05e7af429998d43', - 'info_dict': { - 'id': '36621', - 'ext': 'mp4', - 'title': 'Recorder to Randoseru 01', - 'duration': 180.19, - }, - 'skip': 'Blocked in the US', - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - key = self._search_regex( - r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') - - return self._extract_nuevo( - 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, video_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 884d25f50..9a0459de8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -38,7 +38,6 @@ from .alphaporno import AlphaPornoIE from .amcnetworks import AMCNetworksIE from .americastestkitchen import AmericasTestKitchenIE from .animeondemand import AnimeOnDemandIE -from .anitube import AnitubeIE from .anvato import AnvatoIE from .anysex import AnySexIE from .aol import AolIE From 79d2077edc9c4ac8c71b672d8eff06ef847842c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 Mar 2019 00:42:14 +0700 Subject: [PATCH 1765/2417] [extractor/common] Fix url meta field for unfragmented DASH formats (closes #20346) --- youtube_dl/extractor/common.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dfd0584d3..c291bc1df 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,10 +108,13 @@ class InfoExtractor(object): for RTMP - RTMP URL, for HLS - URL of the M3U8 media playlist, for HDS - URL of the F4M manifest, - for DASH - URL of the MPD manifest or - base URL representing the media - if MPD manifest is parsed from - a string, + for DASH + - HTTP URL to plain file media (in case of + unfragmented media) + - URL of the MPD manifest or base URL + representing the media if MPD manifest + is parsed froma string (in case of + fragmented media) for MSS - URL of the ISM manifest. * manifest_url The URL of the manifest file in case of @@ -2137,8 +2140,6 @@ class InfoExtractor(object): bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, - # NB: mpd_url may be empty when MPD manifest is parsed from a string - 'url': mpd_url or base_url, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), @@ -2277,10 +2278,14 @@ class InfoExtractor(object): fragment['duration'] = segment_duration fragments.append(fragment) representation_ms_info['fragments'] = fragments - # NB: MPD manifest may contain direct URLs to unfragmented media. - # No fragments key is present in this case. + # If there is a fragments key available then we correctly recognized fragmented media. + # Otherwise we will assume unfragmented media with direct access. Technically, such + # assumption is not necessarily correct since we may simply have no support for + # some forms of fragmented media renditions yet, but for now we'll use this fallback. if 'fragments' in representation_ms_info: f.update({ + # NB: mpd_url may be empty when MPD manifest is parsed from a string + 'url': mpd_url or base_url, 'fragment_base_url': base_url, 'fragments': [], 'protocol': 'http_dash_segments', @@ -2291,6 +2296,10 @@ class InfoExtractor(object): f['url'] = initialization_url f['fragments'].append({location_key(initialization_url): initialization_url}) f['fragments'].extend(representation_ms_info['fragments']) + else: + # Assuming direct URL to unfragmented media. + f['url'] = base_url + # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation # is not necessarily unique within a Period thus formats with # the same `format_id` are quite possible. There are numerous examples From 2e27421c70bcd71ea7e7d7fe8b7e468731334d51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 Mar 2019 01:20:24 +0700 Subject: [PATCH 1766/2417] [test_InfoExtractor] Add test for #20346 --- test/test_InfoExtractor.py | 56 +++++++++++++++++++++++++++--- test/testdata/mpd/unfragmented.mpd | 28 +++++++++++++++ 2 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 test/testdata/mpd/unfragmented.mpd diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index da6cd39b6..d23d94349 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -574,7 +574,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ # Also tests duplicate representation ids, see # https://github.com/ytdl-org/youtube-dl/issues/15111 'float_duration', - 'http://unknown/manifest.mpd', + 'http://unknown/manifest.mpd', # mpd_url + None, # mpd_base_url [{ 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'm4a', @@ -654,7 +655,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ), ( # https://github.com/ytdl-org/youtube-dl/pull/14844 'urls_only', - 'http://unknown/manifest.mpd', + 'http://unknown/manifest.mpd', # mpd_url + None, # mpd_base_url [{ 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', @@ -733,15 +735,61 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'width': 1920, 'height': 1080, }] + ), ( + # https://github.com/ytdl-org/youtube-dl/issues/20346 + # Media considered unfragmented even though it contains + # Initialization tag + 'unfragmented', + 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url + 'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url + [{ + 'url': 'https://v.redd.it/hw1x7rcg7zl21/audio', + 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', + 'ext': 'm4a', + 'format_id': 'AUDIO-1', + 'format_note': 'DASH audio', + 'container': 'm4a_dash', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 129.87, + 'asr': 48000, + + }, { + 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240', + 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', + 'ext': 'mp4', + 'format_id': 'VIDEO-2', + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'acodec': 'none', + 'vcodec': 'avc1.4d401e', + 'tbr': 608.0, + 'width': 240, + 'height': 240, + 'fps': 30, + }, { + 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360', + 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', + 'ext': 'mp4', + 'format_id': 'VIDEO-1', + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'acodec': 'none', + 'vcodec': 'avc1.4d401e', + 'tbr': 804.261, + 'width': 360, + 'height': 360, + 'fps': 30, + }] ) ] - for mpd_file, mpd_url, expected_formats in _TEST_CASES: + for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES: with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, mode='r', encoding='utf-8') as f: formats = self.ie._parse_mpd_formats( compat_etree_fromstring(f.read().encode('utf-8')), - mpd_url=mpd_url) + mpd_base_url=mpd_base_url, mpd_url=mpd_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) diff --git a/test/testdata/mpd/unfragmented.mpd b/test/testdata/mpd/unfragmented.mpd new file mode 100644 index 000000000..5a3720be7 --- /dev/null +++ b/test/testdata/mpd/unfragmented.mpd @@ -0,0 +1,28 @@ + + + + + + DASH_360 + + + + + + DASH_240 + + + + + + + + + audio + + + + + + + From ffbd1368df48932e9171e6c8a7a4958430000b13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Mar 2019 07:07:56 +0700 Subject: [PATCH 1767/2417] [update] Hide update URLs behind redirect --- youtube_dl/update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index a14265c1b..002ea7f33 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -31,7 +31,7 @@ def rsa_verify(message, signature, key): def update_self(to_screen, verbose, opener): """Update the program file with the latest version from the repository""" - UPDATE_URL = 'https://ytdl-org.github.io/youtube-dl/update/' + UPDATE_URL = 'https://yt-dl.org/update/' VERSION_URL = UPDATE_URL + 'LATEST_VERSION' JSON_URL = UPDATE_URL + 'versions.json' UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) From 77d95677b7ab4a9840ef142b14627b07a9a31120 Mon Sep 17 00:00:00 2001 From: utlasidyo <42189435+utlasidyo@users.noreply.github.com> Date: Sun, 17 Mar 2019 07:15:15 +0700 Subject: [PATCH 1768/2417] [youtube] Add support for invidiou.sh (#20309) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0bc6cc7a7..886fc1591 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -351,7 +351,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| - (?:www\.)?invidio\.us/| + (?:(?:www|dev)\.)?invidio\.us/| + (?:www\.)?invidiou\.sh/| (?:www\.)?invidious\.snopyta\.org/| (?:www\.)?invidious\.kabi\.tk/| (?:www\.)?vid\.wxzm\.sx/| From e5cfb779ea0814971324d693e02f8c4d4ca78600 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Sat, 16 Mar 2019 17:18:54 -0700 Subject: [PATCH 1769/2417] [ciscolive] Add support for new URL schema (closes #20320, #20351) --- youtube_dl/extractor/ciscolive.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index c99b6ee58..da404e4dc 100644 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -65,8 +65,8 @@ class CiscoLiveBaseIE(InfoExtractor): class CiscoLiveSessionIE(CiscoLiveBaseIE): - _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P[^/?&]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P[^/?&]+)' + _TESTS = [{ 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', 'md5': 'c98acf395ed9c9f766941c70f5352e22', 'info_dict': { @@ -79,7 +79,13 @@ class CiscoLiveSessionIE(CiscoLiveBaseIE): 'uploader_id': '5647924234001', 'location': '16B Mezz.', }, - } + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU', + 'only_matching': True, + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS', + 'only_matching': True, + }] def _real_extract(self, url): rf_id = self._match_id(url) @@ -88,7 +94,7 @@ class CiscoLiveSessionIE(CiscoLiveBaseIE): class CiscoLiveSearchIE(CiscoLiveBaseIE): - _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/' + _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)' _TESTS = [{ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', 'info_dict': { @@ -98,6 +104,9 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): }, { 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', 'only_matching': True, + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/', + 'only_matching': True, }] @classmethod From 0dc41787af0fb011f01d88c1f2ecb686a8416df5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Mar 2019 09:07:47 +0700 Subject: [PATCH 1770/2417] [utils] Introduce parse_bitrate --- test/test_utils.py | 8 ++++++++ youtube_dl/utils.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 409482c3b..acd994bd7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -55,6 +55,7 @@ from youtube_dl.utils import ( parse_count, parse_iso8601, parse_resolution, + parse_bitrate, pkcs1pad, read_batch_urls, sanitize_filename, @@ -1030,6 +1031,13 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_resolution('4k'), {'height': 2160}) self.assertEqual(parse_resolution('8K'), {'height': 4320}) + def test_parse_bitrate(self): + self.assertEqual(parse_bitrate(None), None) + self.assertEqual(parse_bitrate(''), None) + self.assertEqual(parse_bitrate('300kbps'), 300) + self.assertEqual(parse_bitrate('1500kbps'), 1500) + self.assertEqual(parse_bitrate('300 kbps'), 300) + def test_version_tuple(self): self.assertEqual(version_tuple('1'), (1,)) self.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 630dab8eb..f2726a579 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1798,6 +1798,14 @@ def parse_resolution(s): return {} +def parse_bitrate(s): + if not isinstance(s, compat_str): + return + mobj = re.search(r'\b(\d+)\s*kbps', s) + if mobj: + return int(mobj.group(1)) + + def month_by_name(name, lang='en'): """ Return the number of a month by (locale-independently) English name """ From d493f15c1158abd817e191ff830fd5481b1ed42d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Mar 2019 09:09:32 +0700 Subject: [PATCH 1771/2417] [extractor/common] Improve HTML5 entries extraction and add some realworld tests --- test/test_InfoExtractor.py | 178 +++++++++++++++++++++++++++++++++ youtube_dl/extractor/common.py | 43 ++++++-- 2 files changed, 214 insertions(+), 7 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index d23d94349..71f6608fe 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -107,6 +107,184 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(ExtractorError, self.ie._download_json, uri, None) self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + def test_parse_html5_media_entries(self): + # from https://www.r18.com/ + # with kpbs in label + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.r18.com/', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4', + 'ext': 'mp4', + 'format_id': '300kbps', + 'height': 240, + 'tbr': 300, + }, { + 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4', + 'ext': 'mp4', + 'format_id': '1000kbps', + 'height': 480, + 'tbr': 1000, + }, { + 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4', + 'ext': 'mp4', + 'format_id': '1500kbps', + 'height': 740, + 'tbr': 1500, + }], + 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg' + }) + + # from https://www.csfd.cz/ + # with width and height + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.csfd.cz/', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4', + 'ext': 'mp4', + 'width': 640, + 'height': 360, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4', + 'ext': 'mp4', + 'width': 1280, + 'height': 720, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4', + 'ext': 'mp4', + 'width': 1920, + 'height': 1080, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm', + 'ext': 'webm', + 'width': 640, + 'height': 360, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm', + 'ext': 'webm', + 'width': 1280, + 'height': 720, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm', + 'ext': 'webm', + 'width': 1920, + 'height': 1080, + }], + 'subtitles': { + 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}] + }, + 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360' + }) + + # from https://tamasha.com/v/Kkdjw + # with height in label + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://tamasha.com/v/Kkdjw', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4', + }, { + 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4', + 'ext': 'mp4', + 'format_id': '240p', + 'height': 240, + }, { + 'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4', + 'ext': 'mp4', + 'format_id': '144p', + 'height': 144, + }] + }) + + # from https://www.directvnow.com + # with data-src + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.directvnow.com', + r''' + + ''', None)[0], + { + 'formats': [{ + 'ext': 'mp4', + 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', + }] + }) + + # from https://www.directvnow.com + # with data-src + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.directvnow.com', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', + 'ext': 'mp4', + }] + }) + + # from https://www.klarna.com/uk/ + # with data-video-src + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.directvnow.com', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4', + 'ext': 'mp4', + }], + }) + def test_extract_jwplayer_data_realworld(self): # from http://www.suffolk.edu/sjc/ expect_dict( diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c291bc1df..0889288f0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -44,6 +44,7 @@ from ..utils import ( compiled_regex_type, determine_ext, determine_protocol, + dict_get, error_to_compat_str, ExtractorError, extract_attributes, @@ -56,13 +57,16 @@ from ..utils import ( JSON_LD_RE, mimetype2ext, orderedSet, + parse_bitrate, parse_codecs, parse_duration, parse_iso8601, parse_m3u8_attributes, + parse_resolution, RegexNotFoundError, sanitized_Request, sanitize_filename, + str_or_none, unescapeHTML, unified_strdate, unified_timestamp, @@ -2481,18 +2485,43 @@ class InfoExtractor(object): media_info['thumbnail'] = absolute_url(media_attributes.get('poster')) if media_content: for source_tag in re.findall(r']+>', media_content): - source_attributes = extract_attributes(source_tag) - src = source_attributes.get('src') + s_attr = extract_attributes(source_tag) + # data-video-src and data-src are non standard but seen + # several times in the wild + src = dict_get(s_attr, ('src', 'data-video-src', 'data-src')) if not src: continue - f = parse_content_type(source_attributes.get('type')) + f = parse_content_type(s_attr.get('type')) is_plain_url, formats = _media_formats(src, media_type, f) if is_plain_url: - # res attribute is not standard but seen several times - # in the wild + # width, height, res, label and title attributes are + # all not standard but seen several times in the wild + labels = [ + s_attr.get(lbl) + for lbl in ('label', 'title') + if str_or_none(s_attr.get(lbl)) + ] + width = int_or_none(s_attr.get('width')) + height = (int_or_none(s_attr.get('height')) or + int_or_none(s_attr.get('res'))) + if not width or not height: + for lbl in labels: + resolution = parse_resolution(lbl) + if not resolution: + continue + width = width or resolution.get('width') + height = height or resolution.get('height') + for lbl in labels: + tbr = parse_bitrate(lbl) + if tbr: + break + else: + tbr = None f.update({ - 'height': int_or_none(source_attributes.get('res')), - 'format_id': source_attributes.get('label'), + 'width': width, + 'height': height, + 'tbr': tbr, + 'format_id': s_attr.get('label') or s_attr.get('title'), }) f.update(formats[0]) media_info['formats'].append(f) From eba3a2f9ef52abcc0d71afeb2162581cc8958367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Mar 2019 09:15:24 +0700 Subject: [PATCH 1772/2417] [anysex] Remove extractor (closes #19279) --- youtube_dl/extractor/anysex.py | 61 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 62 deletions(-) delete mode 100644 youtube_dl/extractor/anysex.py diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py deleted file mode 100644 index ad86d6e58..000000000 --- a/youtube_dl/extractor/anysex.py +++ /dev/null @@ -1,61 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - parse_duration, - int_or_none, -) - - -class AnySexIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?anysex\.com/(?P\d+)' - _TEST = { - 'url': 'http://anysex.com/156592/', - 'md5': '023e9fbb7f7987f5529a394c34ad3d3d', - 'info_dict': { - 'id': '156592', - 'ext': 'mp4', - 'title': 'Busty and sexy blondie in her bikini strips for you', - 'description': 'md5:de9e418178e2931c10b62966474e1383', - 'categories': ['Erotic'], - 'duration': 270, - 'age_limit': 18, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - - video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL') - - title = self._html_search_regex(r'(.*?)', webpage, 'title') - description = self._html_search_regex( - r'
    ]*>([^<]+)
    ', webpage, 'description', fatal=False) - thumbnail = self._html_search_regex( - r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False) - - categories = re.findall( - r'([^<]+)', webpage) - - duration = parse_duration(self._search_regex( - r'Duration: (?:)?(\d+:\d+)', webpage, 'duration', fatal=False)) - view_count = int_or_none(self._html_search_regex( - r'Views: (\d+)', webpage, 'view count', fatal=False)) - - return { - 'id': video_id, - 'url': video_url, - 'ext': 'mp4', - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'categories': categories, - 'duration': duration, - 'view_count': view_count, - 'age_limit': 18, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9a0459de8..e27a09509 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -39,7 +39,6 @@ from .amcnetworks import AMCNetworksIE from .americastestkitchen import AmericasTestKitchenIE from .animeondemand import AnimeOnDemandIE from .anvato import AnvatoIE -from .anysex import AnySexIE from .aol import AolIE from .allocine import AllocineIE from .aliexpress import AliExpressLiveIE From 81dada0b4ba156812a89f0896579830cccd21fa3 Mon Sep 17 00:00:00 2001 From: Tyler Szabo Date: Sat, 16 Mar 2019 19:37:29 -0700 Subject: [PATCH 1773/2417] [cbc:watch] Add support for gem.cbc.ca (closes #20251, #20359) --- youtube_dl/extractor/cbc.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 43f95c739..751a3a8f2 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -360,7 +360,7 @@ class CBCWatchVideoIE(CBCWatchBaseIE): class CBCWatchIE(CBCWatchBaseIE): IE_NAME = 'cbc.ca:watch' - _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P[0-9a-f-]+)' + _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P[0-9a-f-]+)' _TESTS = [{ # geo-restricted to Canada, bypassable 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4', @@ -386,6 +386,9 @@ class CBCWatchIE(CBCWatchBaseIE): 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.', }, 'playlist_mincount': 30, + }, { + 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42', + 'only_matching': True, }] def _real_extract(self, url): From ddff25c5d1f21c323cc37c08faa11ea98baef622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Mar 2019 09:41:16 +0700 Subject: [PATCH 1774/2417] [extractors] Remove superfluous whitespace --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e27a09509..f32f22a5a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1400,7 +1400,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE From 0146c6cde6959a52287c26c99ce159128832246c Mon Sep 17 00:00:00 2001 From: Lukas Anzinger Date: Sun, 17 Mar 2019 03:57:02 +0100 Subject: [PATCH 1775/2417] [orf:radio] Extract series (#20012) --- youtube_dl/extractor/orf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index d432e3449..499be0029 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -176,7 +176,8 @@ class ORFRadioIE(InfoExtractor): 'description': subtitle, 'duration': (info['end'] - info['start']) / 1000, 'timestamp': info['start'] / 1000, - 'ext': 'mp3' + 'ext': 'mp3', + 'series': data.get('programTitle') } entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] From a63782b58117d651bc2d32edada2e795dfd9eb41 Mon Sep 17 00:00:00 2001 From: wolfy1339 <4595477+wolfy1339@users.noreply.github.com> Date: Sun, 17 Mar 2019 03:20:21 -0400 Subject: [PATCH 1776/2417] [corus] Add support for bigbrothercanada.ca (#20357) --- youtube_dl/extractor/corus.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py index 807a29eea..a1b251804 100644 --- a/youtube_dl/extractor/corus.py +++ b/youtube_dl/extractor/corus.py @@ -13,9 +13,9 @@ class CorusIE(ThePlatformFeedIE): (?:www\.)? (?P (?:globaltv|etcanada)\.com| - (?:hgtv|foodnetwork|slice|history|showcase)\.ca + (?:hgtv|foodnetwork|slice|history|showcase|bigbrothercanada)\.ca ) - /(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=)) + /(?:video/(?:[^/]+/)?|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=)) (?P\d+) ''' _TESTS = [{ @@ -42,6 +42,12 @@ class CorusIE(ThePlatformFeedIE): }, { 'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video', 'only_matching': True, + }, { + 'url': 'http://www.bigbrothercanada.ca/video/1457812035894/', + 'only_matching': True + }, { + 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/', + 'only_matching': True }] _TP_FEEDS = { @@ -73,6 +79,10 @@ class CorusIE(ThePlatformFeedIE): 'feed_id': '9H6qyshBZU3E', 'account_id': 2414426607, }, + 'bigbrothercanada': { + 'feed_id': 'ChQqrem0lNUp', + 'account_id': 2269680845, + }, } def _real_extract(self, url): From 034f5fb5ee37e9edf90f7e82f48af985cdd17901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Mar 2019 14:34:48 +0700 Subject: [PATCH 1777/2417] [radiocanada:audiovideo] Fix typo --- youtube_dl/extractor/radiocanada.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 2e3f1f58f..a28b1a24c 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -147,7 +147,7 @@ class RadioCanadaIE(InfoExtractor): class RadioCanadaAudioVideoIE(InfoExtractor): - 'radiocanada:audiovideo' + IE_NAME = 'radiocanada:audiovideo' _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P[0-9]+)' _TESTS = [{ 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', From 04988b55b54bd7bd803d43a56f87e4728158890a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Mar 2019 15:34:21 +0700 Subject: [PATCH 1778/2417] [openload] Improve embed detection --- youtube_dl/extractor/openload.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index bae7c7ee7..cc323e5e2 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,18 +243,16 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space))' _VALID_URL = r'''(?x) https?:// (?P (?:www\.)? - (?: - openload\.(?:co|io|link|pw)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space) - ) + %s )/ (?:f|embed)/ (?P[a-zA-Z0-9-_]+) - ''' + ''' % _DOMAINS _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -359,8 +357,8 @@ class OpenloadIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return re.findall( - r']+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)', - webpage) + r']+src=["\']((?:https?://)?%s/embed/[a-zA-Z0-9-_]+)' + % OpenloadIE._DOMAINS, webpage) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 8428fdccf2dad7667cf53db449282b56dbbfa3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Mar 2019 17:33:07 +0700 Subject: [PATCH 1779/2417] [yandexvideo] Add extractor --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/yandexvideo.py | 90 +++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/yandexvideo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f32f22a5a..8e7a5bf41 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1444,12 +1444,13 @@ from .yahoo import ( YahooIE, YahooSearchIE, ) +from .yandexdisk import YandexDiskIE from .yandexmusic import ( YandexMusicTrackIE, YandexMusicAlbumIE, YandexMusicPlaylistIE, ) -from .yandexdisk import YandexDiskIE +from .yandexvideo import YandexVideoIE from .yapfiles import YapFilesIE from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE diff --git a/youtube_dl/extractor/yandexvideo.py b/youtube_dl/extractor/yandexvideo.py new file mode 100644 index 000000000..940c24af3 --- /dev/null +++ b/youtube_dl/extractor/yandexvideo.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + url_or_none, +) + + +class YandexVideoIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=| + frontend\.vh\.yandex\.ru/player/ + ) + (?P[\da-f]+) + ''' + _TESTS = [{ + 'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d', + 'md5': '33955d7ae052f15853dc41f35f17581c', + 'info_dict': { + 'id': '4dbb262b4fe5cf15a215de4f34eee34d', + 'ext': 'mp4', + 'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону', + 'description': '', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 0, + 'duration': 30, + 'age_limit': 18, + }, + }, { + 'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda', + 'only_matching': True, + }, { + 'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d', + 'only_matching': True, + }, { + 'url': 'https://frontend.vh.yandex.ru/player/4dbb262b4fe5cf15a215de4f34eee34d?from=morda', + 'only_matching': True, + }, { + # vod-episode, series episode + 'url': 'https://yandex.ru/portal/video?stream_id=45b11db6e4b68797919c93751a938cee', + 'only_matching': True, + }, { + # episode, sports + 'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + content = self._download_json( + 'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id, + video_id, query={ + 'stream_options': 'hires', + 'disable_trackings': 1, + })['content'] + + m3u8_url = url_or_none(content.get('content_url')) or url_or_none( + content['streams'][0]['url']) + title = content.get('title') or content.get('computed_title') + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + description = content.get('description') + thumbnail = content.get('thumbnail') + timestamp = (int_or_none(content.get('release_date')) or + int_or_none(content.get('release_date_ut')) or + int_or_none(content.get('start_time'))) + duration = int_or_none(content.get('duration')) + series = content.get('program_title') + age_limit = int_or_none(content.get('restriction_age')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'series': series, + 'age_limit': age_limit, + 'formats': formats, + } From 2ed2ebdb36c678ac47b5bec1d4d2eaeda471d0f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 18 Mar 2019 01:33:37 +0700 Subject: [PATCH 1780/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/ChangeLog b/ChangeLog index eda94ad33..0ab376d83 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,37 @@ +version + +Core +* [extractor/common] Improve HTML5 entries extraction ++ [utils] Introduce parse_bitrate +* [update] Hide update URLs behind redirect +* [extractor/common] Fix url meta field for unfragmented DASH formats (#20346) + +Extractors ++ [yandexvideo] Add extractor +* [openload] Improve embed detection ++ [corus] Add support for bigbrothercanada.ca (#20357) ++ [orf:radio] Extract series (#20012) ++ [cbc:watch] Add support for gem.cbc.ca (#20251, #20359) +- [anysex] Remove extractor (#19279) ++ [ciscolive] Add support for new URL schema (#20320, #20351) ++ [youtube] Add support for invidiou.sh (#20309) +- [anitube] Remove extractor (#20334) +- [ruleporn] Remove extractor (#15344, #20324) +* [npr] Fix extraction (#10793, #13440) +* [biqle] Fix extraction (#11471, #15313) +* [viddler] Modernize +* [moevideo] Fix extraction +* [primesharetv] Remove extractor +* [hypem] Modernize and extract more metadata (#15320) +* [veoh] Fix extraction +* [escapist] Modernize +- [videomega] Remove extractor (#10108) ++ [beeg] Add support for beeg.porn (#20306) +* [vimeo:review] Improve config url extraction and extract original format + (#20305) +* [fox] Detect geo restriction and authentication errors (#20208) + + version 2019.03.09 Core From 0a8e251b35db6a05949c61ee4e56b678697f08a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 18 Mar 2019 01:36:41 +0700 Subject: [PATCH 1781/2417] release 2019.03.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 8 ++------ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 911e912a4..0decf19a1 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.18*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.03.09 +[debug] youtube-dl version 2019.03.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0ab376d83..d0e3a6088 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.03.18 Core * [extractor/common] Improve HTML5 entries extraction diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d8a8d7ede..a3d4447a8 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,9 +44,7 @@ - **AmericasTestKitchen** - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **AnimeOnDemand** - - **anitube.se** - **Anvato** - - **AnySex** - **APA** - **Aparat** - **AppleConnect** @@ -698,7 +696,6 @@ - **PornoXO** - **PornTube** - **PressTV** - - **PrimeShareTV** - **PromptFile** - **prosiebensat1**: ProSiebenSat.1 Digital - **puhutv** @@ -718,7 +715,7 @@ - **radio.de** - **radiobremen** - **radiocanada** - - **RadioCanadaAudioVideo** + - **radiocanada:audiovideo** - **radiofrance** - **RadioJavan** - **Rai** @@ -765,7 +762,6 @@ - **RTVS** - **Rudo** - **RUHD** - - **RulePorn** - **rutube**: Rutube videos - **rutube:channel**: Rutube channels - **rutube:embed**: Rutube embedded videos @@ -1010,7 +1006,6 @@ - **video.mit.edu** - **VideoDetective** - **videofy.me** - - **VideoMega** - **videomore** - **videomore:season** - **videomore:video** @@ -1127,6 +1122,7 @@ - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек + - **YandexVideo** - **YapFiles** - **YesJapan** - **yinyuetai:video**: 音悦Tai diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f72fee57f..5e86bc4d5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.03.09' +__version__ = '2019.03.18' From c4580580f58d004a0e4e9a5d90b0a44cdd69c7e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Mar 2019 22:39:35 +0700 Subject: [PATCH 1782/2417] [svtplay] Update API endpoint (closes #20430) --- youtube_dl/extractor/svt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 0901c3163..7aa1b5919 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -185,7 +185,7 @@ class SVTPlayIE(SVTPlayBaseIE): def _extract_by_video_id(self, video_id, webpage=None): data = self._download_json( - 'https://api.svt.se/videoplayer-api/video/%s' % video_id, + 'https://api.svt.se/video/%s' % video_id, video_id, headers=self.geo_verification_headers()) info_dict = self._extract_video(data, video_id) if not info_dict.get('title'): From 050afa60c6685f0b0aff68d943d48b3b6b9c85a0 Mon Sep 17 00:00:00 2001 From: Jesse de Zwart Date: Thu, 21 Mar 2019 16:55:03 +0100 Subject: [PATCH 1783/2417] Check for valid --min-sleep-interval when --max-sleep-interval is specified --- youtube_dl/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 94788d936..9d4859bcf 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -166,6 +166,8 @@ def _real_main(argv=None): if opts.max_sleep_interval is not None: if opts.max_sleep_interval < 0: parser.error('max sleep interval must be positive or 0') + if opts.sleep_interval is None: + parser.error('min sleep interval must be specified, use --min-sleep-interval') if opts.max_sleep_interval < opts.sleep_interval: parser.error('max sleep interval must be greater than or equal to min sleep interval') else: From 5e1271c56dbf3f96fbf928ccd3facf5fc70493b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Mar 2019 01:08:54 +0700 Subject: [PATCH 1784/2417] [utils] Improve int_or_none and float_or_none (#20403) --- test/test_utils.py | 17 +++++++++++++++++ youtube_dl/utils.py | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index acd994bd7..ca6d832a4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -33,11 +33,13 @@ from youtube_dl.utils import ( ExtractorError, find_xpath_attr, fix_xml_ampersands, + float_or_none, get_element_by_class, get_element_by_attribute, get_elements_by_class, get_elements_by_attribute, InAdvancePagedList, + int_or_none, intlist_to_bytes, is_html, js_to_json, @@ -468,6 +470,21 @@ class TestUtil(unittest.TestCase): shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') + def test_float_or_none(self): + self.assertEqual(float_or_none('42.42'), 42.42) + self.assertEqual(float_or_none('42'), 42.0) + self.assertEqual(float_or_none(''), None) + self.assertEqual(float_or_none(None), None) + self.assertEqual(float_or_none([]), None) + self.assertEqual(float_or_none(set()), None) + + def test_int_or_none(self): + self.assertEqual(int_or_none('42'), 42) + self.assertEqual(int_or_none(''), None) + self.assertEqual(int_or_none(None), None) + self.assertEqual(int_or_none([]), None) + self.assertEqual(int_or_none(set()), None) + def test_str_to_int(self): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f2726a579..71713f63a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1922,7 +1922,7 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): return default try: return int(v) * invscale // scale - except ValueError: + except (ValueError, TypeError): return default @@ -1943,7 +1943,7 @@ def float_or_none(v, scale=1, invscale=1, default=None): return default try: return float(v) * invscale / scale - except ValueError: + except (ValueError, TypeError): return default From b8526c78f9a8a9101c5fd365f07c12e7b53c5ed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Mar 2019 01:09:33 +0700 Subject: [PATCH 1785/2417] [pornhub] Add support for DASH formats (closes #20403) --- youtube_dl/extractor/pornhub.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3a474c179..bf8f0be88 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -14,6 +14,7 @@ from ..compat import ( ) from .openload import PhantomJSwrapper from ..utils import ( + determine_ext, ExtractorError, int_or_none, orderedSet, @@ -275,6 +276,10 @@ class PornHubIE(PornHubBaseIE): r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) if upload_date: upload_date = upload_date.replace('/', '') + if determine_ext(video_url) == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + continue tbr = None mobj = re.search(r'(?P\d+)[pP]?_(?P\d+)[kK]', video_url) if mobj: From 8cb10807ed6c6aa8e0f316b3f1a31a91df46abdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Mar 2019 21:43:50 +0700 Subject: [PATCH 1786/2417] [npo] Improve DRM detection --- youtube_dl/extractor/npo.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index ad62f8ec6..e525ad928 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -181,10 +181,7 @@ class NPOIE(NPOBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - try: - return self._get_info(url, video_id) - except ExtractorError: - return self._get_old_info(video_id) + return self._get_info(url, video_id) or self._get_old_info(video_id) def _get_info(self, url, video_id): token = self._download_json( @@ -206,6 +203,7 @@ class NPOIE(NPOBaseIE): player_token = player['token'] + drm = False format_urls = set() formats = [] for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'): @@ -227,7 +225,8 @@ class NPOIE(NPOBaseIE): if not stream_url or stream_url in format_urls: continue format_urls.add(stream_url) - if stream.get('protection') is not None: + if stream.get('protection') is not None or stream.get('keySystemOptions') is not None: + drm = True continue stream_type = stream.get('type') stream_ext = determine_ext(stream_url) @@ -246,6 +245,11 @@ class NPOIE(NPOBaseIE): 'url': stream_url, }) + if not formats: + if drm: + raise ExtractorError('This video is DRM protected.', expected=True) + return + self._sort_formats(formats) info = { From cf3d399727b87683aff25b5f57e026a4326244ef Mon Sep 17 00:00:00 2001 From: ealgase Date: Mon, 25 Mar 2019 12:04:31 -0400 Subject: [PATCH 1787/2417] [openload] add support for oladblock.services and oladblock.xyz domains --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index cc323e5e2..25b3bfdbd 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space))' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space)|oladblock\.(?:services|xyz))' _VALID_URL = r'''(?x) https?:// (?P From de74ef83b786daad1b3df738734ca81ce3344667 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 27 Mar 2019 18:01:51 +0100 Subject: [PATCH 1788/2417] [cwtv] fix episode number extraction(closes #20461) --- youtube_dl/extractor/cwtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index f9bd535f6..73382431b 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -79,7 +79,7 @@ class CWTVIE(InfoExtractor): season = str_or_none(video_data.get('season')) episode = str_or_none(video_data.get('episode')) if episode and season: - episode = episode.lstrip(season) + episode = episode[len(season):] return { '_type': 'url_transparent', From b27a71e66c1eb31887aa34eb8dbadb22b91bf716 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 27 Mar 2019 18:29:24 +0100 Subject: [PATCH 1789/2417] [ina] improve extraction --- youtube_dl/extractor/ina.py | 64 +++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py index 9544ff9d4..5744a52b8 100644 --- a/youtube_dl/extractor/ina.py +++ b/youtube_dl/extractor/ina.py @@ -1,36 +1,72 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import ( + int_or_none, + strip_or_none, + xpath_attr, + xpath_text, +) class InaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?PI?[A-Z0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P[A-Z0-9_]+)' + _TESTS = [{ 'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', 'md5': 'a667021bf2b41f8dc6049479d9bb38a3', 'info_dict': { 'id': 'I12055569', 'ext': 'mp4', 'title': 'François Hollande "Je crois que c\'est clair"', + 'description': 'md5:3f09eb072a06cb286b8f7e4f77109663', } - } + }, { + 'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) + video_id = self._match_id(url) + info_doc = self._download_xml( + 'http://player.ina.fr/notices/%s.mrss' % video_id, video_id) + item = info_doc.find('channel/item') + title = xpath_text(item, 'title', fatal=True) + media_ns_xpath = lambda x: self._xpath_ns(x, 'http://search.yahoo.com/mrss/') + content = item.find(media_ns_xpath('content')) - video_id = mobj.group('id') - mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id - info_doc = self._download_xml(mrss_url, video_id) + get_furl = lambda x: xpath_attr(content, media_ns_xpath(x), 'url') + formats = [] + for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)): + q_url = get_furl(q) + if not q_url: + continue + formats.append({ + 'format_id': q, + 'url': q_url, + 'width': w, + 'height': h, + }) + if not formats: + formats = [{ + 'url': get_furl('player') or content.attrib['url'], + }] - self.report_extraction(video_id) - - video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url'] + thumbnails = [] + for thumbnail in content.findall(media_ns_xpath('thumbnail')): + thumbnail_url = thumbnail.get('url') + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'height': int_or_none(thumbnail.get('height')), + 'width': int_or_none(thumbnail.get('width')), + }) return { 'id': video_id, - 'url': video_url, - 'title': info_doc.find('.//title').text, + 'formats': formats, + 'title': title, + 'description': strip_or_none(xpath_text(item, 'description')), + 'thumbnails': thumbnails, } From c4c888697ec6b1576f01273a333a3a08ffbf6831 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 27 Mar 2019 18:49:29 +0100 Subject: [PATCH 1790/2417] [ina] add support for audio URLs --- youtube_dl/extractor/ina.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py index 5744a52b8..12695af27 100644 --- a/youtube_dl/extractor/ina.py +++ b/youtube_dl/extractor/ina.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, strip_or_none, xpath_attr, @@ -11,7 +12,7 @@ from ..utils import ( class InaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P[A-Z0-9_]+)' + _VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P[A-Z0-9_]+)' _TESTS = [{ 'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', 'md5': 'a667021bf2b41f8dc6049479d9bb38a3', @@ -24,6 +25,12 @@ class InaIE(InfoExtractor): }, { 'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html', 'only_matching': True, + }, { + 'url': 'https://www.ina.fr/audio/P16173408', + 'only_matching': True, + }, { + 'url': 'https://www.ina.fr/video/P16173408-video.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -48,8 +55,12 @@ class InaIE(InfoExtractor): 'height': h, }) if not formats: + furl = get_furl('player') or content.attrib['url'] + ext = determine_ext(furl) formats = [{ - 'url': get_furl('player') or content.attrib['url'], + 'url': furl, + 'vcodec': 'none' if ext == 'mp3' else None, + 'ext': ext, }] thumbnails = [] From 99fe3300707464a926ab1f62925cbbad90c3bde2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 28 Mar 2019 16:55:57 +0100 Subject: [PATCH 1791/2417] [teamtreehouse] Add new extractor(closes #9836) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/teamtreehouse.py | 140 ++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 youtube_dl/extractor/teamtreehouse.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8e7a5bf41..e79ffca85 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1114,6 +1114,7 @@ from .teachertube import ( ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE +from .teamtreehouse import TeamTreeHouseIE from .techtalks import TechTalksIE from .ted import TEDIE from .tele5 import Tele5IE diff --git a/youtube_dl/extractor/teamtreehouse.py b/youtube_dl/extractor/teamtreehouse.py new file mode 100644 index 000000000..d347e97ef --- /dev/null +++ b/youtube_dl/extractor/teamtreehouse.py @@ -0,0 +1,140 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + determine_ext, + ExtractorError, + float_or_none, + get_element_by_class, + get_element_by_id, + parse_duration, + remove_end, + urlencode_postdata, + urljoin, +) + + +class TeamTreeHouseIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?teamtreehouse\.com/library/(?P[^/]+)' + _TESTS = [{ + # Course + 'url': 'https://teamtreehouse.com/library/introduction-to-user-authentication-in-php', + 'info_dict': { + 'id': 'introduction-to-user-authentication-in-php', + 'title': 'Introduction to User Authentication in PHP', + 'description': 'md5:405d7b4287a159b27ddf30ca72b5b053', + }, + 'playlist_mincount': 24, + }, { + # WorkShop + 'url': 'https://teamtreehouse.com/library/deploying-a-react-app', + 'info_dict': { + 'id': 'deploying-a-react-app', + 'title': 'Deploying a React App', + 'description': 'md5:10a82e3ddff18c14ac13581c9b8e5921', + }, + 'playlist_mincount': 4, + }, { + # Video + 'url': 'https://teamtreehouse.com/library/application-overview-2', + 'info_dict': { + 'id': 'application-overview-2', + 'ext': 'mp4', + 'title': 'Application Overview', + 'description': 'md5:4b0a234385c27140a4378de5f1e15127', + }, + 'expected_warnings': ['This is just a preview'], + }] + _NETRC_MACHINE = 'teamtreehouse' + + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + + signin_page = self._download_webpage( + 'https://teamtreehouse.com/signin', + None, 'Downloading signin page') + data = self._form_hidden_inputs('new_user_session', signin_page) + data.update({ + 'user_session[email]': email, + 'user_session[password]': password, + }) + error_message = get_element_by_class('error-message', self._download_webpage( + 'https://teamtreehouse.com/person_session', + None, 'Logging in', data=urlencode_postdata(data))) + if error_message: + raise ExtractorError(clean_html(error_message), expected=True) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + title = self._html_search_meta(['og:title', 'twitter:title'], webpage) + description = self._html_search_meta( + ['description', 'og:description', 'twitter:description'], webpage) + entries = self._parse_html5_media_entries(url, webpage, display_id) + if entries: + info = entries[0] + + for subtitles in info.get('subtitles', {}).values(): + for subtitle in subtitles: + subtitle['ext'] = determine_ext(subtitle['url'], 'srt') + + is_preview = 'data-preview="true"' in webpage + if is_preview: + self.report_warning( + 'This is just a preview. You need to be signed in with a Basic account to download the entire video.', display_id) + duration = 30 + else: + duration = float_or_none(self._search_regex( + r'data-duration="(\d+)"', webpage, 'duration'), 1000) + if not duration: + duration = parse_duration(get_element_by_id( + 'video-duration', webpage)) + + info.update({ + 'id': display_id, + 'title': title, + 'description': description, + 'duration': duration, + }) + return info + else: + def extract_urls(html, extract_info=None): + for path in re.findall(r']+href="([^"]+)"', html): + page_url = urljoin(url, path) + entry = { + '_type': 'url_transparent', + 'id': self._match_id(page_url), + 'url': page_url, + 'id_key': self.ie_key(), + } + if extract_info: + entry.update(extract_info) + entries.append(entry) + + workshop_videos = self._search_regex( + r'(?s)]+id="workshop-videos"[^>]*>(.+?)', + webpage, 'workshop videos', default=None) + if workshop_videos: + extract_urls(workshop_videos) + else: + stages_path = self._search_regex( + r'(?s)]+id="syllabus-stages"[^>]+data-url="([^"]+)"', + webpage, 'stages path') + if stages_path: + stages_page = self._download_webpage( + urljoin(url, stages_path), display_id, 'Downloading stages page') + for chapter_number, (chapter, steps_list) in enumerate(re.findall(r'(?s)]*>\s*(.+?)\s*.+?]*>(.+?)', stages_page), 1): + extract_urls(steps_list, { + 'chapter': chapter, + 'chapter_number': chapter_number, + }) + title = remove_end(title, ' Course') + + return self.playlist_result( + entries, display_id, title, description) From 4014a4862268db8357c7b9e3750c188b1648277e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 31 Mar 2019 01:17:30 +0700 Subject: [PATCH 1792/2417] [mediasite:catalog] Add extractor (closes #20507) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/mediasite.py | 116 ++++++++++++++++++++++++++++- 2 files changed, 118 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e79ffca85..a1a0f9cd5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -632,7 +632,10 @@ from .massengeschmacktv import MassengeschmackTVIE from .matchtv import MatchTVIE from .mdr import MDRIE from .mediaset import MediasetIE -from .mediasite import MediasiteIE +from .mediasite import ( + MediasiteIE, + MediasiteCatalogIE, +) from .medici import MediciIE from .megaphone import MegaphoneIE from .meipai import MeipaiIE diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py index ef9628e65..5c9e49d90 100644 --- a/youtube_dl/extractor/mediasite.py +++ b/youtube_dl/extractor/mediasite.py @@ -13,6 +13,8 @@ from ..utils import ( ExtractorError, float_or_none, mimetype2ext, + str_or_none, + try_get, unescapeHTML, unsmuggle_url, url_or_none, @@ -20,8 +22,11 @@ from ..utils import ( ) +_ID_RE = r'[0-9a-f]{32,34}' + + class MediasiteIE(InfoExtractor): - _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P[0-9a-f]{32,34})(?P\?[^#]+|)' + _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P%s)(?P\?[^#]+|)' % _ID_RE _TESTS = [ { 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d', @@ -109,7 +114,7 @@ class MediasiteIE(InfoExtractor): return [ unescapeHTML(mobj.group('url')) for mobj in re.finditer( - r'(?xi)]+\bsrc=(["\'])(?P(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1', + r'(?xi)]+\bsrc=(["\'])(?P(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE, webpage)] def _real_extract(self, url): @@ -221,3 +226,110 @@ class MediasiteIE(InfoExtractor): 'formats': formats, 'thumbnails': thumbnails, } + + +class MediasiteCatalogIE(InfoExtractor): + _VALID_URL = r'''(?xi) + (?Phttps?://[^/]+/Mediasite) + /Catalog/Full/ + (?P{0}) + (?: + /(?P{0}) + /(?P{0}) + )? + '''.format(_ID_RE) + _TESTS = [{ + 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48530d454381549f955d08c75e21', + 'info_dict': { + 'id': '631f9e48530d454381549f955d08c75e21', + 'title': 'WCET Summit: Adaptive Learning in Higher Ed: Improving Outcomes Dynamically', + }, + 'playlist_count': 6, + 'expected_warnings': ['is not a supported codec'], + }, { + # with CurrentFolderId and RootDynamicFolderId + 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521', + 'info_dict': { + 'id': '9518c4a6c5cf4993b21cbd53e828a92521', + 'title': 'IUSM Family and Friends Sessions', + }, + 'playlist_count': 2, + }, { + 'url': 'http://uipsyc.mediasite.com/mediasite/Catalog/Full/d5d79287c75243c58c50fef50174ec1b21', + 'only_matching': True, + }, { + # no AntiForgeryToken + 'url': 'https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21', + 'only_matching': True, + }, { + 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + mediasite_url = mobj.group('url') + catalog_id = mobj.group('catalog_id') + current_folder_id = mobj.group('current_folder_id') or catalog_id + root_dynamic_folder_id = mobj.group('root_dynamic_folder_id') + + webpage = self._download_webpage(url, catalog_id) + + # AntiForgeryToken is optional (e.g. [1]) + # 1. https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21 + anti_forgery_token = self._search_regex( + r'AntiForgeryToken\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'anti forgery token', default=None, group='value') + if anti_forgery_token: + anti_forgery_header = self._search_regex( + r'AntiForgeryHeaderName\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'anti forgery header name', + default='X-SOFO-AntiForgeryHeader', group='value') + + data = { + 'IsViewPage': True, + 'IsNewFolder': True, + 'AuthTicket': None, + 'CatalogId': catalog_id, + 'CurrentFolderId': current_folder_id, + 'RootDynamicFolderId': root_dynamic_folder_id, + 'ItemsPerPage': 1000, + 'PageIndex': 0, + 'PermissionMask': 'Execute', + 'CatalogSearchType': 'SearchInFolder', + 'SortBy': 'Date', + 'SortDirection': 'Descending', + 'StartDate': None, + 'EndDate': None, + 'StatusFilterList': None, + 'PreviewKey': None, + 'Tags': [], + } + + headers = { + 'Content-Type': 'application/json; charset=UTF-8', + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest', + } + if anti_forgery_token: + headers[anti_forgery_header] = anti_forgery_token + + catalog = self._download_json( + '%s/Catalog/Data/GetPresentationsForFolder' % mediasite_url, + catalog_id, data=json.dumps(data).encode(), headers=headers) + + entries = [] + for video in catalog['PresentationDetailsList']: + if not isinstance(video, dict): + continue + video_id = str_or_none(video.get('Id')) + if not video_id: + continue + entries.append(self.url_result( + '%s/Play/%s' % (mediasite_url, video_id), + ie=MediasiteIE.ie_key(), video_id=video_id)) + + title = try_get( + catalog, lambda x: x['CurrentFolder']['Name'], compat_str) + + return self.playlist_result(entries, catalog_id, title,) From b43c5f474a7be2f7dbfd4c887ded6c751f081d73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 31 Mar 2019 01:27:45 +0700 Subject: [PATCH 1793/2417] [xhamster] Add support for xhamster.one (closes #20508) --- youtube_dl/extractor/xhamster.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 68a48034e..d268372e6 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -20,7 +20,7 @@ from ..utils import ( class XHamsterIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - (?:.+?\.)?xhamster\.com/ + (?:.+?\.)?xhamster\.(?:com|one)/ (?: movies/(?P\d+)/(?P[^/]*)\.html| videos/(?P[^/]*)-(?P\d+) @@ -91,6 +91,9 @@ class XHamsterIE(InfoExtractor): # new URL schema 'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821', 'only_matching': True, + }, { + 'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445', + 'only_matching': True, }] def _real_extract(self, url): From 93bb6b1baeddd8e6ea77d814e739e1b076fb248b Mon Sep 17 00:00:00 2001 From: RexYuan Date: Sun, 31 Mar 2019 02:31:33 +0800 Subject: [PATCH 1794/2417] [weibo] Extend _VALID_URL (#20496) --- youtube_dl/extractor/weibo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py index 3cb4d71a6..621df5b54 100644 --- a/youtube_dl/extractor/weibo.py +++ b/youtube_dl/extractor/weibo.py @@ -19,7 +19,7 @@ from ..utils import ( class WeiboIE(InfoExtractor): - _VALID_URL = r'https?://weibo\.com/[0-9]+/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P[a-zA-Z0-9]+)' _TEST = { 'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment', 'info_dict': { From 25d9243141394cf505fb9daea824e52801b19766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 1 Apr 2019 23:53:28 +0700 Subject: [PATCH 1795/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index d0e3a6088..68aae918f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version + +Core +* [utils] Improve int_or_none and float_or_none (#20403) +* Check for valid --min-sleep-interval when --max-sleep-interval is specified + (#20435) + +Extractors ++ [weibo] Extend URL regular expression (#20496) ++ [xhamster] Add support for xhamster.one (#20508) ++ [mediasite] Add support for catalogs (#20507) ++ [teamtreehouse] Add support for teamtreehouse.com (#9836) ++ [ina] Add support for audio URLs +* [ina] Improve extraction +* [cwtv] Fix episode number extraction (#20461) +* [npo] Improve DRM detection ++ [pornhub] Add support for DASH formats (#20403) +* [svtplay] Update API endpoint (#20430) + + version 2019.03.18 Core From 38287d251dfb6e3541abb2d75aafff248dcf0b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 1 Apr 2019 23:55:17 +0700 Subject: [PATCH 1796/2417] release 2019.04.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0decf19a1..cc68279d0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.18*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.01*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.03.18 +[debug] youtube-dl version 2019.04.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 68aae918f..3a0b6634f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.04.01 Core * [utils] Improve int_or_none and float_or_none (#20403) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a3d4447a8..12df319eb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -488,6 +488,7 @@ - **Medialaan** - **Mediaset** - **Mediasite** + - **MediasiteCatalog** - **Medici** - **megaphone.fm**: megaphone.fm embedded players - **Meipai**: 美拍 @@ -869,6 +870,7 @@ - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** + - **TeamTreeHouse** - **TechTalks** - **techtv.mit.edu** - **ted** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5e86bc4d5..f872cd137 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.03.18' +__version__ = '2019.04.01' From efee62ac7f9f876d09a2ea9a6a3655780c165e31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Apr 2019 01:13:52 +0700 Subject: [PATCH 1797/2417] [mediasite] Add support for dashed ids and named catalogs (closes #20531) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/mediasite.py | 33 +++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a1a0f9cd5..124764e2b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -635,6 +635,7 @@ from .mediaset import MediasetIE from .mediasite import ( MediasiteIE, MediasiteCatalogIE, + MediasiteNamedCatalogIE, ) from .medici import MediciIE from .megaphone import MegaphoneIE diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py index 5c9e49d90..694a264d6 100644 --- a/youtube_dl/extractor/mediasite.py +++ b/youtube_dl/extractor/mediasite.py @@ -22,7 +22,7 @@ from ..utils import ( ) -_ID_RE = r'[0-9a-f]{32,34}' +_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})' class MediasiteIE(InfoExtractor): @@ -98,6 +98,11 @@ class MediasiteIE(InfoExtractor): 'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d', 'only_matching': True, }, + { + # dashed id + 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d', + 'only_matching': True, + } ] # look in Mediasite.Core.js (Mediasite.ContentStreamType[*]) @@ -264,6 +269,10 @@ class MediasiteCatalogIE(InfoExtractor): }, { 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521', 'only_matching': True, + }, { + # dashed id + 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e', + 'only_matching': True, }] def _real_extract(self, url): @@ -333,3 +342,25 @@ class MediasiteCatalogIE(InfoExtractor): catalog, lambda x: x['CurrentFolder']['Name'], compat_str) return self.playlist_result(entries, catalog_id, title,) + + +class MediasiteNamedCatalogIE(InfoExtractor): + _VALID_URL = r'(?xi)(?Phttps?://[^/]+/Mediasite)/Catalog/catalogs/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + mediasite_url = mobj.group('url') + catalog_name = mobj.group('catalog_name') + + webpage = self._download_webpage(url, catalog_name) + + catalog_id = self._search_regex( + r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, webpage, 'catalog id') + + return self.url_result( + '%s/Catalog/Full/%s' % (mediasite_url, catalog_id), + ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id) From c0b7d11713e57b75189a38b3732aa217f149e5ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Apr 2019 01:29:44 +0700 Subject: [PATCH 1798/2417] [YoutubeDL] Add ffmpeg_location to post processor options (closes #20532) --- youtube_dl/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3b92acd97..57f52f888 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -309,6 +309,8 @@ class YoutubeDL(object): The following options are used by the post processors: prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available, otherwise prefer ffmpeg. + ffmpeg_location: Location of the ffmpeg/avconv binary; either the path + to the binary or its containing directory. postprocessor_args: A list of additional command-line arguments for the postprocessor. From 313e8b2b18d6befc27ce145f49583d90ba8caee2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 2 Apr 2019 15:49:43 +0100 Subject: [PATCH 1799/2417] [gaia] add support for authentication(closes #14605) --- youtube_dl/extractor/gaia.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gaia.py b/youtube_dl/extractor/gaia.py index f2eef3f4c..e9527758f 100644 --- a/youtube_dl/extractor/gaia.py +++ b/youtube_dl/extractor/gaia.py @@ -4,12 +4,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urllib_parse_unquote, +) from ..utils import ( + ExtractorError, int_or_none, str_or_none, strip_or_none, try_get, + urlencode_postdata, ) @@ -46,6 +51,29 @@ class GaiaIE(InfoExtractor): 'skip_download': True, }, }] + _NETRC_MACHINE = 'gaia' + _jwt = None + + def _real_initialize(self): + auth = self._get_cookies('https://www.gaia.com/').get('auth') + if auth: + auth = self._parse_json( + compat_urllib_parse_unquote(auth.value), + None, fatal=False) + if not auth: + username, password = self._get_login_info() + if username is None: + return + auth = self._download_json( + 'https://auth.gaia.com/v1/login', + None, data=urlencode_postdata({ + 'username': username, + 'password': password + })) + if auth.get('success') is False: + raise ExtractorError(', '.join(auth['messages']), expected=True) + if auth: + self._jwt = auth.get('jwt') def _real_extract(self, url): display_id, vtype = re.search(self._VALID_URL, url).groups() @@ -59,8 +87,12 @@ class GaiaIE(InfoExtractor): media_id = compat_str(vdata['nid']) title = node['title'] + headers = None + if self._jwt: + headers = {'Authorization': 'Bearer ' + self._jwt} media = self._download_json( - 'https://brooklyn.gaia.com/media/' + media_id, media_id) + 'https://brooklyn.gaia.com/media/' + media_id, + media_id, headers=headers) formats = self._extract_m3u8_formats( media['mediaUrls']['bcHLS'], media_id, 'mp4') self._sort_formats(formats) From f8987163fb20b53e10b65ab80fbfd7ed2bd115d5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 2 Apr 2019 22:40:39 +0100 Subject: [PATCH 1800/2417] [adobeconnect] Add new extractor(closes #20283) --- youtube_dl/extractor/adobeconnect.py | 37 ++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/adobeconnect.py diff --git a/youtube_dl/extractor/adobeconnect.py b/youtube_dl/extractor/adobeconnect.py new file mode 100644 index 000000000..728549eb9 --- /dev/null +++ b/youtube_dl/extractor/adobeconnect.py @@ -0,0 +1,37 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) + + +class AdobeConnectIE(InfoExtractor): + _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P[\w-]+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'(.+?)', webpage, 'title') + qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1]) + is_live = qs.get('isLive', ['false'])[0] == 'true' + formats = [] + for con_string in qs['conStrings'][0].split(','): + formats.append({ + 'format_id': con_string.split('://')[0], + 'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]), + 'ext': 'flv', + 'play_path': 'mp4:' + qs['streamName'][0], + 'rtmp_conn': 'S:' + qs['ticket'][0], + 'rtmp_live': is_live, + 'url': con_string, + }) + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'formats': formats, + 'is_live': is_live, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 124764e2b..254dbc946 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -20,6 +20,7 @@ from .acast import ( ) from .addanime import AddAnimeIE from .adn import ADNIE +from .adobeconnect import AdobeConnectIE from .adobetv import ( AdobeTVIE, AdobeTVShowIE, From d7d86fdd49389c0cddef13606b5a1c1109857fc3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 2 Apr 2019 22:41:23 +0100 Subject: [PATCH 1801/2417] [download/external] pass rtmp_conn to ffmpeg --- youtube_dl/downloader/external.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 5f73f7f0f..acdb27712 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -289,6 +289,7 @@ class FFmpegFD(ExternalFD): tc_url = info_dict.get('tc_url') flash_version = info_dict.get('flash_version') live = info_dict.get('rtmp_live', False) + conn = info_dict.get('rtmp_conn') if player_url is not None: args += ['-rtmp_swfverify', player_url] if page_url is not None: @@ -303,6 +304,11 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_flashver', flash_version] if live: args += ['-rtmp_live', 'live'] + if isinstance(conn, list): + for entry in conn: + args += ['-rtmp_conn', entry] + elif isinstance(conn, compat_str): + args += ['-rtmp_conn', conn] args += ['-i', url, '-c', 'copy'] From 4f7db46887986870f0e1d90dfff6bb29d8822b48 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 3 Apr 2019 01:00:02 +0100 Subject: [PATCH 1802/2417] [rtl2] improve _VALID_URL regex --- youtube_dl/extractor/rtl2.py | 45 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index 18a327d81..cfaee7303 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -21,7 +21,7 @@ from ..utils import ( class RTL2IE(InfoExtractor): IE_NAME = 'rtl2' - _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P[^?#/]*?)(?:$|/(?:$|[?#]))' + _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P\d+)[^/]+/(?P\d+)-|folge/)(?P[^/?#]+)' _TESTS = [{ 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', 'info_dict': { @@ -34,10 +34,11 @@ class RTL2IE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }, { 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', 'info_dict': { - 'id': '21040-anna-erwischt-alex', + 'id': 'anna-erwischt-alex', 'ext': 'mp4', 'title': 'Anna erwischt Alex!', 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.' @@ -46,31 +47,29 @@ class RTL2IE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }] def _real_extract(self, url): - # Some rtl2 urls have no slash at the end, so append it. - if not url.endswith('/'): - url += '/' + vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups() + if not vico_id: + webpage = self._download_webpage(url, display_id) - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - mobj = re.search( - r']+data-collection="(?P\d+)"[^>]+data-video="(?P\d+)"', - webpage) - if mobj: - vico_id = mobj.group('vico_id') - vivi_id = mobj.group('vivi_id') - else: - vico_id = self._html_search_regex( - r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') - vivi_id = self._html_search_regex( - r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') + mobj = re.search( + r'data-collection="(?P\d+)"[^>]+data-video="(?P\d+)"', + webpage) + if mobj: + vico_id = mobj.group('vico_id') + vivi_id = mobj.group('vivi_id') + else: + vico_id = self._html_search_regex( + r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') + vivi_id = self._html_search_regex( + r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') info = self._download_json( - 'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php', - video_id, query={ + 'https://service.rtl2.de/api-player-vipo/video.php', + display_id, query={ 'vico_id': vico_id, 'vivi_id': vivi_id, }) @@ -99,12 +98,12 @@ class RTL2IE(InfoExtractor): m3u8_url = video_info.get('streamurl_hls') if m3u8_url: - formats.extend(self._extract_akamai_formats(m3u8_url, video_id)) + formats.extend(self._extract_akamai_formats(m3u8_url, display_id)) self._sort_formats(formats) return { - 'id': video_id, + 'id': display_id, 'title': title, 'thumbnail': video_info.get('image'), 'description': video_info.get('beschreibung'), From a2b6f946f17ba231131166703a8702dc52f7be62 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 3 Apr 2019 10:19:36 +0100 Subject: [PATCH 1803/2417] [newstube] fix extraction --- youtube_dl/extractor/newstube.py | 116 +++++++++++-------------------- 1 file changed, 41 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index e3f35f1d8..dab4aec44 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -1,12 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import base64 +import hashlib from .common import InfoExtractor +from ..aes import aes_cbc_decrypt from ..utils import ( - ExtractorError, + bytes_to_intlist, int_or_none, + intlist_to_bytes, + parse_codecs, + parse_duration, ) @@ -14,7 +19,7 @@ class NewstubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P.+)' _TEST = { 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', - 'md5': '801eef0c2a9f4089fa04e4fe3533abdc', + 'md5': '9d10320ad473444352f72f746ccb8b8c', 'info_dict': { 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', 'ext': 'mp4', @@ -25,84 +30,45 @@ class NewstubeIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) - page = self._download_webpage(url, video_id, 'Downloading page') + page = self._download_webpage(url, video_id) + title = self._html_search_meta(['og:title', 'twitter:title'], page, fatal=True) video_guid = self._html_search_regex( - r' Date: Wed, 3 Apr 2019 10:20:01 +0100 Subject: [PATCH 1804/2417] [rtl2] update player_url --- youtube_dl/extractor/rtl2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index cfaee7303..70f000ca8 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -88,7 +88,7 @@ class RTL2IE(InfoExtractor): 'format_id': 'rtmp', 'url': rtmp_url, 'play_path': stream_url, - 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', + 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf', 'page_url': url, 'flash_version': 'LNX 11,2,202,429', 'rtmp_conn': rtmp_conn, From 220828f2d6c0147ddcf5c16ff856d8df56332eb7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 3 Apr 2019 11:08:42 +0100 Subject: [PATCH 1805/2417] [vk] use a more unique video id(closes #17848) --- youtube_dl/extractor/biqle.py | 2 +- youtube_dl/extractor/vk.py | 33 ++++++++++++++++++--------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py index 4c5c6be10..3707dc97f 100644 --- a/youtube_dl/extractor/biqle.py +++ b/youtube_dl/extractor/biqle.py @@ -28,7 +28,7 @@ class BIQLEIE(InfoExtractor): 'url': 'http://biqle.org/watch/-44781847_168547604', 'md5': '7f24e72af1db0edf7c1aaba513174f97', 'info_dict': { - 'id': '168547604', + 'id': '-44781847_168547604', 'ext': 'mp4', 'title': 'Ребенок в шоке от автоматической мойки', 'timestamp': 1396633454, diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index d1fe95654..39376e39f 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -103,7 +103,7 @@ class VKIE(VKBaseIE): 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 'md5': '7babad3b85ea2e91948005b1b8b0cb84', 'info_dict': { - 'id': '162222515', + 'id': '-77521_162222515', 'ext': 'mp4', 'title': 'ProtivoGunz - Хуёвая песня', 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', @@ -117,7 +117,7 @@ class VKIE(VKBaseIE): 'url': 'http://vk.com/video205387401_165548505', 'md5': '6c0aeb2e90396ba97035b9cbde548700', 'info_dict': { - 'id': '165548505', + 'id': '205387401_165548505', 'ext': 'mp4', 'title': 'No name', 'uploader': 'Tom Cruise', @@ -132,7 +132,7 @@ class VKIE(VKBaseIE): 'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', 'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', 'info_dict': { - 'id': '162925554', + 'id': '32194266_162925554', 'ext': 'mp4', 'uploader': 'Vladimir Gavrin', 'title': 'Lin Dan', @@ -149,7 +149,7 @@ class VKIE(VKBaseIE): 'md5': 'a590bcaf3d543576c9bd162812387666', 'note': 'Only available for registered users', 'info_dict': { - 'id': '164049491', + 'id': '-8871596_164049491', 'ext': 'mp4', 'uploader': 'Триллеры', 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', @@ -163,7 +163,7 @@ class VKIE(VKBaseIE): 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', 'md5': '4d7a5ef8cf114dfa09577e57b2993202', 'info_dict': { - 'id': '168067957', + 'id': '-43215063_168067957', 'ext': 'mp4', 'uploader': 'Киномания - лучшее из мира кино', 'title': ' ', @@ -177,7 +177,7 @@ class VKIE(VKBaseIE): 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6', 'note': 'ivi.ru embed', 'info_dict': { - 'id': '60690', + 'id': '-43215063_169084319', 'ext': 'mp4', 'title': 'Книга Илая', 'duration': 6771, @@ -191,7 +191,7 @@ class VKIE(VKBaseIE): 'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4', 'md5': '091287af5402239a1051c37ec7b92913', 'info_dict': { - 'id': '171201961', + 'id': '30481095_171201961', 'ext': 'mp4', 'title': 'ТюменцевВВ_09.07.2015', 'uploader': 'Anton Ivanov', @@ -206,10 +206,10 @@ class VKIE(VKBaseIE): 'url': 'https://vk.com/video276849682_170681728', 'info_dict': { 'id': 'V3K4mi0SYkc', - 'ext': 'webm', + 'ext': 'mp4', 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', - 'duration': 179, + 'duration': 178, 'upload_date': '20130116', 'uploader': "Children's Joy Foundation Inc.", 'uploader_id': 'thecjf', @@ -239,7 +239,7 @@ class VKIE(VKBaseIE): 'url': 'http://vk.com/video-110305615_171782105', 'md5': 'e13fcda136f99764872e739d13fac1d1', 'info_dict': { - 'id': '171782105', + 'id': '-110305615_171782105', 'ext': 'mp4', 'title': 'S-Dance, репетиции к The way show', 'uploader': 'THE WAY SHOW | 17 апреля', @@ -254,14 +254,17 @@ class VKIE(VKBaseIE): { # finished live stream, postlive_mp4 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2', - 'md5': '90d22d051fccbbe9becfccc615be6791', 'info_dict': { - 'id': '456242764', + 'id': '-387766_456242764', 'ext': 'mp4', - 'title': 'ИгроМир 2016 — день 1', + 'title': 'ИгроМир 2016 День 1 — Игромания Утром', 'uploader': 'Игромания', 'duration': 5239, - 'view_count': int, + # TODO: use act=show to extract view_count + # 'view_count': int, + 'upload_date': '20160929', + 'uploader_id': '-387766', + 'timestamp': 1475137527, }, }, { @@ -465,7 +468,7 @@ class VKIE(VKBaseIE): self._sort_formats(formats) return { - 'id': compat_str(data.get('vid') or video_id), + 'id': video_id, 'formats': formats, 'title': title, 'thumbnail': data.get('jpg'), From b966740cf786d3755c4849b12c9559d4bfe8ec15 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Apr 2019 14:50:16 +0100 Subject: [PATCH 1806/2417] [adn] fix extraction and add support for positioning styles(closes #20549) --- youtube_dl/extractor/adn.py | 74 +++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 1eb99c39a..2eb4d1dc7 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -21,7 +21,6 @@ from ..utils import ( intlist_to_bytes, long_to_bytes, pkcs1pad, - srt_subtitles_timecode, strip_or_none, urljoin, ) @@ -42,6 +41,18 @@ class ADNIE(InfoExtractor): } _BASE_URL = 'http://animedigitalnetwork.fr' _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537) + _POS_ALIGN_MAP = { + 'start': 1, + 'end': 3, + } + _LINE_ALIGN_MAP = { + 'middle': 8, + 'end': 4, + } + + @staticmethod + def _ass_subtitles_timecode(seconds): + return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100) def _get_subtitles(self, sub_path, video_id): if not sub_path: @@ -49,14 +60,14 @@ class ADNIE(InfoExtractor): enc_subtitles = self._download_webpage( urljoin(self._BASE_URL, sub_path), - video_id, fatal=False) + video_id, 'Downloading subtitles data', fatal=False) if not enc_subtitles: return None # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), - bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')), + bytes_to_intlist(binascii.unhexlify(self._K + '083db5aebd9353b4')), bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) )) subtitles_json = self._parse_json( @@ -67,23 +78,27 @@ class ADNIE(InfoExtractor): subtitles = {} for sub_lang, sub in subtitles_json.items(): - srt = '' - for num, current in enumerate(sub): - start, end, text = ( + ssa = '''[Script Info] +ScriptType:V4.00 +[V4 Styles] +Format:Name,Fontname,Fontsize,PrimaryColour,Bold,BorderStyle,Outline,Alignment,MarginL,MarginR,MarginV +Style:Default,Arial,18,16777215,-1,1,1,2,20,20,20 +[Events] +Format:Marked,Start,End,Style,Text''' + for current in sub: + start, end, text, line_align, position_align = ( float_or_none(current.get('startTime')), float_or_none(current.get('endTime')), - current.get('text')) + current.get('text'), current.get('lineAlign'), + current.get('positionAlign')) if start is None or end is None or text is None: continue - srt += os.linesep.join( - ( - '%d' % num, - '%s --> %s' % ( - srt_subtitles_timecode(start), - srt_subtitles_timecode(end)), - text, - os.linesep, - )) + alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) + ssa += os.linesep + 'Dialogue:Marked=0,%s,%s,Default,%s%s' % ( + self._ass_subtitles_timecode(start), + self._ass_subtitles_timecode(end), + '{\\a%d}' % alignment if alignment != 2 else '', + text.replace('\n', '\\N').replace('', '{\\i1}').replace('', '{\\i0}')) if sub_lang == 'vostf': sub_lang = 'fr' @@ -91,8 +106,8 @@ class ADNIE(InfoExtractor): 'ext': 'json', 'data': json.dumps(sub), }, { - 'ext': 'srt', - 'data': srt, + 'ext': 'ssa', + 'data': ssa, }]) return subtitles @@ -100,7 +115,15 @@ class ADNIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_config = self._parse_json(self._search_regex( - r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id) + r'playerConfig\s*=\s*({.+});', webpage, + 'player config', default='{}'), video_id, fatal=False) + if not player_config: + config_url = urljoin(self._BASE_URL, self._search_regex( + r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"', + webpage, 'config url')) + player_config = self._download_json( + config_url, video_id, + 'Downloading player config JSON metadata')['player'] video_info = {} video_info_str = self._search_regex( @@ -129,12 +152,15 @@ class ADNIE(InfoExtractor): encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) authorization = base64.b64encode(encrypted_message).decode() links_data = self._download_json( - urljoin(self._BASE_URL, links_url), video_id, headers={ + urljoin(self._BASE_URL, links_url), video_id, + 'Downloading links JSON metadata', headers={ 'Authorization': 'Bearer ' + authorization, }) links = links_data.get('links') or {} metas = metas or links_data.get('meta') or {} - sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token + sub_path = sub_path or links_data.get('subtitles') or \ + 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id + sub_path += '&token=' + token error = links_data.get('error') title = metas.get('title') or video_info['title'] @@ -142,9 +168,11 @@ class ADNIE(InfoExtractor): for format_id, qualities in links.items(): if not isinstance(qualities, dict): continue - for load_balancer_url in qualities.values(): + for quality, load_balancer_url in qualities.items(): load_balancer_data = self._download_json( - load_balancer_url, video_id, fatal=False) or {} + load_balancer_url, video_id, + 'Downloading %s %s JSON metadata' % (format_id, quality), + fatal=False) or {} m3u8_url = load_balancer_data.get('location') if not m3u8_url: continue From 2bbde1d09afb2225fb7bd245bcf77a0715a58f29 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Apr 2019 17:59:20 +0100 Subject: [PATCH 1807/2417] [adn] fix subtitle compatibility with ffmpeg --- youtube_dl/extractor/adn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 2eb4d1dc7..1e04a55a6 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -81,10 +81,10 @@ class ADNIE(InfoExtractor): ssa = '''[Script Info] ScriptType:V4.00 [V4 Styles] -Format:Name,Fontname,Fontsize,PrimaryColour,Bold,BorderStyle,Outline,Alignment,MarginL,MarginR,MarginV -Style:Default,Arial,18,16777215,-1,1,1,2,20,20,20 +Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding +Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0 [Events] -Format:Marked,Start,End,Style,Text''' +Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' for current in sub: start, end, text, line_align, position_align = ( float_or_none(current.get('startTime')), @@ -94,7 +94,7 @@ Format:Marked,Start,End,Style,Text''' if start is None or end is None or text is None: continue alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) - ssa += os.linesep + 'Dialogue:Marked=0,%s,%s,Default,%s%s' % ( + ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( self._ass_subtitles_timecode(start), self._ass_subtitles_timecode(end), '{\\a%d}' % alignment if alignment != 2 else '', From 69e6efac1669da68c0746419657160311cde2671 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 08:26:04 +0100 Subject: [PATCH 1808/2417] [teamcoco] fix extraction and add suport for subdomains(closes #17099)(closes #20339) --- youtube_dl/extractor/teamcoco.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 73469cc5d..7640cf00a 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -16,7 +16,7 @@ from ..utils import ( class TeamcocoIE(TurnerBaseIE): - _VALID_URL = r'https?://teamcoco\.com/(?P([^/]+/)*[^/?#]+)' + _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P([^/]+/)*[^/?#]+)' _TESTS = [ { 'url': 'http://teamcoco.com/video/mary-kay-remote', @@ -79,15 +79,20 @@ class TeamcocoIE(TurnerBaseIE): }, { 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv', 'only_matching': True, + }, { + 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft', + 'only_matching': True, } ] def _graphql_call(self, query_template, object_type, object_id): find_object = 'find' + object_type return self._download_json( - 'http://teamcoco.com/graphql/', object_id, data=json.dumps({ + 'https://teamcoco.com/graphql', object_id, data=json.dumps({ 'query': query_template % (find_object, object_id) - }))['data'][find_object] + }).encode(), headers={ + 'Content-Type': 'application/json', + })['data'][find_object] def _real_extract(self, url): display_id = self._match_id(url) @@ -145,7 +150,12 @@ class TeamcocoIE(TurnerBaseIE): 'accessTokenType': 'jws', })) else: - video_sources = self._graphql_call('''{ + d = self._download_json( + 'https://teamcoco.com/_truman/d/' + video_id, + video_id, fatal=False) or {} + video_sources = d.get('meta') or {} + if not video_sources: + video_sources = self._graphql_call('''{ %s(id: "%s") { src } From afb74964162eaee64c5c9b72990837daae945fec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 11:45:49 +0100 Subject: [PATCH 1809/2417] [adultswim] fix extraction(closes #18025) --- youtube_dl/extractor/adultswim.py | 194 ++++++++++++++++++------------ 1 file changed, 118 insertions(+), 76 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 88c96a950..8d1d9ac7d 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -1,13 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .turner import TurnerBaseIE from ..utils import ( + determine_ext, + float_or_none, int_or_none, + mimetype2ext, + parse_age_limit, + parse_iso8601, strip_or_none, - url_or_none, + try_get, ) @@ -21,8 +27,8 @@ class AdultSwimIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'Rick and Morty - Pilot', 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.', - 'timestamp': 1493267400, - 'upload_date': '20170427', + 'timestamp': 1543294800, + 'upload_date': '20181127', }, 'params': { # m3u8 download @@ -43,6 +49,7 @@ class AdultSwimIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', }, { 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', 'info_dict': { @@ -61,9 +68,9 @@ class AdultSwimIE(TurnerBaseIE): }, { 'url': 'http://www.adultswim.com/videos/attack-on-titan', 'info_dict': { - 'id': 'b7A69dzfRzuaXIECdxW8XQ', + 'id': 'attack-on-titan', 'title': 'Attack on Titan', - 'description': 'md5:6c8e003ea0777b47013e894767f5e114', + 'description': 'md5:41caa9416906d90711e31dc00cb7db7e', }, 'playlist_mincount': 12, }, { @@ -78,83 +85,118 @@ class AdultSwimIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', }] def _real_extract(self, url): show_path, episode_path = re.match(self._VALID_URL, url).groups() display_id = episode_path or show_path - webpage = self._download_webpage(url, display_id) - initial_data = self._parse_json(self._search_regex( - r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});', - webpage, 'initial data'), display_id) - - is_stream = show_path == 'streams' - if is_stream: - if not episode_path: - episode_path = 'live-stream' - - video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path) - video_id = video_data.get('stream') - - if not video_id: - entries = [] - for episode in video_data.get('archiveEpisodes', []): - episode_url = url_or_none(episode.get('url')) - if not episode_url: - continue - entries.append(self.url_result( - episode_url, 'AdultSwim', episode.get('id'))) - return self.playlist_result( - entries, video_data.get('id'), video_data.get('title'), - strip_or_none(video_data.get('description'))) + query = '''query { + getShowBySlug(slug:"%s") { + %%s + } +}''' % show_path + if episode_path: + query = query % '''title + getVideoBySlug(slug:"%s") { + _id + auth + description + duration + episodeNumber + launchDate + mediaID + seasonNumber + poster + title + tvRating + }''' % episode_path + ['getVideoBySlug'] else: - show_data = initial_data['show'] + query = query % '''metaDescription + title + videos(first:1000,sort:["episode_number"]) { + edges { + node { + _id + slug + } + } + }''' + show_data = self._download_json( + 'https://www.adultswim.com/api/search', display_id, + data=json.dumps({'query': query}).encode(), + headers={'Content-Type': 'application/json'})['data']['getShowBySlug'] + if episode_path: + video_data = show_data['getVideoBySlug'] + video_id = video_data['_id'] + episode_title = title = video_data['title'] + series = show_data.get('title') + if series: + title = '%s - %s' % (series, title) + info = { + 'id': video_id, + 'title': title, + 'description': strip_or_none(video_data.get('description')), + 'duration': float_or_none(video_data.get('duration')), + 'formats': [], + 'subtitles': {}, + 'age_limit': parse_age_limit(video_data.get('tvRating')), + 'thumbnail': video_data.get('poster'), + 'timestamp': parse_iso8601(video_data.get('launchDate')), + 'series': series, + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode': episode_title, + 'episode_number': int_or_none(video_data.get('episodeNumber')), + } - if not episode_path: - entries = [] - for video in show_data.get('videos', []): - slug = video.get('slug') - if not slug: + auth = video_data.get('auth') + media_id = video_data.get('mediaID') + if media_id: + info.update(self._extract_ngtv_info(media_id, { + # CDN_TOKEN_APP_ID from: + # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js + 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE', + }, { + 'url': url, + 'site_name': 'AdultSwim', + 'auth_required': auth, + })) + + if not auth: + extract_data = self._download_json( + 'https://www.adultswim.com/api/shows/v1/videos/' + video_id, + video_id, query={'fields': 'stream'}, fatal=False) or {} + assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or [] + for asset in assets: + asset_url = asset.get('url') + if not asset_url: continue - entries.append(self.url_result( - 'http://adultswim.com/videos/%s/%s' % (show_path, slug), - 'AdultSwim', video.get('id'))) - return self.playlist_result( - entries, show_data.get('id'), show_data.get('title'), - strip_or_none(show_data.get('metadata', {}).get('description'))) + ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type'))) + if ext == 'm3u8': + info['formats'].extend(self._extract_m3u8_formats( + asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + continue + # info['formats'].extend(self._extract_f4m_formats( + # asset_url, video_id, f4m_id='hds', fatal=False)) + elif ext in ('scc', 'ttml', 'vtt'): + info['subtitles'].setdefault('en', []).append({ + 'url': asset_url, + }) + self._sort_formats(info['formats']) - video_data = show_data['sluggedVideo'] - video_id = video_data['id'] - - info = self._extract_cvp_info( - 'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id, - video_id, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', - 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', - }, - }, { - 'url': url, - 'site_name': 'AdultSwim', - 'auth_required': video_data.get('auth'), - }) - - info.update({ - 'id': video_id, - 'display_id': display_id, - 'description': info.get('description') or strip_or_none(video_data.get('description')), - }) - if not is_stream: - info.update({ - 'duration': info.get('duration') or int_or_none(video_data.get('duration')), - 'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')), - 'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')), - 'episode': info['title'], - 'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')), - }) - - info['series'] = video_data.get('collection_title') or info.get('series') - if info['series'] and info['series'] != info['title']: - info['title'] = '%s - %s' % (info['series'], info['title']) - - return info + return info + else: + entries = [] + for edge in show_data.get('videos', {}).get('edges', []): + video = edge.get('node') or {} + slug = video.get('slug') + if not slug: + continue + entries.append(self.url_result( + 'http://adultswim.com/videos/%s/%s' % (show_path, slug), + 'AdultSwim', video.get('_id'))) + return self.playlist_result( + entries, show_path, show_data.get('title'), + strip_or_none(show_data.get('metaDescription'))) From 19041a38773b1022480d50587da7db4a0e6fa869 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 16:18:57 +0100 Subject: [PATCH 1810/2417] [youtube] extract srv[1-3] subtitle formats(#20566) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 886fc1591..132572c88 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -484,7 +484,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, } - _SUBTITLE_FORMATS = ('ttml', 'vtt') + _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') _GEO_BYPASS = False From a7978f8e2acc8c34989b7f289a16180e71902193 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 18:08:43 +0100 Subject: [PATCH 1811/2417] [hbo] fix extraction and extract subtitles(closes #14629)(closes #13709) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/hbo.py | 102 +++++++++++------------------ 2 files changed, 39 insertions(+), 68 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 254dbc946..c1c5d1953 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -441,10 +441,7 @@ from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .hark import HarkIE -from .hbo import ( - HBOIE, - HBOEpisodeIE, -) +from .hbo import HBOIE from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index 859ad5429..44440233d 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -4,16 +4,28 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( xpath_text, xpath_element, int_or_none, parse_duration, + urljoin, ) -class HBOBaseIE(InfoExtractor): +class HBOIE(InfoExtractor): + IE_NAME = 'hbo' + _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer', + 'md5': '8126210656f433c452a21367f9ad85b3', + 'info_dict': { + 'id': '22113301', + 'ext': 'mp4', + 'title': 'Game of Thrones - Trailer', + }, + 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], + } _FORMATS_INFO = { 'pro7': { 'width': 1280, @@ -53,10 +65,17 @@ class HBOBaseIE(InfoExtractor): }, } - def _extract_from_id(self, video_id): - video_data = self._download_xml( - 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id) - title = xpath_text(video_data, 'title', 'title', True) + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + location_path = self._parse_json(self._html_search_regex( + r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl'] + video_data = self._download_xml(urljoin(url, location_path), display_id) + video_id = xpath_text(video_data, 'id', fatal=True) + episode_title = title = xpath_text(video_data, 'title', fatal=True) + series = xpath_text(video_data, 'program') + if series: + title = '%s - %s' % (series, title) formats = [] for source in xpath_element(video_data, 'videos', 'sources', True): @@ -128,68 +147,23 @@ class HBOBaseIE(InfoExtractor): 'width': width, }) + subtitles = None + caption_url = xpath_text(video_data, 'captionUrl') + if caption_url: + subtitles = { + 'en': [{ + 'url': caption_url, + 'ext': 'ttml' + }], + } + return { 'id': video_id, 'title': title, 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')), + 'series': series, + 'episode': episode_title, 'formats': formats, 'thumbnails': thumbnails, + 'subtitles': subtitles, } - - -class HBOIE(HBOBaseIE): - IE_NAME = 'hbo' - _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' - _TEST = { - 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', - 'md5': '2c6a6bc1222c7e91cb3334dad1746e5a', - 'info_dict': { - 'id': '1437839', - 'ext': 'mp4', - 'title': 'Ep. 64 Clip: Encryption', - 'thumbnail': r're:https?://.*\.jpg$', - 'duration': 1072, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._extract_from_id(video_id) - - -class HBOEpisodeIE(HBOBaseIE): - IE_NAME = 'hbo:episode' - _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P[0-9a-z-]+))(?:\.html)?' - - _TESTS = [{ - 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', - 'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb', - 'info_dict': { - 'id': '1439518', - 'display_id': 'ep-52-inside-the-episode', - 'ext': 'mp4', - 'title': 'Ep. 52: Inside the Episode', - 'thumbnail': r're:https?://.*\.jpg$', - 'duration': 240, - }, - }, { - 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', - 'only_matching': True, - }, { - 'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver', - 'only_matching': True, - }] - - def _real_extract(self, url): - path, display_id = re.match(self._VALID_URL, url).groups() - - content = self._download_json( - 'http://www.hbo.com/api/content/' + path, display_id)['content'] - - video_id = compat_str((content.get('parsed', {}).get( - 'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId']) - - info_dict = self._extract_from_id(video_id) - info_dict['display_id'] = display_id - - return info_dict From 4810655cd65f9bdde1ad240adf7334828243fb0a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Apr 2019 19:35:35 +0100 Subject: [PATCH 1812/2417] [bfi:player] Add new extractor(#19235) --- youtube_dl/extractor/bfi.py | 37 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/bfi.py diff --git a/youtube_dl/extractor/bfi.py b/youtube_dl/extractor/bfi.py new file mode 100644 index 000000000..60c8944b5 --- /dev/null +++ b/youtube_dl/extractor/bfi.py @@ -0,0 +1,37 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import extract_attributes + + +class BFIPlayerIE(InfoExtractor): + IE_NAME = 'bfi:player' + _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P[\w-]+)-online' + _TEST = { + 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online', + 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8', + 'info_dict': { + 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63', + 'ext': 'mp4', + 'title': 'Computer Doctor', + 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b', + }, + 'skip': 'BFI Player films cannot be played outside of the UK', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + entries = [] + for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage): + player_attr = extract_attributes(player_el) + ooyala_id = player_attr.get('data-video-id') + if not ooyala_id: + continue + entries.append(self.url_result( + 'ooyala:' + ooyala_id, 'Ooyala', + ooyala_id, player_attr.get('data-label'))) + return self.playlist_result(entries) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c1c5d1953..01119f2bb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -107,6 +107,7 @@ from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE from .beatport import BeatportIE from .bet import BetIE +from .bfi import BFIPlayerIE from .bigflix import BigflixIE from .bild import BildIE from .bilibili import ( From 9f182c23ba74f76ff716940b2a0568f428d6c2b9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Apr 2019 09:22:25 +0100 Subject: [PATCH 1813/2417] [vrv] add basic support for individual movie links(#19229) --- youtube_dl/extractor/vrv.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 6c060ae76..c11da97de 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -150,9 +150,10 @@ class VRVIE(VRVBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - episode_path = self._get_cms_resource( - 'cms:/episodes/' + video_id, video_id) - video_data = self._call_cms(episode_path, video_id, 'video') + object_data = self._call_cms(self._get_cms_resource( + 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0] + resource_path = object_data['__links__']['resource']['href'] + video_data = self._call_cms(resource_path, video_id, 'video') title = video_data['title'] streams_path = video_data['__links__'].get('streams', {}).get('href') From b9aad6c427ea083143d1e53c0f087ad508bd706a Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Sat, 9 Feb 2019 10:15:53 +0100 Subject: [PATCH 1814/2417] [dvtv] Fix extraction (closes #18514) --- youtube_dl/extractor/dvtv.py | 120 +++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index 20996962a..bc68d07d1 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -10,7 +10,9 @@ from ..utils import ( int_or_none, js_to_json, mimetype2ext, + try_get, unescapeHTML, + parse_iso8601, ) @@ -28,6 +30,8 @@ class DVTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně', 'duration': 1484, + 'upload_date': '20141217', + 'timestamp': 1418792400, } }, { 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', @@ -84,6 +88,8 @@ class DVTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta', 'duration': 1103, + 'upload_date': '20170511', + 'timestamp': 1494514200, }, 'params': { 'skip_download': True, @@ -91,43 +97,62 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'only_matching': True, + }, { + # Test live stream video (liveStarter) parsing + 'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/', + 'md5': '2e552e483f2414851ca50467054f9d5d', + 'info_dict': { + 'id': '8d116360288011e98c840cc47ab5f122', + 'ext': 'mp4', + 'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu', + 'upload_date': '20190204', + 'timestamp': 1549289591, + }, + 'params': { + # Video content is no longer available + 'skip_download': True, + }, }] - def _parse_video_metadata(self, js, video_id, live_js=None): + def _parse_video_metadata(self, js, video_id, timestamp): + data = self._parse_json(js, video_id, transform_source=js_to_json) - if live_js: - data.update(self._parse_json( - live_js, video_id, transform_source=js_to_json)) + + live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict) + if live_starter: + data.update(live_starter) title = unescapeHTML(data['title']) formats = [] - for video in data['sources']: - video_url = video.get('file') - if not video_url: - continue - video_type = video.get('type') - ext = determine_ext(video_url, mimetype2ext(video_type)) - if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif video_type == 'application/dash+xml' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - video_url, video_id, mpd_id='dash', fatal=False)) - else: - label = video.get('label') - height = self._search_regex( - r'^(\d+)[pP]', label or '', 'height', default=None) - format_id = ['http'] - for f in (ext, label): - if f: - format_id.append(f) - formats.append({ - 'url': video_url, - 'format_id': '-'.join(format_id), - 'height': int_or_none(height), - }) + + for tracks in data.get('tracks', {}).values(): + for video in tracks: + video_url = video.get('src') + if not video_url: + continue + video_type = video.get('type') + ext = determine_ext(video_url, mimetype2ext(video_type)) + if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif video_type == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + else: + label = video.get('label') + height = self._search_regex( + r'^(\d+)[pP]', label or '', 'height', default=None) + format_id = ['http'] + for f in (ext, label): + if f: + format_id.append(f) + formats.append({ + 'url': video_url, + 'format_id': '-'.join(format_id), + 'height': int_or_none(height), + }) self._sort_formats(formats) return { @@ -136,7 +161,7 @@ class DVTVIE(InfoExtractor): 'description': data.get('description'), 'thumbnail': data.get('image'), 'duration': int_or_none(data.get('duration')), - 'timestamp': int_or_none(data.get('pubtime')), + 'timestamp': int_or_none(timestamp), 'formats': formats } @@ -145,32 +170,33 @@ class DVTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - # live content - live_item = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});', - webpage, 'video', default=None) - - # single video - item = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});', - webpage, 'video', default=None) - - if item: - return self._parse_video_metadata(item, video_id, live_item) + timestamp = parse_iso8601(self._html_search_meta( + 'article:published_time', webpage, 'published time', default=None)) # playlist items = re.findall( - r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);", + r"(?s)playlist\.push\(({.+?})\);", webpage) - if not items: - items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage) if items: return { '_type': 'playlist', 'id': video_id, 'title': self._og_search_title(webpage), - 'entries': [self._parse_video_metadata(i, video_id) for i in items] + 'entries': [self._parse_video_metadata(i, video_id, timestamp) for i in items] } + # single video + item = self._search_regex( + r'(?s)BBXPlayer.setup\((.+?)\);', + webpage, 'video', default=None) + + if item: + # remove function calls (ex. htmldeentitize) + # TODO this should be fixed in a general way in the js_to_json + item = re.sub(r'\w+?\((.+)\)', r'\1', item) + + if item: + return self._parse_video_metadata(item, video_id, timestamp) + raise ExtractorError('Could not find neither video nor playlist') From 19591facea9abe965a734224bea33948bb57b5f4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Apr 2019 16:29:30 +0100 Subject: [PATCH 1815/2417] [dvtv] remove unnecessary comments and spaces --- youtube_dl/extractor/dvtv.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index bc68d07d1..de7f6d670 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -19,9 +19,7 @@ from ..utils import ( class DVTVIE(InfoExtractor): IE_NAME = 'dvtv' IE_DESC = 'http://video.aktualne.cz/' - _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P[0-9a-f]{32})' - _TESTS = [{ 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', 'md5': '67cb83e4a955d36e1b5d31993134a0c2', @@ -36,7 +34,7 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'info_dict': { - 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci', + 'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci', 'id': '973eb3bc854e11e498be002590604f2e', }, 'playlist': [{ @@ -115,17 +113,14 @@ class DVTVIE(InfoExtractor): }] def _parse_video_metadata(self, js, video_id, timestamp): - data = self._parse_json(js, video_id, transform_source=js_to_json) + title = unescapeHTML(data['title']) live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict) if live_starter: data.update(live_starter) - title = unescapeHTML(data['title']) - formats = [] - for tracks in data.get('tracks', {}).values(): for video in tracks: video_url = video.get('src') @@ -167,36 +162,23 @@ class DVTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - timestamp = parse_iso8601(self._html_search_meta( 'article:published_time', webpage, 'published time', default=None)) - # playlist - items = re.findall( - r"(?s)playlist\.push\(({.+?})\);", - webpage) - + items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage) if items: - return { - '_type': 'playlist', - 'id': video_id, - 'title': self._og_search_title(webpage), - 'entries': [self._parse_video_metadata(i, video_id, timestamp) for i in items] - } + return self.playlist_result( + [self._parse_video_metadata(i, video_id, timestamp) for i in items], + video_id, self._html_search_meta('twitter:title', webpage)) - # single video item = self._search_regex( - r'(?s)BBXPlayer.setup\((.+?)\);', + r'(?s)BBXPlayer\.setup\((.+?)\);', webpage, 'video', default=None) - if item: # remove function calls (ex. htmldeentitize) # TODO this should be fixed in a general way in the js_to_json item = re.sub(r'\w+?\((.+)\)', r'\1', item) - - if item: return self._parse_video_metadata(item, video_id, timestamp) raise ExtractorError('Could not find neither video nor playlist') From c701472fc99c90f6ef4fdf0f0169a342f2e9e892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 02:15:25 +0700 Subject: [PATCH 1816/2417] [platzi] Add extractor (closes #20562) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/platzi.py | 217 +++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 youtube_dl/extractor/platzi.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 01119f2bb..41ab36213 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -868,6 +868,10 @@ from .picarto import ( from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE +from .platzi import ( + PlatziIE, + PlatziCourseIE, +) from .playfm import PlayFMIE from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py new file mode 100644 index 000000000..557b2b5ad --- /dev/null +++ b/youtube_dl/extractor/platzi.py @@ -0,0 +1,217 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_str, +) +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + str_or_none, + try_get, + url_or_none, + urlencode_postdata, + urljoin, +) + + +class PlatziIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/[^/]+/(?P\d+)-[^/?\#&]+ + ''' + _LOGIN_URL = 'https://platzi.com/login/' + _NETRC_MACHINE = 'platzi' + + _TESTS = [{ + 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'email': username, + 'password': password, + }) + + urlh = self._request_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Referer': self._LOGIN_URL}) + + # login succeeded + if 'platzi.com/login' not in compat_str(urlh.geturl()): + return + + login_error = self._webpage_read_content( + urlh, self._LOGIN_URL, None, 'Downloading login error page') + + login = self._parse_json( + self._search_regex( + r'login\s*=\s*({.+?})(?:\s*;|\s*[^/?\#&]+) + ''' + _TESTS = [{ + 'url': 'https://platzi.com/clases/next-js/', + 'info_dict': { + 'id': '1311', + 'title': 'Curso de Next.js', + }, + 'playlist_count': 22, + }, { + 'url': 'https://courses.platzi.com/classes/communication-codestream/', + 'info_dict': { + 'id': '1367', + 'title': 'Codestream Course', + }, + 'playlist_count': 14, + }] + + @classmethod + def suitable(cls, url): + return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_name = self._match_id(url) + + webpage = self._download_webpage(url, course_name) + + props = self._parse_json( + self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'), + course_name)['initialProps'] + + entries = [] + for chapter_num, chapter in enumerate(props['concepts'], 1): + if not isinstance(chapter, dict): + continue + materials = chapter.get('materials') + if not materials or not isinstance(materials, list): + continue + chapter_title = chapter.get('title') + chapter_id = str_or_none(chapter.get('id')) + for material in materials: + if not isinstance(material, dict): + continue + if material.get('material_type') != 'video': + continue + video_url = urljoin(url, material.get('url')) + if not video_url: + continue + entries.append({ + '_type': 'url_transparent', + 'url': video_url, + 'title': str_or_none(material.get('name')), + 'id': str_or_none(material.get('id')), + 'ie_key': PlatziIE.ie_key(), + 'chapter': chapter_title, + 'chapter_number': chapter_num, + 'chapter_id': chapter_id, + }) + + course_id = compat_str(try_get(props, lambda x: x['course']['id'])) + course_title = try_get(props, lambda x: x['course']['name'], compat_str) + + return self.playlist_result(entries, course_id, course_title) From 059cd768b97188fbbf3262696e6511f3aa1795e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 02:17:54 +0700 Subject: [PATCH 1817/2417] [vk] Remove unused import --- youtube_dl/extractor/vk.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 39376e39f..1072550f1 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -6,10 +6,7 @@ import re import sys from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) +from ..compat import compat_urlparse from ..utils import ( clean_html, ExtractorError, From f412970164c349bea81bfd9d95e56fec7d16c8a1 Mon Sep 17 00:00:00 2001 From: Martin Michlmayr Date: Sun, 7 Apr 2019 02:28:31 +0700 Subject: [PATCH 1818/2417] [README.md] Fix lists formatting (closes #20558) Lists have to be separated from the previous paragraph by a blank line in certain variants of Markdown, otherwise they are not interpreted as lists. This change ensures that that the youtube-dl.1 man page, which is generated from README.md with the help of pandoc, is formatted correctly. --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index e476045b2..92c3a92a1 100644 --- a/README.md +++ b/README.md @@ -642,6 +642,7 @@ The simplest case is requesting a specific format, for example with `-f 22` you You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. You can also use special names to select particular edge case formats: + - `best`: Select the best quality format represented by a single file with video and audio. - `worst`: Select the worst quality format represented by a single file with video and audio. - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available. @@ -658,6 +659,7 @@ If you want to download several formats of the same video use a comma as a separ You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals): + - `filesize`: The number of bytes, if known in advance - `width`: Width of the video, if known - `height`: Height of the video, if known @@ -668,6 +670,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, ` - `fps`: Frame rate Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields: + - `ext`: File extension - `acodec`: Name of the audio codec in use - `vcodec`: Name of the video codec in use From f4da80803619aea52bb116f6191f78e1bd77d2c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 02:58:40 +0700 Subject: [PATCH 1819/2417] [xvideos] Extract all thumbnails (closes #20432) --- youtube_dl/extractor/xvideos.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index ec2d913fc..166bcf443 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -57,10 +57,17 @@ class XVideosIE(InfoExtractor): webpage, 'title', default=None, group='title') or self._og_search_title(webpage) - thumbnail = self._search_regex( - (r'setThumbUrl\(\s*(["\'])(?P(?:(?!\1).)+)\1', - r'url_bigthumb=(?P.+?)&'), - webpage, 'thumbnail', fatal=False, group='thumbnail') + thumbnails = [] + for preference, thumbnail in enumerate(('', '169')): + thumbnail_url = self._search_regex( + r'setThumbUrl%s\(\s*(["\'])(?P(?:(?!\1).)+)\1' % thumbnail, + webpage, 'thumbnail', default=None, group='thumbnail') + if thumbnail_url: + thumbnails.append({ + 'url': thumbnail_url, + 'preference': preference, + }) + duration = int_or_none(self._og_search_property( 'duration', webpage, default=None)) or parse_duration( self._search_regex( @@ -98,6 +105,6 @@ class XVideosIE(InfoExtractor): 'formats': formats, 'title': title, 'duration': duration, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'age_limit': 18, } From 8410653f24ae73e4bc52c11bc0ec595e42567038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 03:18:10 +0700 Subject: [PATCH 1820/2417] [ruutu] Add support for audio podcasts (closes #20473, closes #20545) --- youtube_dl/extractor/ruutu.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index f530f0083..f05401b36 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -59,6 +59,20 @@ class RuutuIE(InfoExtractor): 'url': 'http://www.ruutu.fi/video/3193728', 'only_matching': True, }, + { + # audio podcast + 'url': 'https://www.supla.fi/supla/3382410', + 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', + 'info_dict': { + 'id': '3382410', + 'ext': 'mp3', + 'title': 'Mikä ihmeen poltergeist?', + 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + }, + 'expected_warnings': ['HTTP Error 502: Bad Gateway'], + } ] def _real_extract(self, url): @@ -94,6 +108,12 @@ class RuutuIE(InfoExtractor): continue formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) + elif ext == 'mp3' or child.tag == 'AudioMediaFile': + formats.append({ + 'format_id': 'audio', + 'url': video_url, + 'vcodec': 'none', + }) else: proto = compat_urllib_parse_urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': From aa5338118e77380c753b397c6f544fcfdef1cb22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 04:16:45 +0700 Subject: [PATCH 1821/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3a0b6634f..41adb4204 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +version + +Core ++ [downloader/external] Pass rtmp_conn to ffmpeg + +Extractors ++ [ruutu] Add support for audio podcasts (#20473, #20545) ++ [xvideos] Extract all thumbnails (#20432) ++ [platzi] Add support for platzi.com (#20562) +* [dvtv] Fix extraction (#18514, #19174) ++ [vrv] Add basic support for individual movie links (#19229) ++ [bfi:player] Add support for player.bfi.org.uk (#19235) +* [hbo] Fix extraction and extract subtitles (#14629, #13709) +* [youtube] Extract srv[1-3] subtitle formats (#20566) +* [adultswim] Fix extraction (#18025) +* [teamcoco] Fix extraction and add suport for subdomains (#17099, #20339) +* [adn] Fix subtitle compatibility with ffmpeg +* [adn] Fix extraction and add support for positioning styles (#20549) +* [vk] Use unique video id (#17848) +* [newstube] Fix extraction +* [rtl2] Actualize extraction ++ [adobeconnect] Add support for adobeconnect.com (#20283) ++ [gaia] Add support for authentication (#14605) ++ [mediasite] Add support for dashed ids and named catalogs (#20531) + + version 2019.04.01 Core From a46d9e5b41bcfdf23038fa9b36aec5d4ba9c6de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Apr 2019 04:19:46 +0700 Subject: [PATCH 1822/2417] release 2019.04.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 6 +++++- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index cc68279d0..5469c73cf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.01*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.07*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.04.01 +[debug] youtube-dl version 2019.04.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 41adb4204..421f247fd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.04.07 Core + [downloader/external] Pass rtmp_conn to ffmpeg diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 12df319eb..df272c479 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **acast:channel** - **AddAnime** - **ADN**: Anime Digital Network + - **AdobeConnect** - **AdobeTV** - **AdobeTVChannel** - **AdobeTVShow** @@ -101,6 +102,7 @@ - **Bellator** - **BellMedia** - **Bet** + - **bfi:player** - **Bigflix** - **Bild**: Bild.de - **BiliBili** @@ -345,7 +347,6 @@ - **Groupon** - **Hark** - **hbo** - - **hbo:episode** - **HearThisAt** - **Heise** - **HellPorno** @@ -489,6 +490,7 @@ - **Mediaset** - **Mediasite** - **MediasiteCatalog** + - **MediasiteNamedCatalog** - **Medici** - **megaphone.fm**: megaphone.fm embedded players - **Meipai**: 美拍 @@ -671,6 +673,8 @@ - **Piksel** - **Pinkbike** - **Pladform** + - **Platzi** + - **PlatziCourse** - **play.fm** - **PlayPlusTV** - **PlaysTV** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f872cd137..5c7d550f5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.04.01' +__version__ = '2019.04.07' From bf6fb8b9dc921a30df24d9789d5bbb0ac5b370b0 Mon Sep 17 00:00:00 2001 From: ealgase Date: Sat, 6 Apr 2019 23:38:40 -0400 Subject: [PATCH 1823/2417] [openload] add tests --- youtube_dl/extractor/openload.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 25b3bfdbd..130165b8c 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -350,6 +350,12 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.space/f/IY4eZSst3u8/', 'only_matching': True, + }, { + 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', + 'only_matching': True, + }, { + 'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 9ed06812ec8da6f1364acd00261935c334994e62 Mon Sep 17 00:00:00 2001 From: ealgase Date: Sat, 6 Apr 2019 23:59:41 -0400 Subject: [PATCH 1824/2417] [streamango] add support for streamcherry.com --- youtube_dl/extractor/streamango.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py index efb259f96..f1e17dd88 100644 --- a/youtube_dl/extractor/streamango.py +++ b/youtube_dl/extractor/streamango.py @@ -14,7 +14,7 @@ from ..utils import ( class StreamangoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', 'md5': 'e992787515a182f55e38fc97588d802a', @@ -41,6 +41,9 @@ class StreamangoIE(InfoExtractor): }, { 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', 'only_matching': True, + }, { + 'url': 'https://streamcherry.com/f/clapasobsptpkdfe/', + 'only_matching': True, }] def _real_extract(self, url): From d562cac9dc67bfa2306c6225c261390162527d9e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 Apr 2019 12:39:48 +0100 Subject: [PATCH 1825/2417] [stv:player] Add new extractor(closes #20586) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/stv.py | 94 ++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 youtube_dl/extractor/stv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 41ab36213..cc19af5c4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1093,6 +1093,7 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE +from .stv import STVPlayerIE from .sunporno import SunPornoIE from .svt import ( SVTIE, diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py new file mode 100644 index 000000000..ccb074cd4 --- /dev/null +++ b/youtube_dl/extractor/stv.py @@ -0,0 +1,94 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse +) +from ..utils import ( + extract_attributes, + float_or_none, + int_or_none, + str_or_none, +) + + +class STVPlayerIE(InfoExtractor): + IE_NAME = 'stv:player' + _VALID_URL = r'https?://player\.stv\.tv/(?Pepisode|video)/(?P[a-z0-9]{4})' + _TEST = { + 'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/', + 'md5': '2ad867d4afd641fa14187596e0fbc91b', + 'info_dict': { + 'id': '6016487034001', + 'ext': 'mp4', + 'upload_date': '20190321', + 'title': 'Interview with the cast ahead of new Victoria', + 'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.', + 'timestamp': 1553179628, + 'uploader_id': '1486976045', + }, + 'skip': 'this resource is unavailable outside of the UK', + } + _PUBLISHER_ID = '1486976045' + _PTYPE_MAP = { + 'episode': 'episodes', + 'video': 'shortform', + } + + def _real_extract(self, url): + ptype, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex( + r'itemprop="embedURL"[^>]+href="([^"]+)', + webpage, 'embed URL', default=None)).query) + publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID + + player_attr = extract_attributes(self._search_regex( + r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {} + + info = {} + duration = ref_id = series = video_id = None + api_ref_id = player_attr.get('data-player-api-refid') + if api_ref_id: + resp = self._download_json( + 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id), + api_ref_id, fatal=False) + if resp: + result = resp.get('results') or {} + video = result.get('video') or {} + video_id = str_or_none(video.get('id')) + ref_id = video.get('guid') + duration = video.get('length') + programme = result.get('programme') or {} + series = programme.get('name') or programme.get('shortName') + subtitles = {} + _subtitles = result.get('_subtitles') or {} + for ext, sub_url in _subtitles.items(): + subtitles.setdefault('en', []).append({ + 'ext': 'vtt' if ext == 'webvtt' else ext, + 'url': sub_url, + }) + info.update({ + 'description': result.get('summary'), + 'subtitles': subtitles, + 'view_count': int_or_none(result.get('views')), + }) + if not video_id: + video_id = qs.get('videoId', [None])[0] or self._search_regex( + r' Date: Sun, 7 Apr 2019 21:05:50 +0700 Subject: [PATCH 1826/2417] [tiktok] Add support for new URL schema (closes #20573) --- youtube_dl/extractor/tiktok.py | 35 +++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py index 083e9f36d..66088b9ab 100644 --- a/youtube_dl/extractor/tiktok.py +++ b/youtube_dl/extractor/tiktok.py @@ -65,8 +65,15 @@ class TikTokBaseIE(InfoExtractor): class TikTokIE(TikTokBaseIE): - _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P\d+)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + (?:m\.)?tiktok\.com/v| + (?:www\.)?tiktok\.com/share/video + ) + /(?P\d+) + ''' + _TESTS = [{ 'url': 'https://m.tiktok.com/v/6606727368545406213.html', 'md5': 'd584b572e92fcd48888051f238022420', 'info_dict': { @@ -81,25 +88,39 @@ class TikTokIE(TikTokBaseIE): 'comment_count': int, 'repost_count': int, } - } + }, { + 'url': 'https://www.tiktok.com/share/video/6606727368545406213', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'https://m.tiktok.com/v/%s.html' % video_id, video_id) data = self._parse_json(self._search_regex( r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id) return self._extract_aweme(data) class TikTokUserIE(TikTokBaseIE): - _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P\d+)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + (?:m\.)?tiktok\.com/h5/share/usr| + (?:www\.)?tiktok\.com/share/user + ) + /(?P\d+) + ''' + _TESTS = [{ 'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html', 'info_dict': { 'id': '188294915489964032', }, 'playlist_mincount': 24, - } + }, { + 'url': 'https://www.tiktok.com/share/user/188294915489964032', + 'only_matching': True, + }] def _real_extract(self, url): user_id = self._match_id(url) From 9045d28b5e3eba03908a82dcb868bd1649650ddb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 Apr 2019 21:31:01 +0100 Subject: [PATCH 1827/2417] [aol] restrict url regex and improve format extraction --- youtube_dl/extractor/aol.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index cb9279193..dffa9733d 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, int_or_none, @@ -12,12 +16,12 @@ from ..utils import ( class AolIE(InfoExtractor): - IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P[^/?#&]+)' + IE_NAME = 'aol.com' + _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.com/video/(?:[^/]+/)*)(?P[0-9a-f]+)' _TESTS = [{ # video with 5min ID - 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', + 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { 'id': '518167793', @@ -34,7 +38,7 @@ class AolIE(InfoExtractor): } }, { # video with vidible ID - 'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', + 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', 'info_dict': { 'id': '5707d6b8e4b090497b04f706', 'ext': 'mp4', @@ -49,17 +53,17 @@ class AolIE(InfoExtractor): 'skip_download': True, } }, { - 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', + 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', 'only_matching': True, }, { - 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', - 'only_matching': True, - }, { - 'url': 'http://on.aol.com/video/519442220', + 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/', 'only_matching': True, }, { 'url': 'aol-video:5707d6b8e4b090497b04f706', 'only_matching': True, + }, { + 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/', + 'only_matching': True, }] def _real_extract(self, url): @@ -73,7 +77,7 @@ class AolIE(InfoExtractor): video_data = response['data'] formats = [] - m3u8_url = video_data.get('videoMasterPlaylist') + m3u8_url = url_or_none(video_data.get('videoMasterPlaylist')) if m3u8_url: formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) @@ -96,6 +100,12 @@ class AolIE(InfoExtractor): 'width': int(mobj.group(1)), 'height': int(mobj.group(2)), }) + else: + qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query) + f.update({ + 'width': int_or_none(qs.get('w', [None])[0]), + 'height': int_or_none(qs.get('h', [None])[0]), + }) formats.append(f) self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) From 9c017253e83ac3dfd363566ccbb9fc63f4e2ac07 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 8 Apr 2019 16:34:03 +0100 Subject: [PATCH 1828/2417] [jwplatfom] do not match manifest URLs(#20596) --- youtube_dl/extractor/jwplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index d19a6a774..647b905f1 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class JWPlatformIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' + _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' _TESTS = [{ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325', From 5ca3459828cc0d752f02dab3e9c02cca85185cbb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:20:26 +0100 Subject: [PATCH 1829/2417] [kaltura] sanitize embed URLs --- youtube_dl/extractor/kaltura.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index fdf7f5bbc..79162f665 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -145,6 +145,8 @@ class KalturaIE(InfoExtractor): ) if mobj: embed_info = mobj.groupdict() + for k, v in embed_info.items(): + embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info escaped_pid = re.escape(embed_info['partner_id']) service_url = re.search( From 4bc12b8f819cd0a393e5800d4dc2ecf24401e199 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:21:46 +0100 Subject: [PATCH 1830/2417] [dispeak] improve mp4 bitrate extraction --- youtube_dl/extractor/dispeak.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py index c05f601e2..c345e0274 100644 --- a/youtube_dl/extractor/dispeak.py +++ b/youtube_dl/extractor/dispeak.py @@ -58,10 +58,17 @@ class DigitallySpeakingIE(InfoExtractor): stream_name = xpath_text(a_format, 'streamName', fatal=True) video_path = re.match(r'mp4\:(?P.*)', stream_name).group('path') url = video_root + video_path - vbr = xpath_text(a_format, 'bitrate') + bitrate = xpath_text(a_format, 'bitrate') + tbr = int_or_none(bitrate) + vbr = int_or_none(self._search_regex( + r'-(\d+)\.mp4', video_path, 'vbr', default=None)) + abr = tbr - vbr if tbr and vbr else None video_formats.append({ + 'format_id': bitrate, 'url': url, - 'vbr': int_or_none(vbr), + 'tbr': tbr, + 'vbr': vbr, + 'abr': abr, }) return video_formats From 118f7add3b9690884edb4dc887995f3815243c78 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Apr 2019 11:23:47 +0100 Subject: [PATCH 1831/2417] [gdc] add support for kaltura embeds and update tests(closes #20575) --- youtube_dl/extractor/gdcvault.py | 96 ++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 8806dc48a..2f555c1d4 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,22 +3,24 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .kaltura import KalturaIE from ..utils import ( HEADRequest, sanitized_Request, + smuggle_url, urlencode_postdata, ) class GDCVaultIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)/(?P(\w|-)+)?' + _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)(?:/(?P[\w-]+))?' _NETRC_MACHINE = 'gdcvault' _TESTS = [ { 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', 'md5': '7ce8388f544c88b7ac11c7ab1b593704', 'info_dict': { - 'id': '1019721', + 'id': '201311826596_AWNY', 'display_id': 'Doki-Doki-Universe-Sweet-Simple', 'ext': 'mp4', 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' @@ -27,7 +29,7 @@ class GDCVaultIE(InfoExtractor): { 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', 'info_dict': { - 'id': '1015683', + 'id': '201203272_1330951438328RSXR', 'display_id': 'Embracing-the-Dark-Art-of', 'ext': 'flv', 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' @@ -56,7 +58,7 @@ class GDCVaultIE(InfoExtractor): 'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface', 'md5': 'a8efb6c31ed06ca8739294960b2dbabd', 'info_dict': { - 'id': '1023460', + 'id': '840376_BQRC', 'ext': 'mp4', 'display_id': 'Tenacious-Design-and-The-Interface', 'title': 'Tenacious Design and The Interface of \'Destiny\'', @@ -66,26 +68,38 @@ class GDCVaultIE(InfoExtractor): # Multiple audios 'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC', 'info_dict': { - 'id': '1014631', - 'ext': 'flv', + 'id': '12396_1299111843500GMPX', + 'ext': 'mp4', 'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man', }, - 'params': { - 'skip_download': True, # Requires rtmpdump - 'format': 'jp', # The japanese audio - } + # 'params': { + # 'skip_download': True, # Requires rtmpdump + # 'format': 'jp', # The japanese audio + # } }, { # gdc-player.html 'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo', 'info_dict': { - 'id': '1435', + 'id': '9350_1238021887562UHXB', 'display_id': 'An-American-engine-in-Tokyo', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT', }, + }, + { + # Kaltura Embed + 'url': 'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling', + 'info_dict': { + 'id': '0_h1fg8j3p', + 'ext': 'mp4', + 'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)', + 'timestamp': 1554401811, + 'upload_date': '20190404', + 'uploader_id': 'joe@blazestreaming.com', + }, 'params': { - 'skip_download': True, # Requires rtmpdump + 'format': 'mp4-408', }, }, ] @@ -114,10 +128,8 @@ class GDCVaultIE(InfoExtractor): return start_page def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - display_id = mobj.group('name') or video_id + video_id, name = re.match(self._VALID_URL, url).groups() + display_id = name or video_id webpage_url = 'http://www.gdcvault.com/play/' + video_id start_page = self._download_webpage(webpage_url, display_id) @@ -127,12 +139,12 @@ class GDCVaultIE(InfoExtractor): start_page, 'url', default=None) if direct_url: title = self._html_search_regex( - r'Session Name\s*(.*?)', + r'Session Name:?\s*(.*?)', start_page, 'title') video_url = 'http://www.gdcvault.com' + direct_url # resolve the url so that we can detect the correct extension - head = self._request_webpage(HEADRequest(video_url), video_id) - video_url = head.geturl() + video_url = self._request_webpage( + HEADRequest(video_url), video_id).geturl() return { 'id': video_id, @@ -141,34 +153,36 @@ class GDCVaultIE(InfoExtractor): 'title': title, } - PLAYER_REGEX = r'' - xml_root = self._html_search_regex( - PLAYER_REGEX, start_page, 'xml root', default=None) - if xml_root is None: - # Probably need to authenticate - login_res = self._login(webpage_url, display_id) - if login_res is None: - self.report_warning('Could not login.') - else: - start_page = login_res - # Grab the url from the authenticated page - xml_root = self._html_search_regex( - PLAYER_REGEX, start_page, 'xml root') + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root', default=None) + if xml_root is None: + # Probably need to authenticate + login_res = self._login(webpage_url, display_id) + if login_res is None: + self.report_warning('Could not login.') + else: + start_page = login_res + # Grab the url from the authenticated page + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root') - xml_name = self._html_search_regex( - r'', + r'', + r'', webpage, 'embed url')) if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) From 427cc215310804127b55744fcc3664ede38a4a0d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 10 Jun 2019 15:17:26 +0100 Subject: [PATCH 1971/2417] [biqle] remove unnecessary regex group --- youtube_dl/extractor/biqle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py index c5c374845..af21e3ee5 100644 --- a/youtube_dl/extractor/biqle.py +++ b/youtube_dl/extractor/biqle.py @@ -42,7 +42,7 @@ class BIQLEIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) embed_url = self._proto_relative_url(self._search_regex( - r'', + r'', webpage, 'embed url')) if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) From 8361e7f93489f226542517216b2127ff170ca996 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 12 Jun 2019 21:41:46 +0100 Subject: [PATCH 1972/2417] [toutv] update client key(closes #21370) --- youtube_dl/extractor/toutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 00f58a087..44b022fca 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -38,7 +38,7 @@ class TouTvIE(RadioCanadaIE): 'url': 'https://ici.tou.tv/l-age-adulte/S01C501', 'only_matching': True, }] - _CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36' + _CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4' def _real_initialize(self): email, password = self._get_login_info() From 28cc2241e44ff0c0704cfffaca6d47d377041aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jun 2019 01:56:17 +0700 Subject: [PATCH 1973/2417] [utils] Restrict parse_codecs and add theora as known vcodec (#21381) --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 11 +++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 71980b3fc..659c6ece5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -822,6 +822,15 @@ class TestUtil(unittest.TestCase): 'vcodec': 'av01.0.05M.08', 'acodec': 'none', }) + self.assertEqual(parse_codecs('theora, vorbis'), { + 'vcodec': 'theora', + 'acodec': 'vorbis', + }) + self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { + 'vcodec': 'unknownvcodec', + 'acodec': 'unknownacodec', + }) + self.assertEqual(parse_codecs('unknown'), {}) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ead9bd862..a1f586b80 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2529,7 +2529,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in splited_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): @@ -2540,13 +2540,8 @@ def parse_codecs(codecs_str): if not vcodec and not acodec: if len(splited_codecs) == 2: return { - 'vcodec': vcodec, - 'acodec': acodec, - } - elif len(splited_codecs) == 1: - return { - 'vcodec': 'none', - 'acodec': vcodec, + 'vcodec': splited_codecs[0], + 'acodec': splited_codecs[1], } else: return { From b85eae0f057a0afdf1da9d6034c19327c8de33cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Jun 2019 01:59:05 +0700 Subject: [PATCH 1974/2417] [youtube] Hardcode codec metadata for av01 video only formats (closes #21381) --- youtube_dl/extractor/youtube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 963c73a2d..7b630b191 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -500,6 +500,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, + + # av01 video only formats sometimes served with "unknown" codecs + '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, + '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, } _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') From 7c24a58bdb60af80137beac85c8804c70194a455 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 16 Jun 2019 06:32:17 +0100 Subject: [PATCH 1975/2417] [sixplay] add support for rtlmost.hu(#21405) --- youtube_dl/extractor/sixplay.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index 2a72af11b..7ec66ecf3 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -19,7 +19,7 @@ from ..utils import ( class SixPlayIE(InfoExtractor): IE_NAME = '6play' - _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P[0-9]+)' + _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P[0-9]+)' _TESTS = [{ 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051', 'md5': '31fcd112637baa0c2ab92c4fcd8baf27', @@ -35,6 +35,9 @@ class SixPlayIE(InfoExtractor): }, { 'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989', 'only_matching': True, + }, { + 'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787', + 'only_matching': True, }] def _real_extract(self, url): @@ -43,6 +46,7 @@ class SixPlayIE(InfoExtractor): '6play.fr': ('6play', 'm6web'), 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'), 'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'), + 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'), }.get(domain, ('6play', 'm6web')) data = self._download_json( From c40714cdee0ce3de1a5f6e17a61d3ee4c610ae63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 20 Jun 2019 00:57:58 +0700 Subject: [PATCH 1976/2417] [youtube] Make --write-annotations non fatal (closes #21452) --- youtube_dl/extractor/youtube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7b630b191..1010c8616 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1581,8 +1581,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return video_id def _extract_annotations(self, video_id): - url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id - return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') + return self._download_webpage( + 'https://www.youtube.com/annotations_invideo', video_id, + note='Downloading annotations', + errnote='Unable to download video annotations', fatal=False, + query={ + 'features': 1, + 'legacy': 1, + 'video_id': video_id, + }) @staticmethod def _extract_chapters(description, duration): From abefc03f517e9208b9d0c35e7e683941a40bb152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 22:58:42 +0700 Subject: [PATCH 1977/2417] [youtube] Update signature function patterns (closes #21469, closes #21476) --- youtube_dl/extractor/youtube.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1010c8616..83b6ac134 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1312,11 +1312,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - (r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', + (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + # Obsolete patterns + r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) From bc6438c092be6ca63843a349eee1db2b5d398d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 23:01:09 +0700 Subject: [PATCH 1978/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index c4d485ff1..10394a3b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core +* [utils] Restrict parse_codecs and add theora as known vcodec (#21381) + +Extractors +* [youtube] Update signature function patterns (#21469, #21476) +* [youtube] Make --write-annotations non fatal (#21452) ++ [sixplay] Add support for rtlmost.hu (#21405) +* [youtube] Hardcode codec metadata for av01 video only formats (#21381) +* [toutv] Update client key (#21370) ++ [biqle] Add support for new embed domain +* [cbs] Improve DRM protected videos detection (#21339) + + version 2019.06.08 Core From 9842d29d660b1ffe7873823542085879ba9d86a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jun 2019 23:04:09 +0700 Subject: [PATCH 1979/2417] release 2019.06.21 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 104ad598c..7a2b16827 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.08 + [debug] youtube-dl version 2019.06.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index f711af040..d6180e672 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index ae865a8b0..7cb981abf 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8246b570e..802fa2313 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.08 + [debug] youtube-dl version 2019.06.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 292c2e697..5153864a1 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.08** +- [ ] I've verified that I'm running youtube-dl version **2019.06.21** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 10394a3b6..2d9988da3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.06.21 Core * [utils] Restrict parse_codecs and add theora as known vcodec (#21381) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6aa666bc9..33474a452 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.08' +__version__ = '2019.06.21' From 4681441d2faf54615962029c7240601e339281bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 00:07:26 +0700 Subject: [PATCH 1980/2417] [crunchyroll:playlist] Fix and relax title extraction (closes #21291, closes #21443) --- youtube_dl/extractor/crunchyroll.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 588c3c71b..75b56ee42 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -661,9 +661,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): webpage = self._download_webpage( self._add_skip_wall(url), show_id, headers=self.geo_verification_headers()) - title = self._html_search_regex( - r'(?s)]*>\s*(.*?)', - webpage, 'title') + title = self._html_search_meta('name', webpage, default=None) + episode_paths = re.findall( r'(?s)
  • ]+>.*? Date: Sat, 22 Jun 2019 00:15:52 +0700 Subject: [PATCH 1981/2417] [crunchyroll] Move Accept-Language workaround to video extractor since it causes playlists not to list any videos --- youtube_dl/extractor/crunchyroll.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 75b56ee42..85a9a577f 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -103,19 +103,6 @@ class CrunchyrollBaseIE(InfoExtractor): def _real_initialize(self): self._login() - def _download_webpage(self, url_or_request, *args, **kwargs): - request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) - else sanitized_Request(url_or_request)) - # Accept-Language must be set explicitly to accept any language to avoid issues - # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. - # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction - # should be imposed or not (from what I can see it just takes the first language - # ignoring the priority and requires it to correspond the IP). By the way this causes - # Crunchyroll to not work in georestriction cases in some browsers that don't place - # the locale lang first in header. However allowing any language seems to workaround the issue. - request.add_header('Accept-Language', '*') - return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) - @staticmethod def _add_skip_wall(url): parsed_url = compat_urlparse.urlparse(url) @@ -269,6 +256,19 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): '1080': ('80', '108'), } + def _download_webpage(self, url_or_request, *args, **kwargs): + request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) + else sanitized_Request(url_or_request)) + # Accept-Language must be set explicitly to accept any language to avoid issues + # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. + # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction + # should be imposed or not (from what I can see it just takes the first language + # ignoring the priority and requires it to correspond the IP). By the way this causes + # Crunchyroll to not work in georestriction cases in some browsers that don't place + # the locale lang first in header. However allowing any language seems to workaround the issue. + request.add_header('Accept-Language', '*') + return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) + def _decrypt_subtitles(self, data, iv, id): data = bytes_to_intlist(compat_b64decode(data)) iv = bytes_to_intlist(compat_b64decode(iv)) From 9c2aaac2685b34143ed770d5e0c7f3906ab1107d Mon Sep 17 00:00:00 2001 From: Emmanuel Froissart Date: Wed, 12 Jun 2019 13:55:07 +0200 Subject: [PATCH 1982/2417] [tf1] Fix wat id extraction (closes #21365) --- youtube_dl/extractor/tf1.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 903f47380..091350848 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import js_to_json + class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" @@ -43,12 +45,40 @@ class TF1IE(InfoExtractor): }, { 'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html', 'only_matching': True, + }, { + 'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html', + 'info_dict': { + 'id': '13641379', + 'ext': 'mp4', + 'title': 'md5:f392bc52245dc5ad43771650c96fb620', + 'description': 'md5:44bc54f0a21322f5b91d68e76a544eae', + 'upload_date': '20190611', + }, + 'params': { + # Sometimes wat serves the whole file with the --test option + 'skip_download': True, + }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', - webpage, 'wat id', group='id') + vids_data_string = self._html_search_regex( + r'', + webpage, 'videos data string', group='vids_data_string', default=None) + wat_id = None + if vids_data_string is not None: + vids_data = self._parse_json( + vids_data_string, video_id, + transform_source=js_to_json) + video_data = [v for v in vids_data.values() + if 'slug' in v and v['slug'] == video_id] + if len(video_data) > 0 and 'streamId' in video_data[0]: + wat_id = video_data[0]['streamId'] + if wat_id is None: + wat_id = self._html_search_regex( + [r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', + r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2' + ], + webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From 1c11204056566c2983f0a837897d882581880f41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 00:37:49 +0700 Subject: [PATCH 1983/2417] [tf1] Improve extraction and fix issues (closes #21372) --- youtube_dl/extractor/tf1.py | 42 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 091350848..55e2a0721 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -2,8 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor - -from ..utils import js_to_json +from ..compat import compat_str class TF1IE(InfoExtractor): @@ -62,23 +61,32 @@ class TF1IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - vids_data_string = self._html_search_regex( - r'', - webpage, 'videos data string', group='vids_data_string', default=None) + wat_id = None - if vids_data_string is not None: - vids_data = self._parse_json( - vids_data_string, video_id, - transform_source=js_to_json) - video_data = [v for v in vids_data.values() - if 'slug' in v and v['slug'] == video_id] - if len(video_data) > 0 and 'streamId' in video_data[0]: - wat_id = video_data[0]['streamId'] - if wat_id is None: + + data = self._parse_json( + self._search_regex( + r'__APOLLO_STATE__\s*=\s*({.+?})\s*(?:;|)', webpage, + 'data', default='{}'), video_id, fatal=False) + + if data: + try: + wat_id = next( + video.get('streamId') + for key, video in data.items() + if isinstance(video, dict) + and video.get('slug') == video_id) + if not isinstance(wat_id, compat_str) or not wat_id.isdigit(): + wat_id = None + except StopIteration: + pass + + if not wat_id: wat_id = self._html_search_regex( - [r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', - r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2' - ], + (r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P\d{8})\1', + r'(["\']?)streamId\1\s*:\s*(["\']?)(?P\d+)\2'), webpage, 'wat id', group='id') + return self.url_result('wat:%s' % wat_id, 'Wat') From 31ce6e996666e7512990da01ef58785933dcb2be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 02:22:07 +0700 Subject: [PATCH 1984/2417] [youtube] Add another signature function pattern --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 83b6ac134..b570d5bae 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1314,6 +1314,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._search_regex( (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # Obsolete patterns r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', From 21b08463a777a79876721e49d3d07a19bc3fe05e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 05:34:46 +0700 Subject: [PATCH 1985/2417] [pornhub] Rework extractors (closes #11922, closes #16078, closes #17454, closes #17936) --- youtube_dl/extractor/pornhub.py | 157 +++++++++++++++++++++++++++----- 1 file changed, 132 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index cb59d526f..72c351d56 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -387,17 +387,81 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): }] -class PornHubUserVideosIE(PornHubPlaylistBaseIE): +class PornHubUserIE(PornHubPlaylistBaseIE): + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))' + _TESTS = [{ + 'url': 'https://www.pornhub.com/model/zoe_ph', + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious', + 'info_dict': { + 'id': 'liz-vicious', + }, + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/users/russianveet69', + 'playlist_mincount': 0, + }, { + 'url': 'https://www.pornhub.com/channels/povd', + 'playlist_mincount': 0, + }] + + @classmethod + def suitable(cls, url): + return (False + if PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubUserIE, cls).suitable(url)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user_id = mobj.group('id') + return self.url_result( + '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(), + video_id=user_id) + + +class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + user_id = mobj.group('id') + + page_url = self._make_page_url(url) + + entries = [] + for page_num in itertools.count(1): + try: + webpage = self._download_webpage( + page_url, user_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + break + raise + page_entries = self._extract_entries(webpage, host) + if not page_entries: + break + entries.extend(page_entries) + if not self._has_more(webpage): + break + + return self.playlist_result(orderedSet(entries), user_id) + + +class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' _TESTS = [{ - 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', - 'info_dict': { - 'id': 'zoe_ph', - }, - 'playlist_mincount': 171, + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 149, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', @@ -426,26 +490,69 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'only_matching': True, + }, { + # Most Viewed Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', + 'only_matching': True, + }, { + # Top Rated Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', + 'only_matching': True, + }, { + # Longest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', + 'only_matching': True, + }, { + # Newest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', + 'only_matching': True, }] - def _real_extract(self, url): + @classmethod + def suitable(cls, url): + return (False + if PornHubUserVideosUploadIE.suitable(url) + else super(PornHubUserVideosIE, cls).suitable(url)) + + def _make_page_url(self, url): + return url + + @staticmethod + def _has_more(webpage): + return re.search( + r'''(?x) + ]+\bclass=["\']page_next| + ]+\brel=["\']next| + ]+\bid=["\']moreDataBtn + ''', webpage) is not None + + +class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos/upload)' + _TESTS = [{ + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 129, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, + }] + + def _make_page_url(self, url): mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - user_id = mobj.group('id') + return '%s/ajax' % mobj.group('url') - entries = [] - for page_num in itertools.count(1): - try: - webpage = self._download_webpage( - url, user_id, 'Downloading page %d' % page_num, - query={'page': page_num}) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - break - raise - page_entries = self._extract_entries(webpage, host) - if not page_entries: - break - entries.extend(page_entries) - - return self.playlist_result(entries, user_id) + @staticmethod + def _has_more(webpage): + return True From 1f7a563ab0efd0745ea66c354255844a9bd36c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 06:01:43 +0700 Subject: [PATCH 1986/2417] [pornhub] Add support for downloading single pages and search pages (closes #15570) --- youtube_dl/extractor/pornhub.py | 39 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 72c351d56..7de585604 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -409,14 +409,14 @@ class PornHubUserIE(PornHubPlaylistBaseIE): @classmethod def suitable(cls, url): return (False - if PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + if PornHubPagedVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) else super(PornHubUserIE, cls).suitable(url)) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('id') return self.url_result( - '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(), + '%s/videos' % mobj.group('url'), ie=PornHubPagedVideosIE.ie_key(), video_id=user_id) @@ -426,10 +426,13 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): host = mobj.group('host') user_id = mobj.group('id') + page = int_or_none(self._search_regex( + r'\bpage=(\d+)', url, 'page', default=None)) + page_url = self._make_page_url(url) entries = [] - for page_num in itertools.count(1): + for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( page_url, user_id, 'Downloading page %d' % page_num, @@ -448,10 +451,17 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): return self.playlist_result(orderedSet(entries), user_id) -class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos' +class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/ + (?: + (?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos| + video/search + ) + ''' _TESTS = [{ - 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', @@ -462,6 +472,12 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): 'id': 'jenny-blighe', }, 'playlist_mincount': 149, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 40, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', @@ -484,12 +500,6 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/model/jayndrea/videos/upload', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', - 'only_matching': True, }, { # Most Viewed Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', @@ -506,9 +516,6 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): # Newest Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload', - 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', 'only_matching': True, @@ -521,7 +528,7 @@ class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): def suitable(cls, url): return (False if PornHubUserVideosUploadIE.suitable(url) - else super(PornHubUserVideosIE, cls).suitable(url)) + else super(PornHubPagedVideosIE, cls).suitable(url)) def _make_page_url(self, url): return url From 9634de178d35c5cd767b183c2be82b14bef84209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jun 2019 08:37:07 +0700 Subject: [PATCH 1987/2417] [pornhub] Add support for more paged video sources --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/pornhub.py | 101 +++++++++++++++++------------ 2 files changed, 62 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1ed8a4b2..9cd7d3ac4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -892,8 +892,9 @@ from .porncom import PornComIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, - PornHubPlaylistIE, - PornHubUserVideosIE, + PornHubUserIE, + PornHubPagedVideoListIE, + PornHubUserVideosUploadIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7de585604..11b8cfcf7 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -372,23 +372,8 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): entries, playlist_id, title, playlist.get('description')) -class PornHubPlaylistIE(PornHubPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/playlist/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.pornhub.com/playlist/4667351', - 'info_dict': { - 'id': '4667351', - 'title': 'Nataly Hot', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://de.pornhub.com/playlist/4667351', - 'only_matching': True, - }] - - class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, @@ -400,23 +385,20 @@ class PornHubUserIE(PornHubPlaylistBaseIE): 'playlist_mincount': 118, }, { 'url': 'https://www.pornhub.com/users/russianveet69', - 'playlist_mincount': 0, + 'only_matching': True, }, { 'url': 'https://www.pornhub.com/channels/povd', - 'playlist_mincount': 0, + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', + 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return (False - if PornHubPagedVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) - else super(PornHubUserIE, cls).suitable(url)) - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('id') return self.url_result( - '%s/videos' % mobj.group('url'), ie=PornHubPagedVideosIE.ie_key(), + '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id) @@ -424,7 +406,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') - user_id = mobj.group('id') + item_id = mobj.group('id') page = int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) @@ -435,7 +417,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): for page_num in (page, ) if page is not None else itertools.count(1): try: webpage = self._download_webpage( - page_url, user_id, 'Downloading page %d' % page_num, + page_url, item_id, 'Downloading page %d' % page_num, query={'page': page_num}) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: @@ -448,18 +430,11 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): if not self._has_more(webpage): break - return self.playlist_result(orderedSet(entries), user_id) + return self.playlist_result(orderedSet(entries), item_id) -class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'''(?x) - https?:// - (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/ - (?: - (?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos| - video/search - ) - ''' +class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?P(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, @@ -469,20 +444,20 @@ class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', 'info_dict': { - 'id': 'jenny-blighe', + 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 149, }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', 'info_dict': { - 'id': 'jenny-blighe', + 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 40, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', 'info_dict': { - 'id': 'povd', + 'id': 'channels/povd/videos', }, 'playlist_mincount': 293, }, { @@ -522,13 +497,55 @@ class PornHubPagedVideosIE(PornHubPagedPlaylistBaseIE): }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/search?search=123', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video?page=2', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/playlist/44121572', + 'info_dict': { + 'id': 'playlist/44121572', + }, + 'playlist_mincount': 132, + }, { + 'url': 'https://www.pornhub.com/playlist/4667351', + 'only_matching': True, + }, { + 'url': 'https://de.pornhub.com/playlist/4667351', + 'only_matching': True, }] @classmethod def suitable(cls, url): return (False - if PornHubUserVideosUploadIE.suitable(url) - else super(PornHubPagedVideosIE, cls).suitable(url)) + if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubPagedVideoListIE, cls).suitable(url)) def _make_page_url(self, url): return url From 091c9b43164f6f3b31f5f911c88a4aeaa0358429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jun 2019 02:13:46 +0700 Subject: [PATCH 1988/2417] [vimeo:likes] Implement extrator in terms of channel extractor This allows to obtain videos' ids before extraction (#21493) --- youtube_dl/extractor/vimeo.py | 50 +++++------------------------------ 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a41178bab..aeee7df8f 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,7 +16,6 @@ from ..utils import ( determine_ext, ExtractorError, js_to_json, - InAdvancePagedList, int_or_none, merge_dicts, NO_DEFAULT, @@ -1065,7 +1064,7 @@ class VimeoWatchLaterIE(VimeoChannelIE): return self._extract_videos('watchlater', 'https://vimeo.com/watchlater') -class VimeoLikesIE(InfoExtractor): +class VimeoLikesIE(VimeoChannelIE): _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P[^/]+)/likes/?(?:$|[?#]|sort:)' IE_NAME = 'vimeo:likes' IE_DESC = 'Vimeo user likes' @@ -1073,55 +1072,20 @@ class VimeoLikesIE(InfoExtractor): 'url': 'https://vimeo.com/user755559/likes/', 'playlist_mincount': 293, 'info_dict': { - 'id': 'user755559_likes', - 'description': 'See all the videos urza likes', - 'title': 'Videos urza likes', + 'id': 'user755559', + 'title': 'urza’s Likes', }, }, { 'url': 'https://vimeo.com/stormlapse/likes', 'only_matching': True, }] + def _page_url(self, base_url, pagenum): + return '%s/page:%d/' % (base_url, pagenum) + def _real_extract(self, url): user_id = self._match_id(url) - webpage = self._download_webpage(url, user_id) - page_count = self._int( - self._search_regex( - r'''(?x)
  • - .*?
  • \s* - ''', webpage, 'page count', default=1), - 'page count', fatal=True) - PAGE_SIZE = 12 - title = self._html_search_regex( - r'(?s)

    (.+?)

    ', webpage, 'title', fatal=False) - description = self._html_search_meta('description', webpage) - - def _get_page(idx): - page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % ( - user_id, idx + 1) - webpage = self._download_webpage( - page_url, user_id, - note='Downloading page %d/%d' % (idx + 1, page_count)) - video_list = self._search_regex( - r'(?s)
      ]*>(.*?)
    ', - webpage, 'video content') - paths = re.findall( - r']*>\s* Date: Sun, 23 Jun 2019 02:16:09 +0700 Subject: [PATCH 1989/2417] [vimeo:channel,group] Make title extraction no fatal --- youtube_dl/extractor/vimeo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index aeee7df8f..b5b44a79a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -813,7 +813,8 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return '%s/videos/page:%d/' % (base_url, pagenum) def _extract_list_title(self, webpage): - return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title') + return self._TITLE or self._html_search_regex( + self._TITLE_RE, webpage, 'list title', fatal=False) def _login_list_password(self, page_url, list_id, webpage): login_form = self._search_regex( @@ -954,7 +955,7 @@ class VimeoGroupsIE(VimeoAlbumIE): }] def _extract_list_title(self, webpage): - return self._og_search_title(webpage) + return self._og_search_title(webpage, fatal=False) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 695720ebe81166b3ee249eb3916e3c7819ef57a8 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Sat, 22 Jun 2019 22:31:43 +0100 Subject: [PATCH 1990/2417] [openload] Add support for oload.life (#21495) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 32d546e4e..b2918dc85 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -244,7 +244,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -363,6 +363,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.website/embed/drTBl1aOTvk/', 'only_matching': True, + }, { + 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From 3031b7c4ed3a446dc83123ce34780f4db56ad4ef Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Sun, 23 Jun 2019 19:04:05 +0900 Subject: [PATCH 1991/2417] [brightcove:new] Add support for playlists (#21331) --- youtube_dl/extractor/brightcove.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c0345e2c3..58ec5c979 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -483,7 +483,7 @@ class BrightcoveLegacyIE(InfoExtractor): class BrightcoveNewIE(AdobePassIE): IE_NAME = 'brightcove:new' - _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/(?P[^/]+)_(?P[^/]+)/index\.html\?.*videoId=(?P\d+|ref:[^&]+)' + _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/(?P[^/]+)_(?P[^/]+)/index\.html\?.*(?Pvideo|playlist)Id=(?P\d+|ref:[^&]+)' _TESTS = [{ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', 'md5': 'c8100925723840d4b0d243f7025703be', @@ -516,6 +516,21 @@ class BrightcoveNewIE(AdobePassIE): # m3u8 download 'skip_download': True, } + }, { + # playlist stream + 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', + 'info_dict': { + 'id': '5718313430001', + 'title': 'No Audio Playlist', + }, + 'playlist_count': 7, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', + 'only_matching': True, }, { # ref: prefixed video id 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', @@ -715,7 +730,7 @@ class BrightcoveNewIE(AdobePassIE): 'ip_blocks': smuggled_data.get('geo_ip_blocks'), }) - account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() + account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage( 'http://players.brightcove.net/%s/%s_%s/index.min.js' @@ -736,7 +751,7 @@ class BrightcoveNewIE(AdobePassIE): r'policyKey\s*:\s*(["\'])(?P.+?)\1', webpage, 'policy key', group='pk') - api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) + api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) headers = { 'Accept': 'application/json;pk=%s' % policy_key, } @@ -771,5 +786,12 @@ class BrightcoveNewIE(AdobePassIE): 'tveToken': tve_token, }) + if content_type == 'playlist': + return self.playlist_result( + [self._parse_brightcove_metadata(vid, vid.get('id'), headers) + for vid in json_data.get('videos', []) if vid.get('id')], + json_data.get('id'), json_data.get('name'), + json_data.get('description')) + return self._parse_brightcove_metadata( json_data, video_id, headers=headers) From 27cef8885de4ffaa33f96973df3c50b62504bd49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Jun 2019 23:01:43 +0700 Subject: [PATCH 1992/2417] [beeg] Add support for api/v6 v2 URLs (closes #21511) --- youtube_dl/extractor/beeg.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 192f11ea6..c15a0ac8f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( int_or_none, unified_timestamp, @@ -11,6 +14,7 @@ from ..utils import ( class BeegIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P\d+)' _TESTS = [{ + # api/v6 v1 'url': 'http://beeg.com/5416503', 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820', 'info_dict': { @@ -24,6 +28,10 @@ class BeegIE(InfoExtractor): 'tags': list, 'age_limit': 18, } + }, { + # api/v6 v2 + 'url': 'https://beeg.com/1941093077?t=911-1391', + 'only_matching': True, }, { 'url': 'https://beeg.porn/video/5416503', 'only_matching': True, @@ -41,11 +49,22 @@ class BeegIE(InfoExtractor): r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', default='1546225636701') + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query = { + 'v': 2, + 's': t[0], + 'e': t[1], + } + else: + query = {'v': 1} + for api_path in ('', 'api.'): video = self._download_json( 'https://%sbeeg.com/api/v6/%s/video/%s' % (api_path, beeg_version, video_id), video_id, - fatal=api_path == 'api.') + fatal=api_path == 'api.', query=query) if video: break From 1d83e9bd4b2dbc854f6f8b7f4baa14602a288c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Jun 2019 00:12:31 +0700 Subject: [PATCH 1993/2417] [nfb] Remove extractor (closes #21518) Covered by generic extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/nfb.py | 112 ----------------------------- 2 files changed, 113 deletions(-) delete mode 100644 youtube_dl/extractor/nfb.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9cd7d3ac4..530474f3f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -745,7 +745,6 @@ from .nexx import ( NexxIE, NexxEmbedIE, ) -from .nfb import NFBIE from .nfl import NFLIE from .nhk import NhkVodIE from .nhl import NHLIE diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py deleted file mode 100644 index adcc636bc..000000000 --- a/youtube_dl/extractor/nfb.py +++ /dev/null @@ -1,112 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - clean_html, - determine_ext, - int_or_none, - qualities, - urlencode_postdata, - xpath_text, -) - - -class NFBIE(InfoExtractor): - IE_NAME = 'nfb' - IE_DESC = 'National Film Board of Canada' - _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P[\da-z_-]+)' - - _TEST = { - 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', - 'info_dict': { - 'id': 'qallunaat_why_white_people_are_funny', - 'ext': 'flv', - 'title': 'Qallunaat! Why White People Are Funny ', - 'description': 'md5:6b8e32dde3abf91e58857b174916620c', - 'duration': 3128, - 'creator': 'Mark Sandiford', - 'uploader': 'Mark Sandiford', - }, - 'params': { - # rtmp download - 'skip_download': True, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - config = self._download_xml( - 'https://www.nfb.ca/film/%s/player_config' % video_id, - video_id, 'Downloading player config XML', - data=urlencode_postdata({'getConfig': 'true'}), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf' - }) - - title, description, thumbnail, duration, uploader, author = [None] * 6 - thumbnails, formats = [[]] * 2 - subtitles = {} - - for media in config.findall('./player/stream/media'): - if media.get('type') == 'posterImage': - quality_key = qualities(('low', 'high')) - thumbnails = [] - for asset in media.findall('assets/asset'): - asset_url = xpath_text(asset, 'default/url', default=None) - if not asset_url: - continue - quality = asset.get('quality') - thumbnails.append({ - 'url': asset_url, - 'id': quality, - 'preference': quality_key(quality), - }) - elif media.get('type') == 'video': - title = xpath_text(media, 'title', fatal=True) - for asset in media.findall('assets/asset'): - quality = asset.get('quality') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]$', quality or '', 'height', default=None)) - for node in asset: - streamer = xpath_text(node, 'streamerURI', default=None) - if not streamer: - continue - play_path = xpath_text(node, 'url', default=None) - if not play_path: - continue - formats.append({ - 'url': streamer, - 'app': streamer.split('/', 3)[3], - 'play_path': play_path, - 'rtmp_live': False, - 'ext': 'flv', - 'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag, - 'height': height, - }) - self._sort_formats(formats) - description = clean_html(xpath_text(media, 'description')) - uploader = xpath_text(media, 'author') - duration = int_or_none(media.get('duration')) - for subtitle in media.findall('./subtitles/subtitle'): - subtitle_url = xpath_text(subtitle, 'url', default=None) - if not subtitle_url: - continue - lang = xpath_text(subtitle, 'lang', default='en') - subtitles.setdefault(lang, []).append({ - 'url': subtitle_url, - 'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(), - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'duration': duration, - 'creator': uploader, - 'uploader': uploader, - 'formats': formats, - 'subtitles': subtitles, - } From 509bcec37ba26a8c7bc263cf8067495ec7cf120a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 27 Jun 2019 12:06:09 +0100 Subject: [PATCH 1994/2417] [fusion] fix extraction(closes #17775)(closes #21269) --- youtube_dl/extractor/fusion.py | 69 +++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py index 25e284d46..a3f44b812 100644 --- a/youtube_dl/extractor/fusion.py +++ b/youtube_dl/extractor/fusion.py @@ -1,35 +1,84 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .ooyala import OoyalaIE +from ..utils import ( + determine_ext, + int_or_none, + mimetype2ext, + parse_iso8601, +) class FusionIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P\d+)' _TESTS = [{ 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', 'info_dict': { - 'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', + 'id': '3145868', 'ext': 'mp4', 'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs', 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', 'duration': 140.0, + 'timestamp': 1442589635, + 'uploader': 'UNIVISON', + 'upload_date': '20150918', }, 'params': { 'skip_download': True, }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Anvato'], }, { 'url': 'http://fusion.tv/video/201781', 'only_matching': True, + }, { + 'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video_id = self._match_id(url) + video = self._download_json( + 'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id) - ooyala_code = self._search_regex( - r'data-ooyala-id=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala code', group='code') + info = { + 'id': video_id, + 'title': video['title'], + 'description': video.get('excerpt'), + 'timestamp': parse_iso8601(video.get('published')), + 'series': video.get('show'), + } - return OoyalaIE._build_url_result(ooyala_code) + formats = [] + src = video.get('src') or {} + for f_id, f in src.items(): + for q_id, q in f.items(): + q_url = q.get('url') + if not q_url: + continue + ext = determine_ext(q_url, mimetype2ext(q.get('type'))) + if ext == 'smil': + formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False)) + elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'): + formats.extend(self._extract_m3u8_formats( + q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': '-'.join([f_id, q_id]), + 'url': q_url, + 'width': int_or_none(q.get('width')), + 'height': int_or_none(q.get('height')), + 'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')), + 'ext': 'mp4' if ext == 'm3u8' else ext, + 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', + }) + if formats: + self._sort_formats(formats) + info['formats'] = formats + else: + info.update({ + '_type': 'url', + 'url': 'anvato:uni:' + video['video_ids']['anvato'], + 'ie_key': 'Anvato', + }) + + return info From f5629946608861097b6ce5095efb9a9e8ac7f056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 22:18:10 +0700 Subject: [PATCH 1995/2417] [drtv] Relax _VALID_URL --- youtube_dl/extractor/drtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 0c7e350f0..218f10209 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -24,7 +24,7 @@ from ..utils import ( class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' _GEO_BYPASS = False _GEO_COUNTRIES = ['DK'] IE_NAME = 'drtv' @@ -80,6 +80,9 @@ class DRTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', + 'only_matching': True, }] def _real_extract(self, url): From 6625bf200d08baf64764e99caa48b4fb3a48ff8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 27 Jun 2019 17:24:46 +0200 Subject: [PATCH 1996/2417] [mixer:vod] Relax _VALID_URL (closes #21531) (#21536) --- youtube_dl/extractor/beampro.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index 2eaec1ab4..e264a145f 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -99,8 +99,8 @@ class BeamProLiveIE(BeamProBaseIE): class BeamProVodIE(BeamProBaseIE): IE_NAME = 'Mixer:vod' - _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\d+)' - _TEST = { + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\w+)' + _TESTS = [{ 'url': 'https://mixer.com/willow8714?vod=2259830', 'md5': 'b2431e6e8347dc92ebafb565d368b76b', 'info_dict': { @@ -119,7 +119,10 @@ class BeamProVodIE(BeamProBaseIE): 'params': { 'skip_download': True, }, - } + }, { + 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', + 'only_matching': True, + }] @staticmethod def _extract_format(vod, vod_type): From 4f71473ef186c0797596e96755e86df80f357a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 22:59:30 +0700 Subject: [PATCH 1997/2417] [go] Add support for disneynow.com (closes #21528) --- youtube_dl/extractor/go.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 206d89e82..5916f9a8f 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -36,7 +36,7 @@ class GoIE(AdobePassIE): 'resource_id': 'DisneyXD', } } - _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ + _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|disneynow)\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', @@ -71,6 +71,9 @@ class GoIE(AdobePassIE): # brand 008 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', 'only_matching': True, + }, { + 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', + 'only_matching': True, }] def _extract_videos(self, brand, video_id='-1', show_id='-1'): @@ -89,7 +92,7 @@ class GoIE(AdobePassIE): # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', - default=None) + default=video_id) if not site_info: brand = self._search_regex( (r'data-brand=\s*["\']\s*(\d+)', From 232331c0d2f446af760403ed5a0439cdc3deb112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 23:55:15 +0700 Subject: [PATCH 1998/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2d9988da3..985d14a28 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +version + +Extractors ++ [go] Add support for disneynow.com (#21528) +* [mixer:vod] Relax URL regular expression (#21531, #21536) +* [drtv] Relax URL regular expression +* [fusion] Fix extraction (#17775, #21269) +- [nfb] Remove extractor (#21518) ++ [beeg] Add support for api/v6 v2 URLs (#21511) ++ [brightcove:new] Add support for playlists (#21331) ++ [openload] Add support for oload.life (#21495) +* [vimeo:channel,group] Make title extraction non fatal +* [vimeo:likes] Implement extrator in terms of channel extractor (#21493) ++ [pornhub] Add support for more paged video sources ++ [pornhub] Add support for downloading single pages and search pages (#15570) +* [pornhub] Rework extractors (#11922, #16078, #17454, #17936) ++ [youtube] Add another signature function pattern +* [tf1] Fix extraction (#21365, #21372) +* [crunchyroll] Move Accept-Language workaround to video extractor since + it causes playlists not to list any videos +* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443) + + version 2019.06.21 Core From 8c8cae91ece9841567aa48095245f92ae8f4b295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Jun 2019 23:57:33 +0700 Subject: [PATCH 1999/2417] release 2019.06.27 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 6 +++--- youtube_dl/version.py | 2 +- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 7a2b16827..d7c15e85a 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.21 + [debug] youtube-dl version 2019.06.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index d6180e672..741862590 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 7cb981abf..4fb035ea4 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 802fa2313..73ed62012 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.21 + [debug] youtube-dl version 2019.06.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 5153864a1..a9d3653e2 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.21** +- [ ] I've verified that I'm running youtube-dl version **2019.06.27** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 985d14a28..4ae3d6c7c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.06.27 Extractors + [go] Add support for disneynow.com (#21528) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bfd15b4dc..55ae43144 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -581,7 +581,6 @@ - **NextTV**: 壹電視 - **Nexx** - **NexxEmbed** - - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** - **nhl.com** @@ -692,8 +691,9 @@ - **PornerBros** - **PornHd** - **PornHub**: PornHub and Thumbzilla - - **PornHubPlaylist** - - **PornHubUserVideos** + - **PornHubPagedVideoList** + - **PornHubUser** + - **PornHubUserVideosUpload** - **Pornotube** - **PornoVoisines** - **PornoXO** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 33474a452..01896873d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.21' +__version__ = '2019.06.27' From f7a147e3b63a3165c425c56ee19e66f86900128c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 29 Jun 2019 00:32:43 +0700 Subject: [PATCH 2000/2417] [utils] Introduce random_user_agent and use as default User-Agent (closes #21546) --- youtube_dl/extractor/openload.py | 1595 +----------------------------- youtube_dl/utils.py | 1586 ++++++++++++++++++++++++++++- 2 files changed, 1590 insertions(+), 1591 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b2918dc85..237b0d8fb 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import json import os -import random import re import subprocess import tempfile @@ -380,1595 +379,15 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '74.0.3729.129', - '76.0.3780.3', - '76.0.3780.2', - '74.0.3729.128', - '76.0.3780.1', - '76.0.3780.0', - '75.0.3770.15', - '74.0.3729.127', - '74.0.3729.126', - '76.0.3779.1', - '76.0.3779.0', - '75.0.3770.14', - '74.0.3729.125', - '76.0.3778.1', - '76.0.3778.0', - '75.0.3770.13', - '74.0.3729.124', - '74.0.3729.123', - '73.0.3683.121', - '76.0.3777.1', - '76.0.3777.0', - '75.0.3770.12', - '74.0.3729.122', - '76.0.3776.4', - '75.0.3770.11', - '74.0.3729.121', - '76.0.3776.3', - '76.0.3776.2', - '73.0.3683.120', - '74.0.3729.120', - '74.0.3729.119', - '74.0.3729.118', - '76.0.3776.1', - '76.0.3776.0', - '76.0.3775.5', - '75.0.3770.10', - '74.0.3729.117', - '76.0.3775.4', - '76.0.3775.3', - '74.0.3729.116', - '75.0.3770.9', - '76.0.3775.2', - '76.0.3775.1', - '76.0.3775.0', - '75.0.3770.8', - '74.0.3729.115', - '74.0.3729.114', - '76.0.3774.1', - '76.0.3774.0', - '75.0.3770.7', - '74.0.3729.113', - '74.0.3729.112', - '74.0.3729.111', - '76.0.3773.1', - '76.0.3773.0', - '75.0.3770.6', - '74.0.3729.110', - '74.0.3729.109', - '76.0.3772.1', - '76.0.3772.0', - '75.0.3770.5', - '74.0.3729.108', - '74.0.3729.107', - '76.0.3771.1', - '76.0.3771.0', - '75.0.3770.4', - '74.0.3729.106', - '74.0.3729.105', - '75.0.3770.3', - '74.0.3729.104', - '74.0.3729.103', - '74.0.3729.102', - '75.0.3770.2', - '74.0.3729.101', - '75.0.3770.1', - '75.0.3770.0', - '74.0.3729.100', - '75.0.3769.5', - '75.0.3769.4', - '74.0.3729.99', - '75.0.3769.3', - '75.0.3769.2', - '75.0.3768.6', - '74.0.3729.98', - '75.0.3769.1', - '75.0.3769.0', - '74.0.3729.97', - '73.0.3683.119', - '73.0.3683.118', - '74.0.3729.96', - '75.0.3768.5', - '75.0.3768.4', - '75.0.3768.3', - '75.0.3768.2', - '74.0.3729.95', - '74.0.3729.94', - '75.0.3768.1', - '75.0.3768.0', - '74.0.3729.93', - '74.0.3729.92', - '73.0.3683.117', - '74.0.3729.91', - '75.0.3766.3', - '74.0.3729.90', - '75.0.3767.2', - '75.0.3767.1', - '75.0.3767.0', - '74.0.3729.89', - '73.0.3683.116', - '75.0.3766.2', - '74.0.3729.88', - '75.0.3766.1', - '75.0.3766.0', - '74.0.3729.87', - '73.0.3683.115', - '74.0.3729.86', - '75.0.3765.1', - '75.0.3765.0', - '74.0.3729.85', - '73.0.3683.114', - '74.0.3729.84', - '75.0.3764.1', - '75.0.3764.0', - '74.0.3729.83', - '73.0.3683.113', - '75.0.3763.2', - '75.0.3761.4', - '74.0.3729.82', - '75.0.3763.1', - '75.0.3763.0', - '74.0.3729.81', - '73.0.3683.112', - '75.0.3762.1', - '75.0.3762.0', - '74.0.3729.80', - '75.0.3761.3', - '74.0.3729.79', - '73.0.3683.111', - '75.0.3761.2', - '74.0.3729.78', - '74.0.3729.77', - '75.0.3761.1', - '75.0.3761.0', - '73.0.3683.110', - '74.0.3729.76', - '74.0.3729.75', - '75.0.3760.0', - '74.0.3729.74', - '75.0.3759.8', - '75.0.3759.7', - '75.0.3759.6', - '74.0.3729.73', - '75.0.3759.5', - '74.0.3729.72', - '73.0.3683.109', - '75.0.3759.4', - '75.0.3759.3', - '74.0.3729.71', - '75.0.3759.2', - '74.0.3729.70', - '73.0.3683.108', - '74.0.3729.69', - '75.0.3759.1', - '75.0.3759.0', - '74.0.3729.68', - '73.0.3683.107', - '74.0.3729.67', - '75.0.3758.1', - '75.0.3758.0', - '74.0.3729.66', - '73.0.3683.106', - '74.0.3729.65', - '75.0.3757.1', - '75.0.3757.0', - '74.0.3729.64', - '73.0.3683.105', - '74.0.3729.63', - '75.0.3756.1', - '75.0.3756.0', - '74.0.3729.62', - '73.0.3683.104', - '75.0.3755.3', - '75.0.3755.2', - '73.0.3683.103', - '75.0.3755.1', - '75.0.3755.0', - '74.0.3729.61', - '73.0.3683.102', - '74.0.3729.60', - '75.0.3754.2', - '74.0.3729.59', - '75.0.3753.4', - '74.0.3729.58', - '75.0.3754.1', - '75.0.3754.0', - '74.0.3729.57', - '73.0.3683.101', - '75.0.3753.3', - '75.0.3752.2', - '75.0.3753.2', - '74.0.3729.56', - '75.0.3753.1', - '75.0.3753.0', - '74.0.3729.55', - '73.0.3683.100', - '74.0.3729.54', - '75.0.3752.1', - '75.0.3752.0', - '74.0.3729.53', - '73.0.3683.99', - '74.0.3729.52', - '75.0.3751.1', - '75.0.3751.0', - '74.0.3729.51', - '73.0.3683.98', - '74.0.3729.50', - '75.0.3750.0', - '74.0.3729.49', - '74.0.3729.48', - '74.0.3729.47', - '75.0.3749.3', - '74.0.3729.46', - '73.0.3683.97', - '75.0.3749.2', - '74.0.3729.45', - '75.0.3749.1', - '75.0.3749.0', - '74.0.3729.44', - '73.0.3683.96', - '74.0.3729.43', - '74.0.3729.42', - '75.0.3748.1', - '75.0.3748.0', - '74.0.3729.41', - '75.0.3747.1', - '73.0.3683.95', - '75.0.3746.4', - '74.0.3729.40', - '74.0.3729.39', - '75.0.3747.0', - '75.0.3746.3', - '75.0.3746.2', - '74.0.3729.38', - '75.0.3746.1', - '75.0.3746.0', - '74.0.3729.37', - '73.0.3683.94', - '75.0.3745.5', - '75.0.3745.4', - '75.0.3745.3', - '75.0.3745.2', - '74.0.3729.36', - '75.0.3745.1', - '75.0.3745.0', - '75.0.3744.2', - '74.0.3729.35', - '73.0.3683.93', - '74.0.3729.34', - '75.0.3744.1', - '75.0.3744.0', - '74.0.3729.33', - '73.0.3683.92', - '74.0.3729.32', - '74.0.3729.31', - '73.0.3683.91', - '75.0.3741.2', - '75.0.3740.5', - '74.0.3729.30', - '75.0.3741.1', - '75.0.3741.0', - '74.0.3729.29', - '75.0.3740.4', - '73.0.3683.90', - '74.0.3729.28', - '75.0.3740.3', - '73.0.3683.89', - '75.0.3740.2', - '74.0.3729.27', - '75.0.3740.1', - '75.0.3740.0', - '74.0.3729.26', - '73.0.3683.88', - '73.0.3683.87', - '74.0.3729.25', - '75.0.3739.1', - '75.0.3739.0', - '73.0.3683.86', - '74.0.3729.24', - '73.0.3683.85', - '75.0.3738.4', - '75.0.3738.3', - '75.0.3738.2', - '75.0.3738.1', - '75.0.3738.0', - '74.0.3729.23', - '73.0.3683.84', - '74.0.3729.22', - '74.0.3729.21', - '75.0.3737.1', - '75.0.3737.0', - '74.0.3729.20', - '73.0.3683.83', - '74.0.3729.19', - '75.0.3736.1', - '75.0.3736.0', - '74.0.3729.18', - '73.0.3683.82', - '74.0.3729.17', - '75.0.3735.1', - '75.0.3735.0', - '74.0.3729.16', - '73.0.3683.81', - '75.0.3734.1', - '75.0.3734.0', - '74.0.3729.15', - '73.0.3683.80', - '74.0.3729.14', - '75.0.3733.1', - '75.0.3733.0', - '75.0.3732.1', - '74.0.3729.13', - '74.0.3729.12', - '73.0.3683.79', - '74.0.3729.11', - '75.0.3732.0', - '74.0.3729.10', - '73.0.3683.78', - '74.0.3729.9', - '74.0.3729.8', - '74.0.3729.7', - '75.0.3731.3', - '75.0.3731.2', - '75.0.3731.0', - '74.0.3729.6', - '73.0.3683.77', - '73.0.3683.76', - '75.0.3730.5', - '75.0.3730.4', - '73.0.3683.75', - '74.0.3729.5', - '73.0.3683.74', - '75.0.3730.3', - '75.0.3730.2', - '74.0.3729.4', - '73.0.3683.73', - '73.0.3683.72', - '75.0.3730.1', - '75.0.3730.0', - '74.0.3729.3', - '73.0.3683.71', - '74.0.3729.2', - '73.0.3683.70', - '74.0.3729.1', - '74.0.3729.0', - '74.0.3726.4', - '73.0.3683.69', - '74.0.3726.3', - '74.0.3728.0', - '74.0.3726.2', - '73.0.3683.68', - '74.0.3726.1', - '74.0.3726.0', - '74.0.3725.4', - '73.0.3683.67', - '73.0.3683.66', - '74.0.3725.3', - '74.0.3725.2', - '74.0.3725.1', - '74.0.3724.8', - '74.0.3725.0', - '73.0.3683.65', - '74.0.3724.7', - '74.0.3724.6', - '74.0.3724.5', - '74.0.3724.4', - '74.0.3724.3', - '74.0.3724.2', - '74.0.3724.1', - '74.0.3724.0', - '73.0.3683.64', - '74.0.3723.1', - '74.0.3723.0', - '73.0.3683.63', - '74.0.3722.1', - '74.0.3722.0', - '73.0.3683.62', - '74.0.3718.9', - '74.0.3702.3', - '74.0.3721.3', - '74.0.3721.2', - '74.0.3721.1', - '74.0.3721.0', - '74.0.3720.6', - '73.0.3683.61', - '72.0.3626.122', - '73.0.3683.60', - '74.0.3720.5', - '72.0.3626.121', - '74.0.3718.8', - '74.0.3720.4', - '74.0.3720.3', - '74.0.3718.7', - '74.0.3720.2', - '74.0.3720.1', - '74.0.3720.0', - '74.0.3718.6', - '74.0.3719.5', - '73.0.3683.59', - '74.0.3718.5', - '74.0.3718.4', - '74.0.3719.4', - '74.0.3719.3', - '74.0.3719.2', - '74.0.3719.1', - '73.0.3683.58', - '74.0.3719.0', - '73.0.3683.57', - '73.0.3683.56', - '74.0.3718.3', - '73.0.3683.55', - '74.0.3718.2', - '74.0.3718.1', - '74.0.3718.0', - '73.0.3683.54', - '74.0.3717.2', - '73.0.3683.53', - '74.0.3717.1', - '74.0.3717.0', - '73.0.3683.52', - '74.0.3716.1', - '74.0.3716.0', - '73.0.3683.51', - '74.0.3715.1', - '74.0.3715.0', - '73.0.3683.50', - '74.0.3711.2', - '74.0.3714.2', - '74.0.3713.3', - '74.0.3714.1', - '74.0.3714.0', - '73.0.3683.49', - '74.0.3713.1', - '74.0.3713.0', - '72.0.3626.120', - '73.0.3683.48', - '74.0.3712.2', - '74.0.3712.1', - '74.0.3712.0', - '73.0.3683.47', - '72.0.3626.119', - '73.0.3683.46', - '74.0.3710.2', - '72.0.3626.118', - '74.0.3711.1', - '74.0.3711.0', - '73.0.3683.45', - '72.0.3626.117', - '74.0.3710.1', - '74.0.3710.0', - '73.0.3683.44', - '72.0.3626.116', - '74.0.3709.1', - '74.0.3709.0', - '74.0.3704.9', - '73.0.3683.43', - '72.0.3626.115', - '74.0.3704.8', - '74.0.3704.7', - '74.0.3708.0', - '74.0.3706.7', - '74.0.3704.6', - '73.0.3683.42', - '72.0.3626.114', - '74.0.3706.6', - '72.0.3626.113', - '74.0.3704.5', - '74.0.3706.5', - '74.0.3706.4', - '74.0.3706.3', - '74.0.3706.2', - '74.0.3706.1', - '74.0.3706.0', - '73.0.3683.41', - '72.0.3626.112', - '74.0.3705.1', - '74.0.3705.0', - '73.0.3683.40', - '72.0.3626.111', - '73.0.3683.39', - '74.0.3704.4', - '73.0.3683.38', - '74.0.3704.3', - '74.0.3704.2', - '74.0.3704.1', - '74.0.3704.0', - '73.0.3683.37', - '72.0.3626.110', - '72.0.3626.109', - '74.0.3703.3', - '74.0.3703.2', - '73.0.3683.36', - '74.0.3703.1', - '74.0.3703.0', - '73.0.3683.35', - '72.0.3626.108', - '74.0.3702.2', - '74.0.3699.3', - '74.0.3702.1', - '74.0.3702.0', - '73.0.3683.34', - '72.0.3626.107', - '73.0.3683.33', - '74.0.3701.1', - '74.0.3701.0', - '73.0.3683.32', - '73.0.3683.31', - '72.0.3626.105', - '74.0.3700.1', - '74.0.3700.0', - '73.0.3683.29', - '72.0.3626.103', - '74.0.3699.2', - '74.0.3699.1', - '74.0.3699.0', - '73.0.3683.28', - '72.0.3626.102', - '73.0.3683.27', - '73.0.3683.26', - '74.0.3698.0', - '74.0.3696.2', - '72.0.3626.101', - '73.0.3683.25', - '74.0.3696.1', - '74.0.3696.0', - '74.0.3694.8', - '72.0.3626.100', - '74.0.3694.7', - '74.0.3694.6', - '74.0.3694.5', - '74.0.3694.4', - '72.0.3626.99', - '72.0.3626.98', - '74.0.3694.3', - '73.0.3683.24', - '72.0.3626.97', - '72.0.3626.96', - '72.0.3626.95', - '73.0.3683.23', - '72.0.3626.94', - '73.0.3683.22', - '73.0.3683.21', - '72.0.3626.93', - '74.0.3694.2', - '72.0.3626.92', - '74.0.3694.1', - '74.0.3694.0', - '74.0.3693.6', - '73.0.3683.20', - '72.0.3626.91', - '74.0.3693.5', - '74.0.3693.4', - '74.0.3693.3', - '74.0.3693.2', - '73.0.3683.19', - '74.0.3693.1', - '74.0.3693.0', - '73.0.3683.18', - '72.0.3626.90', - '74.0.3692.1', - '74.0.3692.0', - '73.0.3683.17', - '72.0.3626.89', - '74.0.3687.3', - '74.0.3691.1', - '74.0.3691.0', - '73.0.3683.16', - '72.0.3626.88', - '72.0.3626.87', - '73.0.3683.15', - '74.0.3690.1', - '74.0.3690.0', - '73.0.3683.14', - '72.0.3626.86', - '73.0.3683.13', - '73.0.3683.12', - '74.0.3689.1', - '74.0.3689.0', - '73.0.3683.11', - '72.0.3626.85', - '73.0.3683.10', - '72.0.3626.84', - '73.0.3683.9', - '74.0.3688.1', - '74.0.3688.0', - '73.0.3683.8', - '72.0.3626.83', - '74.0.3687.2', - '74.0.3687.1', - '74.0.3687.0', - '73.0.3683.7', - '72.0.3626.82', - '74.0.3686.4', - '72.0.3626.81', - '74.0.3686.3', - '74.0.3686.2', - '74.0.3686.1', - '74.0.3686.0', - '73.0.3683.6', - '72.0.3626.80', - '74.0.3685.1', - '74.0.3685.0', - '73.0.3683.5', - '72.0.3626.79', - '74.0.3684.1', - '74.0.3684.0', - '73.0.3683.4', - '72.0.3626.78', - '72.0.3626.77', - '73.0.3683.3', - '73.0.3683.2', - '72.0.3626.76', - '73.0.3683.1', - '73.0.3683.0', - '72.0.3626.75', - '71.0.3578.141', - '73.0.3682.1', - '73.0.3682.0', - '72.0.3626.74', - '71.0.3578.140', - '73.0.3681.4', - '73.0.3681.3', - '73.0.3681.2', - '73.0.3681.1', - '73.0.3681.0', - '72.0.3626.73', - '71.0.3578.139', - '72.0.3626.72', - '72.0.3626.71', - '73.0.3680.1', - '73.0.3680.0', - '72.0.3626.70', - '71.0.3578.138', - '73.0.3678.2', - '73.0.3679.1', - '73.0.3679.0', - '72.0.3626.69', - '71.0.3578.137', - '73.0.3678.1', - '73.0.3678.0', - '71.0.3578.136', - '73.0.3677.1', - '73.0.3677.0', - '72.0.3626.68', - '72.0.3626.67', - '71.0.3578.135', - '73.0.3676.1', - '73.0.3676.0', - '73.0.3674.2', - '72.0.3626.66', - '71.0.3578.134', - '73.0.3674.1', - '73.0.3674.0', - '72.0.3626.65', - '71.0.3578.133', - '73.0.3673.2', - '73.0.3673.1', - '73.0.3673.0', - '72.0.3626.64', - '71.0.3578.132', - '72.0.3626.63', - '72.0.3626.62', - '72.0.3626.61', - '72.0.3626.60', - '73.0.3672.1', - '73.0.3672.0', - '72.0.3626.59', - '71.0.3578.131', - '73.0.3671.3', - '73.0.3671.2', - '73.0.3671.1', - '73.0.3671.0', - '72.0.3626.58', - '71.0.3578.130', - '73.0.3670.1', - '73.0.3670.0', - '72.0.3626.57', - '71.0.3578.129', - '73.0.3669.1', - '73.0.3669.0', - '72.0.3626.56', - '71.0.3578.128', - '73.0.3668.2', - '73.0.3668.1', - '73.0.3668.0', - '72.0.3626.55', - '71.0.3578.127', - '73.0.3667.2', - '73.0.3667.1', - '73.0.3667.0', - '72.0.3626.54', - '71.0.3578.126', - '73.0.3666.1', - '73.0.3666.0', - '72.0.3626.53', - '71.0.3578.125', - '73.0.3665.4', - '73.0.3665.3', - '72.0.3626.52', - '73.0.3665.2', - '73.0.3664.4', - '73.0.3665.1', - '73.0.3665.0', - '72.0.3626.51', - '71.0.3578.124', - '72.0.3626.50', - '73.0.3664.3', - '73.0.3664.2', - '73.0.3664.1', - '73.0.3664.0', - '73.0.3663.2', - '72.0.3626.49', - '71.0.3578.123', - '73.0.3663.1', - '73.0.3663.0', - '72.0.3626.48', - '71.0.3578.122', - '73.0.3662.1', - '73.0.3662.0', - '72.0.3626.47', - '71.0.3578.121', - '73.0.3661.1', - '72.0.3626.46', - '73.0.3661.0', - '72.0.3626.45', - '71.0.3578.120', - '73.0.3660.2', - '73.0.3660.1', - '73.0.3660.0', - '72.0.3626.44', - '71.0.3578.119', - '73.0.3659.1', - '73.0.3659.0', - '72.0.3626.43', - '71.0.3578.118', - '73.0.3658.1', - '73.0.3658.0', - '72.0.3626.42', - '71.0.3578.117', - '73.0.3657.1', - '73.0.3657.0', - '72.0.3626.41', - '71.0.3578.116', - '73.0.3656.1', - '73.0.3656.0', - '72.0.3626.40', - '71.0.3578.115', - '73.0.3655.1', - '73.0.3655.0', - '72.0.3626.39', - '71.0.3578.114', - '73.0.3654.1', - '73.0.3654.0', - '72.0.3626.38', - '71.0.3578.113', - '73.0.3653.1', - '73.0.3653.0', - '72.0.3626.37', - '71.0.3578.112', - '73.0.3652.1', - '73.0.3652.0', - '72.0.3626.36', - '71.0.3578.111', - '73.0.3651.1', - '73.0.3651.0', - '72.0.3626.35', - '71.0.3578.110', - '73.0.3650.1', - '73.0.3650.0', - '72.0.3626.34', - '71.0.3578.109', - '73.0.3649.1', - '73.0.3649.0', - '72.0.3626.33', - '71.0.3578.108', - '73.0.3648.2', - '73.0.3648.1', - '73.0.3648.0', - '72.0.3626.32', - '71.0.3578.107', - '73.0.3647.2', - '73.0.3647.1', - '73.0.3647.0', - '72.0.3626.31', - '71.0.3578.106', - '73.0.3635.3', - '73.0.3646.2', - '73.0.3646.1', - '73.0.3646.0', - '72.0.3626.30', - '71.0.3578.105', - '72.0.3626.29', - '73.0.3645.2', - '73.0.3645.1', - '73.0.3645.0', - '72.0.3626.28', - '71.0.3578.104', - '72.0.3626.27', - '72.0.3626.26', - '72.0.3626.25', - '72.0.3626.24', - '73.0.3644.0', - '73.0.3643.2', - '72.0.3626.23', - '71.0.3578.103', - '73.0.3643.1', - '73.0.3643.0', - '72.0.3626.22', - '71.0.3578.102', - '73.0.3642.1', - '73.0.3642.0', - '72.0.3626.21', - '71.0.3578.101', - '73.0.3641.1', - '73.0.3641.0', - '72.0.3626.20', - '71.0.3578.100', - '72.0.3626.19', - '73.0.3640.1', - '73.0.3640.0', - '72.0.3626.18', - '73.0.3639.1', - '71.0.3578.99', - '73.0.3639.0', - '72.0.3626.17', - '73.0.3638.2', - '72.0.3626.16', - '73.0.3638.1', - '73.0.3638.0', - '72.0.3626.15', - '71.0.3578.98', - '73.0.3635.2', - '71.0.3578.97', - '73.0.3637.1', - '73.0.3637.0', - '72.0.3626.14', - '71.0.3578.96', - '71.0.3578.95', - '72.0.3626.13', - '71.0.3578.94', - '73.0.3636.2', - '71.0.3578.93', - '73.0.3636.1', - '73.0.3636.0', - '72.0.3626.12', - '71.0.3578.92', - '73.0.3635.1', - '73.0.3635.0', - '72.0.3626.11', - '71.0.3578.91', - '73.0.3634.2', - '73.0.3634.1', - '73.0.3634.0', - '72.0.3626.10', - '71.0.3578.90', - '71.0.3578.89', - '73.0.3633.2', - '73.0.3633.1', - '73.0.3633.0', - '72.0.3610.4', - '72.0.3626.9', - '71.0.3578.88', - '73.0.3632.5', - '73.0.3632.4', - '73.0.3632.3', - '73.0.3632.2', - '73.0.3632.1', - '73.0.3632.0', - '72.0.3626.8', - '71.0.3578.87', - '73.0.3631.2', - '73.0.3631.1', - '73.0.3631.0', - '72.0.3626.7', - '71.0.3578.86', - '72.0.3626.6', - '73.0.3630.1', - '73.0.3630.0', - '72.0.3626.5', - '71.0.3578.85', - '72.0.3626.4', - '73.0.3628.3', - '73.0.3628.2', - '73.0.3629.1', - '73.0.3629.0', - '72.0.3626.3', - '71.0.3578.84', - '73.0.3628.1', - '73.0.3628.0', - '71.0.3578.83', - '73.0.3627.1', - '73.0.3627.0', - '72.0.3626.2', - '71.0.3578.82', - '71.0.3578.81', - '71.0.3578.80', - '72.0.3626.1', - '72.0.3626.0', - '71.0.3578.79', - '70.0.3538.124', - '71.0.3578.78', - '72.0.3623.4', - '72.0.3625.2', - '72.0.3625.1', - '72.0.3625.0', - '71.0.3578.77', - '70.0.3538.123', - '72.0.3624.4', - '72.0.3624.3', - '72.0.3624.2', - '71.0.3578.76', - '72.0.3624.1', - '72.0.3624.0', - '72.0.3623.3', - '71.0.3578.75', - '70.0.3538.122', - '71.0.3578.74', - '72.0.3623.2', - '72.0.3610.3', - '72.0.3623.1', - '72.0.3623.0', - '72.0.3622.3', - '72.0.3622.2', - '71.0.3578.73', - '70.0.3538.121', - '72.0.3622.1', - '72.0.3622.0', - '71.0.3578.72', - '70.0.3538.120', - '72.0.3621.1', - '72.0.3621.0', - '71.0.3578.71', - '70.0.3538.119', - '72.0.3620.1', - '72.0.3620.0', - '71.0.3578.70', - '70.0.3538.118', - '71.0.3578.69', - '72.0.3619.1', - '72.0.3619.0', - '71.0.3578.68', - '70.0.3538.117', - '71.0.3578.67', - '72.0.3618.1', - '72.0.3618.0', - '71.0.3578.66', - '70.0.3538.116', - '72.0.3617.1', - '72.0.3617.0', - '71.0.3578.65', - '70.0.3538.115', - '72.0.3602.3', - '71.0.3578.64', - '72.0.3616.1', - '72.0.3616.0', - '71.0.3578.63', - '70.0.3538.114', - '71.0.3578.62', - '72.0.3615.1', - '72.0.3615.0', - '71.0.3578.61', - '70.0.3538.113', - '72.0.3614.1', - '72.0.3614.0', - '71.0.3578.60', - '70.0.3538.112', - '72.0.3613.1', - '72.0.3613.0', - '71.0.3578.59', - '70.0.3538.111', - '72.0.3612.2', - '72.0.3612.1', - '72.0.3612.0', - '70.0.3538.110', - '71.0.3578.58', - '70.0.3538.109', - '72.0.3611.2', - '72.0.3611.1', - '72.0.3611.0', - '71.0.3578.57', - '70.0.3538.108', - '72.0.3610.2', - '71.0.3578.56', - '71.0.3578.55', - '72.0.3610.1', - '72.0.3610.0', - '71.0.3578.54', - '70.0.3538.107', - '71.0.3578.53', - '72.0.3609.3', - '71.0.3578.52', - '72.0.3609.2', - '71.0.3578.51', - '72.0.3608.5', - '72.0.3609.1', - '72.0.3609.0', - '71.0.3578.50', - '70.0.3538.106', - '72.0.3608.4', - '72.0.3608.3', - '72.0.3608.2', - '71.0.3578.49', - '72.0.3608.1', - '72.0.3608.0', - '70.0.3538.105', - '71.0.3578.48', - '72.0.3607.1', - '72.0.3607.0', - '71.0.3578.47', - '70.0.3538.104', - '72.0.3606.2', - '72.0.3606.1', - '72.0.3606.0', - '71.0.3578.46', - '70.0.3538.103', - '70.0.3538.102', - '72.0.3605.3', - '72.0.3605.2', - '72.0.3605.1', - '72.0.3605.0', - '71.0.3578.45', - '70.0.3538.101', - '71.0.3578.44', - '71.0.3578.43', - '70.0.3538.100', - '70.0.3538.99', - '71.0.3578.42', - '72.0.3604.1', - '72.0.3604.0', - '71.0.3578.41', - '70.0.3538.98', - '71.0.3578.40', - '72.0.3603.2', - '72.0.3603.1', - '72.0.3603.0', - '71.0.3578.39', - '70.0.3538.97', - '72.0.3602.2', - '71.0.3578.38', - '71.0.3578.37', - '72.0.3602.1', - '72.0.3602.0', - '71.0.3578.36', - '70.0.3538.96', - '72.0.3601.1', - '72.0.3601.0', - '71.0.3578.35', - '70.0.3538.95', - '72.0.3600.1', - '72.0.3600.0', - '71.0.3578.34', - '70.0.3538.94', - '72.0.3599.3', - '72.0.3599.2', - '72.0.3599.1', - '72.0.3599.0', - '71.0.3578.33', - '70.0.3538.93', - '72.0.3598.1', - '72.0.3598.0', - '71.0.3578.32', - '70.0.3538.87', - '72.0.3597.1', - '72.0.3597.0', - '72.0.3596.2', - '71.0.3578.31', - '70.0.3538.86', - '71.0.3578.30', - '71.0.3578.29', - '72.0.3596.1', - '72.0.3596.0', - '71.0.3578.28', - '70.0.3538.85', - '72.0.3595.2', - '72.0.3591.3', - '72.0.3595.1', - '72.0.3595.0', - '71.0.3578.27', - '70.0.3538.84', - '72.0.3594.1', - '72.0.3594.0', - '71.0.3578.26', - '70.0.3538.83', - '72.0.3593.2', - '72.0.3593.1', - '72.0.3593.0', - '71.0.3578.25', - '70.0.3538.82', - '72.0.3589.3', - '72.0.3592.2', - '72.0.3592.1', - '72.0.3592.0', - '71.0.3578.24', - '72.0.3589.2', - '70.0.3538.81', - '70.0.3538.80', - '72.0.3591.2', - '72.0.3591.1', - '72.0.3591.0', - '71.0.3578.23', - '70.0.3538.79', - '71.0.3578.22', - '72.0.3590.1', - '72.0.3590.0', - '71.0.3578.21', - '70.0.3538.78', - '70.0.3538.77', - '72.0.3589.1', - '72.0.3589.0', - '71.0.3578.20', - '70.0.3538.76', - '71.0.3578.19', - '70.0.3538.75', - '72.0.3588.1', - '72.0.3588.0', - '71.0.3578.18', - '70.0.3538.74', - '72.0.3586.2', - '72.0.3587.0', - '71.0.3578.17', - '70.0.3538.73', - '72.0.3586.1', - '72.0.3586.0', - '71.0.3578.16', - '70.0.3538.72', - '72.0.3585.1', - '72.0.3585.0', - '71.0.3578.15', - '70.0.3538.71', - '71.0.3578.14', - '72.0.3584.1', - '72.0.3584.0', - '71.0.3578.13', - '70.0.3538.70', - '72.0.3583.2', - '71.0.3578.12', - '72.0.3583.1', - '72.0.3583.0', - '71.0.3578.11', - '70.0.3538.69', - '71.0.3578.10', - '72.0.3582.0', - '72.0.3581.4', - '71.0.3578.9', - '70.0.3538.67', - '72.0.3581.3', - '72.0.3581.2', - '72.0.3581.1', - '72.0.3581.0', - '71.0.3578.8', - '70.0.3538.66', - '72.0.3580.1', - '72.0.3580.0', - '71.0.3578.7', - '70.0.3538.65', - '71.0.3578.6', - '72.0.3579.1', - '72.0.3579.0', - '71.0.3578.5', - '70.0.3538.64', - '71.0.3578.4', - '71.0.3578.3', - '71.0.3578.2', - '71.0.3578.1', - '71.0.3578.0', - '70.0.3538.63', - '69.0.3497.128', - '70.0.3538.62', - '70.0.3538.61', - '70.0.3538.60', - '70.0.3538.59', - '71.0.3577.1', - '71.0.3577.0', - '70.0.3538.58', - '69.0.3497.127', - '71.0.3576.2', - '71.0.3576.1', - '71.0.3576.0', - '70.0.3538.57', - '70.0.3538.56', - '71.0.3575.2', - '70.0.3538.55', - '69.0.3497.126', - '70.0.3538.54', - '71.0.3575.1', - '71.0.3575.0', - '71.0.3574.1', - '71.0.3574.0', - '70.0.3538.53', - '69.0.3497.125', - '70.0.3538.52', - '71.0.3573.1', - '71.0.3573.0', - '70.0.3538.51', - '69.0.3497.124', - '71.0.3572.1', - '71.0.3572.0', - '70.0.3538.50', - '69.0.3497.123', - '71.0.3571.2', - '70.0.3538.49', - '69.0.3497.122', - '71.0.3571.1', - '71.0.3571.0', - '70.0.3538.48', - '69.0.3497.121', - '71.0.3570.1', - '71.0.3570.0', - '70.0.3538.47', - '69.0.3497.120', - '71.0.3568.2', - '71.0.3569.1', - '71.0.3569.0', - '70.0.3538.46', - '69.0.3497.119', - '70.0.3538.45', - '71.0.3568.1', - '71.0.3568.0', - '70.0.3538.44', - '69.0.3497.118', - '70.0.3538.43', - '70.0.3538.42', - '71.0.3567.1', - '71.0.3567.0', - '70.0.3538.41', - '69.0.3497.117', - '71.0.3566.1', - '71.0.3566.0', - '70.0.3538.40', - '69.0.3497.116', - '71.0.3565.1', - '71.0.3565.0', - '70.0.3538.39', - '69.0.3497.115', - '71.0.3564.1', - '71.0.3564.0', - '70.0.3538.38', - '69.0.3497.114', - '71.0.3563.0', - '71.0.3562.2', - '70.0.3538.37', - '69.0.3497.113', - '70.0.3538.36', - '70.0.3538.35', - '71.0.3562.1', - '71.0.3562.0', - '70.0.3538.34', - '69.0.3497.112', - '70.0.3538.33', - '71.0.3561.1', - '71.0.3561.0', - '70.0.3538.32', - '69.0.3497.111', - '71.0.3559.6', - '71.0.3560.1', - '71.0.3560.0', - '71.0.3559.5', - '71.0.3559.4', - '70.0.3538.31', - '69.0.3497.110', - '71.0.3559.3', - '70.0.3538.30', - '69.0.3497.109', - '71.0.3559.2', - '71.0.3559.1', - '71.0.3559.0', - '70.0.3538.29', - '69.0.3497.108', - '71.0.3558.2', - '71.0.3558.1', - '71.0.3558.0', - '70.0.3538.28', - '69.0.3497.107', - '71.0.3557.2', - '71.0.3557.1', - '71.0.3557.0', - '70.0.3538.27', - '69.0.3497.106', - '71.0.3554.4', - '70.0.3538.26', - '71.0.3556.1', - '71.0.3556.0', - '70.0.3538.25', - '71.0.3554.3', - '69.0.3497.105', - '71.0.3554.2', - '70.0.3538.24', - '69.0.3497.104', - '71.0.3555.2', - '70.0.3538.23', - '71.0.3555.1', - '71.0.3555.0', - '70.0.3538.22', - '69.0.3497.103', - '71.0.3554.1', - '71.0.3554.0', - '70.0.3538.21', - '69.0.3497.102', - '71.0.3553.3', - '70.0.3538.20', - '69.0.3497.101', - '71.0.3553.2', - '69.0.3497.100', - '71.0.3553.1', - '71.0.3553.0', - '70.0.3538.19', - '69.0.3497.99', - '69.0.3497.98', - '69.0.3497.97', - '71.0.3552.6', - '71.0.3552.5', - '71.0.3552.4', - '71.0.3552.3', - '71.0.3552.2', - '71.0.3552.1', - '71.0.3552.0', - '70.0.3538.18', - '69.0.3497.96', - '71.0.3551.3', - '71.0.3551.2', - '71.0.3551.1', - '71.0.3551.0', - '70.0.3538.17', - '69.0.3497.95', - '71.0.3550.3', - '71.0.3550.2', - '71.0.3550.1', - '71.0.3550.0', - '70.0.3538.16', - '69.0.3497.94', - '71.0.3549.1', - '71.0.3549.0', - '70.0.3538.15', - '69.0.3497.93', - '69.0.3497.92', - '71.0.3548.1', - '71.0.3548.0', - '70.0.3538.14', - '69.0.3497.91', - '71.0.3547.1', - '71.0.3547.0', - '70.0.3538.13', - '69.0.3497.90', - '71.0.3546.2', - '69.0.3497.89', - '71.0.3546.1', - '71.0.3546.0', - '70.0.3538.12', - '69.0.3497.88', - '71.0.3545.4', - '71.0.3545.3', - '71.0.3545.2', - '71.0.3545.1', - '71.0.3545.0', - '70.0.3538.11', - '69.0.3497.87', - '71.0.3544.5', - '71.0.3544.4', - '71.0.3544.3', - '71.0.3544.2', - '71.0.3544.1', - '71.0.3544.0', - '69.0.3497.86', - '70.0.3538.10', - '69.0.3497.85', - '70.0.3538.9', - '69.0.3497.84', - '71.0.3543.4', - '70.0.3538.8', - '71.0.3543.3', - '71.0.3543.2', - '71.0.3543.1', - '71.0.3543.0', - '70.0.3538.7', - '69.0.3497.83', - '71.0.3542.2', - '71.0.3542.1', - '71.0.3542.0', - '70.0.3538.6', - '69.0.3497.82', - '69.0.3497.81', - '71.0.3541.1', - '71.0.3541.0', - '70.0.3538.5', - '69.0.3497.80', - '71.0.3540.1', - '71.0.3540.0', - '70.0.3538.4', - '69.0.3497.79', - '70.0.3538.3', - '71.0.3539.1', - '71.0.3539.0', - '69.0.3497.78', - '68.0.3440.134', - '69.0.3497.77', - '70.0.3538.2', - '70.0.3538.1', - '70.0.3538.0', - '69.0.3497.76', - '68.0.3440.133', - '69.0.3497.75', - '70.0.3537.2', - '70.0.3537.1', - '70.0.3537.0', - '69.0.3497.74', - '68.0.3440.132', - '70.0.3536.0', - '70.0.3535.5', - '70.0.3535.4', - '70.0.3535.3', - '69.0.3497.73', - '68.0.3440.131', - '70.0.3532.8', - '70.0.3532.7', - '69.0.3497.72', - '69.0.3497.71', - '70.0.3535.2', - '70.0.3535.1', - '70.0.3535.0', - '69.0.3497.70', - '68.0.3440.130', - '69.0.3497.69', - '68.0.3440.129', - '70.0.3534.4', - '70.0.3534.3', - '70.0.3534.2', - '70.0.3534.1', - '70.0.3534.0', - '69.0.3497.68', - '68.0.3440.128', - '70.0.3533.2', - '70.0.3533.1', - '70.0.3533.0', - '69.0.3497.67', - '68.0.3440.127', - '70.0.3532.6', - '70.0.3532.5', - '70.0.3532.4', - '69.0.3497.66', - '68.0.3440.126', - '70.0.3532.3', - '70.0.3532.2', - '70.0.3532.1', - '69.0.3497.60', - '69.0.3497.65', - '69.0.3497.64', - '70.0.3532.0', - '70.0.3531.0', - '70.0.3530.4', - '70.0.3530.3', - '70.0.3530.2', - '69.0.3497.58', - '68.0.3440.125', - '69.0.3497.57', - '69.0.3497.56', - '69.0.3497.55', - '69.0.3497.54', - '70.0.3530.1', - '70.0.3530.0', - '69.0.3497.53', - '68.0.3440.124', - '69.0.3497.52', - '70.0.3529.3', - '70.0.3529.2', - '70.0.3529.1', - '70.0.3529.0', - '69.0.3497.51', - '70.0.3528.4', - '68.0.3440.123', - '70.0.3528.3', - '70.0.3528.2', - '70.0.3528.1', - '70.0.3528.0', - '69.0.3497.50', - '68.0.3440.122', - '70.0.3527.1', - '70.0.3527.0', - '69.0.3497.49', - '68.0.3440.121', - '70.0.3526.1', - '70.0.3526.0', - '68.0.3440.120', - '69.0.3497.48', - '69.0.3497.47', - '68.0.3440.119', - '68.0.3440.118', - '70.0.3525.5', - '70.0.3525.4', - '70.0.3525.3', - '68.0.3440.117', - '69.0.3497.46', - '70.0.3525.2', - '70.0.3525.1', - '70.0.3525.0', - '69.0.3497.45', - '68.0.3440.116', - '70.0.3524.4', - '70.0.3524.3', - '69.0.3497.44', - '70.0.3524.2', - '70.0.3524.1', - '70.0.3524.0', - '70.0.3523.2', - '69.0.3497.43', - '68.0.3440.115', - '70.0.3505.9', - '69.0.3497.42', - '70.0.3505.8', - '70.0.3523.1', - '70.0.3523.0', - '69.0.3497.41', - '68.0.3440.114', - '70.0.3505.7', - '69.0.3497.40', - '70.0.3522.1', - '70.0.3522.0', - '70.0.3521.2', - '69.0.3497.39', - '68.0.3440.113', - '70.0.3505.6', - '70.0.3521.1', - '70.0.3521.0', - '69.0.3497.38', - '68.0.3440.112', - '70.0.3520.1', - '70.0.3520.0', - '69.0.3497.37', - '68.0.3440.111', - '70.0.3519.3', - '70.0.3519.2', - '70.0.3519.1', - '70.0.3519.0', - '69.0.3497.36', - '68.0.3440.110', - '70.0.3518.1', - '70.0.3518.0', - '69.0.3497.35', - '69.0.3497.34', - '68.0.3440.109', - '70.0.3517.1', - '70.0.3517.0', - '69.0.3497.33', - '68.0.3440.108', - '69.0.3497.32', - '70.0.3516.3', - '70.0.3516.2', - '70.0.3516.1', - '70.0.3516.0', - '69.0.3497.31', - '68.0.3440.107', - '70.0.3515.4', - '68.0.3440.106', - '70.0.3515.3', - '70.0.3515.2', - '70.0.3515.1', - '70.0.3515.0', - '69.0.3497.30', - '68.0.3440.105', - '68.0.3440.104', - '70.0.3514.2', - '70.0.3514.1', - '70.0.3514.0', - '69.0.3497.29', - '68.0.3440.103', - '70.0.3513.1', - '70.0.3513.0', - '69.0.3497.28', - ) - @classmethod def _extract_urls(cls, webpage): return re.findall( r']+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' % (cls._DOMAINS, cls._EMBED_WORD), webpage) - def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + def _extract_decrypted_page(self, page_url, webpage, video_id): phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) + webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id) return webpage def _real_extract(self, url): @@ -1977,16 +396,13 @@ class OpenloadIE(InfoExtractor): video_id = mobj.group('id') url_pattern = 'https://%s/%%s/%s/' % (host, video_id) - headers = { - 'User-Agent': self._USER_AGENT_TPL % random.choice(self._CHROME_VERSIONS), - } for path in (self._EMBED_WORD, self._STREAM_WORD): page_url = url_pattern % path last = path == self._STREAM_WORD webpage = self._download_webpage( page_url, video_id, 'Downloading %s webpage' % path, - headers=headers, fatal=last) + fatal=last) if not webpage: continue if 'File not found' in webpage or 'deleted by the owner' in webpage: @@ -1995,7 +411,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True, video_id=video_id) break - webpage = self._extract_decrypted_page(page_url, webpage, video_id, headers) + webpage = self._extract_decrypted_page(page_url, webpage, video_id) for element_id in self._URL_IDS: decoded_id = get_element_by_id(element_id, webpage) if decoded_id: @@ -2026,7 +442,6 @@ class OpenloadIE(InfoExtractor): 'url': video_url, 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'), 'subtitles': subtitles, - 'http_headers': headers, } @@ -2061,5 +476,5 @@ class VerystreamIE(OpenloadIE): 'only_matching': True, }] - def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + def _extract_decrypted_page(self, page_url, webpage, video_id): return webpage # for Verystream, the webpage is already decrypted diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a1f586b80..798757241 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -81,8 +81,1592 @@ def register_socks_protocols(): # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) + +def random_user_agent(): + _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' + _CHROME_VERSIONS = ( + '74.0.3729.129', + '76.0.3780.3', + '76.0.3780.2', + '74.0.3729.128', + '76.0.3780.1', + '76.0.3780.0', + '75.0.3770.15', + '74.0.3729.127', + '74.0.3729.126', + '76.0.3779.1', + '76.0.3779.0', + '75.0.3770.14', + '74.0.3729.125', + '76.0.3778.1', + '76.0.3778.0', + '75.0.3770.13', + '74.0.3729.124', + '74.0.3729.123', + '73.0.3683.121', + '76.0.3777.1', + '76.0.3777.0', + '75.0.3770.12', + '74.0.3729.122', + '76.0.3776.4', + '75.0.3770.11', + '74.0.3729.121', + '76.0.3776.3', + '76.0.3776.2', + '73.0.3683.120', + '74.0.3729.120', + '74.0.3729.119', + '74.0.3729.118', + '76.0.3776.1', + '76.0.3776.0', + '76.0.3775.5', + '75.0.3770.10', + '74.0.3729.117', + '76.0.3775.4', + '76.0.3775.3', + '74.0.3729.116', + '75.0.3770.9', + '76.0.3775.2', + '76.0.3775.1', + '76.0.3775.0', + '75.0.3770.8', + '74.0.3729.115', + '74.0.3729.114', + '76.0.3774.1', + '76.0.3774.0', + '75.0.3770.7', + '74.0.3729.113', + '74.0.3729.112', + '74.0.3729.111', + '76.0.3773.1', + '76.0.3773.0', + '75.0.3770.6', + '74.0.3729.110', + '74.0.3729.109', + '76.0.3772.1', + '76.0.3772.0', + '75.0.3770.5', + '74.0.3729.108', + '74.0.3729.107', + '76.0.3771.1', + '76.0.3771.0', + '75.0.3770.4', + '74.0.3729.106', + '74.0.3729.105', + '75.0.3770.3', + '74.0.3729.104', + '74.0.3729.103', + '74.0.3729.102', + '75.0.3770.2', + '74.0.3729.101', + '75.0.3770.1', + '75.0.3770.0', + '74.0.3729.100', + '75.0.3769.5', + '75.0.3769.4', + '74.0.3729.99', + '75.0.3769.3', + '75.0.3769.2', + '75.0.3768.6', + '74.0.3729.98', + '75.0.3769.1', + '75.0.3769.0', + '74.0.3729.97', + '73.0.3683.119', + '73.0.3683.118', + '74.0.3729.96', + '75.0.3768.5', + '75.0.3768.4', + '75.0.3768.3', + '75.0.3768.2', + '74.0.3729.95', + '74.0.3729.94', + '75.0.3768.1', + '75.0.3768.0', + '74.0.3729.93', + '74.0.3729.92', + '73.0.3683.117', + '74.0.3729.91', + '75.0.3766.3', + '74.0.3729.90', + '75.0.3767.2', + '75.0.3767.1', + '75.0.3767.0', + '74.0.3729.89', + '73.0.3683.116', + '75.0.3766.2', + '74.0.3729.88', + '75.0.3766.1', + '75.0.3766.0', + '74.0.3729.87', + '73.0.3683.115', + '74.0.3729.86', + '75.0.3765.1', + '75.0.3765.0', + '74.0.3729.85', + '73.0.3683.114', + '74.0.3729.84', + '75.0.3764.1', + '75.0.3764.0', + '74.0.3729.83', + '73.0.3683.113', + '75.0.3763.2', + '75.0.3761.4', + '74.0.3729.82', + '75.0.3763.1', + '75.0.3763.0', + '74.0.3729.81', + '73.0.3683.112', + '75.0.3762.1', + '75.0.3762.0', + '74.0.3729.80', + '75.0.3761.3', + '74.0.3729.79', + '73.0.3683.111', + '75.0.3761.2', + '74.0.3729.78', + '74.0.3729.77', + '75.0.3761.1', + '75.0.3761.0', + '73.0.3683.110', + '74.0.3729.76', + '74.0.3729.75', + '75.0.3760.0', + '74.0.3729.74', + '75.0.3759.8', + '75.0.3759.7', + '75.0.3759.6', + '74.0.3729.73', + '75.0.3759.5', + '74.0.3729.72', + '73.0.3683.109', + '75.0.3759.4', + '75.0.3759.3', + '74.0.3729.71', + '75.0.3759.2', + '74.0.3729.70', + '73.0.3683.108', + '74.0.3729.69', + '75.0.3759.1', + '75.0.3759.0', + '74.0.3729.68', + '73.0.3683.107', + '74.0.3729.67', + '75.0.3758.1', + '75.0.3758.0', + '74.0.3729.66', + '73.0.3683.106', + '74.0.3729.65', + '75.0.3757.1', + '75.0.3757.0', + '74.0.3729.64', + '73.0.3683.105', + '74.0.3729.63', + '75.0.3756.1', + '75.0.3756.0', + '74.0.3729.62', + '73.0.3683.104', + '75.0.3755.3', + '75.0.3755.2', + '73.0.3683.103', + '75.0.3755.1', + '75.0.3755.0', + '74.0.3729.61', + '73.0.3683.102', + '74.0.3729.60', + '75.0.3754.2', + '74.0.3729.59', + '75.0.3753.4', + '74.0.3729.58', + '75.0.3754.1', + '75.0.3754.0', + '74.0.3729.57', + '73.0.3683.101', + '75.0.3753.3', + '75.0.3752.2', + '75.0.3753.2', + '74.0.3729.56', + '75.0.3753.1', + '75.0.3753.0', + '74.0.3729.55', + '73.0.3683.100', + '74.0.3729.54', + '75.0.3752.1', + '75.0.3752.0', + '74.0.3729.53', + '73.0.3683.99', + '74.0.3729.52', + '75.0.3751.1', + '75.0.3751.0', + '74.0.3729.51', + '73.0.3683.98', + '74.0.3729.50', + '75.0.3750.0', + '74.0.3729.49', + '74.0.3729.48', + '74.0.3729.47', + '75.0.3749.3', + '74.0.3729.46', + '73.0.3683.97', + '75.0.3749.2', + '74.0.3729.45', + '75.0.3749.1', + '75.0.3749.0', + '74.0.3729.44', + '73.0.3683.96', + '74.0.3729.43', + '74.0.3729.42', + '75.0.3748.1', + '75.0.3748.0', + '74.0.3729.41', + '75.0.3747.1', + '73.0.3683.95', + '75.0.3746.4', + '74.0.3729.40', + '74.0.3729.39', + '75.0.3747.0', + '75.0.3746.3', + '75.0.3746.2', + '74.0.3729.38', + '75.0.3746.1', + '75.0.3746.0', + '74.0.3729.37', + '73.0.3683.94', + '75.0.3745.5', + '75.0.3745.4', + '75.0.3745.3', + '75.0.3745.2', + '74.0.3729.36', + '75.0.3745.1', + '75.0.3745.0', + '75.0.3744.2', + '74.0.3729.35', + '73.0.3683.93', + '74.0.3729.34', + '75.0.3744.1', + '75.0.3744.0', + '74.0.3729.33', + '73.0.3683.92', + '74.0.3729.32', + '74.0.3729.31', + '73.0.3683.91', + '75.0.3741.2', + '75.0.3740.5', + '74.0.3729.30', + '75.0.3741.1', + '75.0.3741.0', + '74.0.3729.29', + '75.0.3740.4', + '73.0.3683.90', + '74.0.3729.28', + '75.0.3740.3', + '73.0.3683.89', + '75.0.3740.2', + '74.0.3729.27', + '75.0.3740.1', + '75.0.3740.0', + '74.0.3729.26', + '73.0.3683.88', + '73.0.3683.87', + '74.0.3729.25', + '75.0.3739.1', + '75.0.3739.0', + '73.0.3683.86', + '74.0.3729.24', + '73.0.3683.85', + '75.0.3738.4', + '75.0.3738.3', + '75.0.3738.2', + '75.0.3738.1', + '75.0.3738.0', + '74.0.3729.23', + '73.0.3683.84', + '74.0.3729.22', + '74.0.3729.21', + '75.0.3737.1', + '75.0.3737.0', + '74.0.3729.20', + '73.0.3683.83', + '74.0.3729.19', + '75.0.3736.1', + '75.0.3736.0', + '74.0.3729.18', + '73.0.3683.82', + '74.0.3729.17', + '75.0.3735.1', + '75.0.3735.0', + '74.0.3729.16', + '73.0.3683.81', + '75.0.3734.1', + '75.0.3734.0', + '74.0.3729.15', + '73.0.3683.80', + '74.0.3729.14', + '75.0.3733.1', + '75.0.3733.0', + '75.0.3732.1', + '74.0.3729.13', + '74.0.3729.12', + '73.0.3683.79', + '74.0.3729.11', + '75.0.3732.0', + '74.0.3729.10', + '73.0.3683.78', + '74.0.3729.9', + '74.0.3729.8', + '74.0.3729.7', + '75.0.3731.3', + '75.0.3731.2', + '75.0.3731.0', + '74.0.3729.6', + '73.0.3683.77', + '73.0.3683.76', + '75.0.3730.5', + '75.0.3730.4', + '73.0.3683.75', + '74.0.3729.5', + '73.0.3683.74', + '75.0.3730.3', + '75.0.3730.2', + '74.0.3729.4', + '73.0.3683.73', + '73.0.3683.72', + '75.0.3730.1', + '75.0.3730.0', + '74.0.3729.3', + '73.0.3683.71', + '74.0.3729.2', + '73.0.3683.70', + '74.0.3729.1', + '74.0.3729.0', + '74.0.3726.4', + '73.0.3683.69', + '74.0.3726.3', + '74.0.3728.0', + '74.0.3726.2', + '73.0.3683.68', + '74.0.3726.1', + '74.0.3726.0', + '74.0.3725.4', + '73.0.3683.67', + '73.0.3683.66', + '74.0.3725.3', + '74.0.3725.2', + '74.0.3725.1', + '74.0.3724.8', + '74.0.3725.0', + '73.0.3683.65', + '74.0.3724.7', + '74.0.3724.6', + '74.0.3724.5', + '74.0.3724.4', + '74.0.3724.3', + '74.0.3724.2', + '74.0.3724.1', + '74.0.3724.0', + '73.0.3683.64', + '74.0.3723.1', + '74.0.3723.0', + '73.0.3683.63', + '74.0.3722.1', + '74.0.3722.0', + '73.0.3683.62', + '74.0.3718.9', + '74.0.3702.3', + '74.0.3721.3', + '74.0.3721.2', + '74.0.3721.1', + '74.0.3721.0', + '74.0.3720.6', + '73.0.3683.61', + '72.0.3626.122', + '73.0.3683.60', + '74.0.3720.5', + '72.0.3626.121', + '74.0.3718.8', + '74.0.3720.4', + '74.0.3720.3', + '74.0.3718.7', + '74.0.3720.2', + '74.0.3720.1', + '74.0.3720.0', + '74.0.3718.6', + '74.0.3719.5', + '73.0.3683.59', + '74.0.3718.5', + '74.0.3718.4', + '74.0.3719.4', + '74.0.3719.3', + '74.0.3719.2', + '74.0.3719.1', + '73.0.3683.58', + '74.0.3719.0', + '73.0.3683.57', + '73.0.3683.56', + '74.0.3718.3', + '73.0.3683.55', + '74.0.3718.2', + '74.0.3718.1', + '74.0.3718.0', + '73.0.3683.54', + '74.0.3717.2', + '73.0.3683.53', + '74.0.3717.1', + '74.0.3717.0', + '73.0.3683.52', + '74.0.3716.1', + '74.0.3716.0', + '73.0.3683.51', + '74.0.3715.1', + '74.0.3715.0', + '73.0.3683.50', + '74.0.3711.2', + '74.0.3714.2', + '74.0.3713.3', + '74.0.3714.1', + '74.0.3714.0', + '73.0.3683.49', + '74.0.3713.1', + '74.0.3713.0', + '72.0.3626.120', + '73.0.3683.48', + '74.0.3712.2', + '74.0.3712.1', + '74.0.3712.0', + '73.0.3683.47', + '72.0.3626.119', + '73.0.3683.46', + '74.0.3710.2', + '72.0.3626.118', + '74.0.3711.1', + '74.0.3711.0', + '73.0.3683.45', + '72.0.3626.117', + '74.0.3710.1', + '74.0.3710.0', + '73.0.3683.44', + '72.0.3626.116', + '74.0.3709.1', + '74.0.3709.0', + '74.0.3704.9', + '73.0.3683.43', + '72.0.3626.115', + '74.0.3704.8', + '74.0.3704.7', + '74.0.3708.0', + '74.0.3706.7', + '74.0.3704.6', + '73.0.3683.42', + '72.0.3626.114', + '74.0.3706.6', + '72.0.3626.113', + '74.0.3704.5', + '74.0.3706.5', + '74.0.3706.4', + '74.0.3706.3', + '74.0.3706.2', + '74.0.3706.1', + '74.0.3706.0', + '73.0.3683.41', + '72.0.3626.112', + '74.0.3705.1', + '74.0.3705.0', + '73.0.3683.40', + '72.0.3626.111', + '73.0.3683.39', + '74.0.3704.4', + '73.0.3683.38', + '74.0.3704.3', + '74.0.3704.2', + '74.0.3704.1', + '74.0.3704.0', + '73.0.3683.37', + '72.0.3626.110', + '72.0.3626.109', + '74.0.3703.3', + '74.0.3703.2', + '73.0.3683.36', + '74.0.3703.1', + '74.0.3703.0', + '73.0.3683.35', + '72.0.3626.108', + '74.0.3702.2', + '74.0.3699.3', + '74.0.3702.1', + '74.0.3702.0', + '73.0.3683.34', + '72.0.3626.107', + '73.0.3683.33', + '74.0.3701.1', + '74.0.3701.0', + '73.0.3683.32', + '73.0.3683.31', + '72.0.3626.105', + '74.0.3700.1', + '74.0.3700.0', + '73.0.3683.29', + '72.0.3626.103', + '74.0.3699.2', + '74.0.3699.1', + '74.0.3699.0', + '73.0.3683.28', + '72.0.3626.102', + '73.0.3683.27', + '73.0.3683.26', + '74.0.3698.0', + '74.0.3696.2', + '72.0.3626.101', + '73.0.3683.25', + '74.0.3696.1', + '74.0.3696.0', + '74.0.3694.8', + '72.0.3626.100', + '74.0.3694.7', + '74.0.3694.6', + '74.0.3694.5', + '74.0.3694.4', + '72.0.3626.99', + '72.0.3626.98', + '74.0.3694.3', + '73.0.3683.24', + '72.0.3626.97', + '72.0.3626.96', + '72.0.3626.95', + '73.0.3683.23', + '72.0.3626.94', + '73.0.3683.22', + '73.0.3683.21', + '72.0.3626.93', + '74.0.3694.2', + '72.0.3626.92', + '74.0.3694.1', + '74.0.3694.0', + '74.0.3693.6', + '73.0.3683.20', + '72.0.3626.91', + '74.0.3693.5', + '74.0.3693.4', + '74.0.3693.3', + '74.0.3693.2', + '73.0.3683.19', + '74.0.3693.1', + '74.0.3693.0', + '73.0.3683.18', + '72.0.3626.90', + '74.0.3692.1', + '74.0.3692.0', + '73.0.3683.17', + '72.0.3626.89', + '74.0.3687.3', + '74.0.3691.1', + '74.0.3691.0', + '73.0.3683.16', + '72.0.3626.88', + '72.0.3626.87', + '73.0.3683.15', + '74.0.3690.1', + '74.0.3690.0', + '73.0.3683.14', + '72.0.3626.86', + '73.0.3683.13', + '73.0.3683.12', + '74.0.3689.1', + '74.0.3689.0', + '73.0.3683.11', + '72.0.3626.85', + '73.0.3683.10', + '72.0.3626.84', + '73.0.3683.9', + '74.0.3688.1', + '74.0.3688.0', + '73.0.3683.8', + '72.0.3626.83', + '74.0.3687.2', + '74.0.3687.1', + '74.0.3687.0', + '73.0.3683.7', + '72.0.3626.82', + '74.0.3686.4', + '72.0.3626.81', + '74.0.3686.3', + '74.0.3686.2', + '74.0.3686.1', + '74.0.3686.0', + '73.0.3683.6', + '72.0.3626.80', + '74.0.3685.1', + '74.0.3685.0', + '73.0.3683.5', + '72.0.3626.79', + '74.0.3684.1', + '74.0.3684.0', + '73.0.3683.4', + '72.0.3626.78', + '72.0.3626.77', + '73.0.3683.3', + '73.0.3683.2', + '72.0.3626.76', + '73.0.3683.1', + '73.0.3683.0', + '72.0.3626.75', + '71.0.3578.141', + '73.0.3682.1', + '73.0.3682.0', + '72.0.3626.74', + '71.0.3578.140', + '73.0.3681.4', + '73.0.3681.3', + '73.0.3681.2', + '73.0.3681.1', + '73.0.3681.0', + '72.0.3626.73', + '71.0.3578.139', + '72.0.3626.72', + '72.0.3626.71', + '73.0.3680.1', + '73.0.3680.0', + '72.0.3626.70', + '71.0.3578.138', + '73.0.3678.2', + '73.0.3679.1', + '73.0.3679.0', + '72.0.3626.69', + '71.0.3578.137', + '73.0.3678.1', + '73.0.3678.0', + '71.0.3578.136', + '73.0.3677.1', + '73.0.3677.0', + '72.0.3626.68', + '72.0.3626.67', + '71.0.3578.135', + '73.0.3676.1', + '73.0.3676.0', + '73.0.3674.2', + '72.0.3626.66', + '71.0.3578.134', + '73.0.3674.1', + '73.0.3674.0', + '72.0.3626.65', + '71.0.3578.133', + '73.0.3673.2', + '73.0.3673.1', + '73.0.3673.0', + '72.0.3626.64', + '71.0.3578.132', + '72.0.3626.63', + '72.0.3626.62', + '72.0.3626.61', + '72.0.3626.60', + '73.0.3672.1', + '73.0.3672.0', + '72.0.3626.59', + '71.0.3578.131', + '73.0.3671.3', + '73.0.3671.2', + '73.0.3671.1', + '73.0.3671.0', + '72.0.3626.58', + '71.0.3578.130', + '73.0.3670.1', + '73.0.3670.0', + '72.0.3626.57', + '71.0.3578.129', + '73.0.3669.1', + '73.0.3669.0', + '72.0.3626.56', + '71.0.3578.128', + '73.0.3668.2', + '73.0.3668.1', + '73.0.3668.0', + '72.0.3626.55', + '71.0.3578.127', + '73.0.3667.2', + '73.0.3667.1', + '73.0.3667.0', + '72.0.3626.54', + '71.0.3578.126', + '73.0.3666.1', + '73.0.3666.0', + '72.0.3626.53', + '71.0.3578.125', + '73.0.3665.4', + '73.0.3665.3', + '72.0.3626.52', + '73.0.3665.2', + '73.0.3664.4', + '73.0.3665.1', + '73.0.3665.0', + '72.0.3626.51', + '71.0.3578.124', + '72.0.3626.50', + '73.0.3664.3', + '73.0.3664.2', + '73.0.3664.1', + '73.0.3664.0', + '73.0.3663.2', + '72.0.3626.49', + '71.0.3578.123', + '73.0.3663.1', + '73.0.3663.0', + '72.0.3626.48', + '71.0.3578.122', + '73.0.3662.1', + '73.0.3662.0', + '72.0.3626.47', + '71.0.3578.121', + '73.0.3661.1', + '72.0.3626.46', + '73.0.3661.0', + '72.0.3626.45', + '71.0.3578.120', + '73.0.3660.2', + '73.0.3660.1', + '73.0.3660.0', + '72.0.3626.44', + '71.0.3578.119', + '73.0.3659.1', + '73.0.3659.0', + '72.0.3626.43', + '71.0.3578.118', + '73.0.3658.1', + '73.0.3658.0', + '72.0.3626.42', + '71.0.3578.117', + '73.0.3657.1', + '73.0.3657.0', + '72.0.3626.41', + '71.0.3578.116', + '73.0.3656.1', + '73.0.3656.0', + '72.0.3626.40', + '71.0.3578.115', + '73.0.3655.1', + '73.0.3655.0', + '72.0.3626.39', + '71.0.3578.114', + '73.0.3654.1', + '73.0.3654.0', + '72.0.3626.38', + '71.0.3578.113', + '73.0.3653.1', + '73.0.3653.0', + '72.0.3626.37', + '71.0.3578.112', + '73.0.3652.1', + '73.0.3652.0', + '72.0.3626.36', + '71.0.3578.111', + '73.0.3651.1', + '73.0.3651.0', + '72.0.3626.35', + '71.0.3578.110', + '73.0.3650.1', + '73.0.3650.0', + '72.0.3626.34', + '71.0.3578.109', + '73.0.3649.1', + '73.0.3649.0', + '72.0.3626.33', + '71.0.3578.108', + '73.0.3648.2', + '73.0.3648.1', + '73.0.3648.0', + '72.0.3626.32', + '71.0.3578.107', + '73.0.3647.2', + '73.0.3647.1', + '73.0.3647.0', + '72.0.3626.31', + '71.0.3578.106', + '73.0.3635.3', + '73.0.3646.2', + '73.0.3646.1', + '73.0.3646.0', + '72.0.3626.30', + '71.0.3578.105', + '72.0.3626.29', + '73.0.3645.2', + '73.0.3645.1', + '73.0.3645.0', + '72.0.3626.28', + '71.0.3578.104', + '72.0.3626.27', + '72.0.3626.26', + '72.0.3626.25', + '72.0.3626.24', + '73.0.3644.0', + '73.0.3643.2', + '72.0.3626.23', + '71.0.3578.103', + '73.0.3643.1', + '73.0.3643.0', + '72.0.3626.22', + '71.0.3578.102', + '73.0.3642.1', + '73.0.3642.0', + '72.0.3626.21', + '71.0.3578.101', + '73.0.3641.1', + '73.0.3641.0', + '72.0.3626.20', + '71.0.3578.100', + '72.0.3626.19', + '73.0.3640.1', + '73.0.3640.0', + '72.0.3626.18', + '73.0.3639.1', + '71.0.3578.99', + '73.0.3639.0', + '72.0.3626.17', + '73.0.3638.2', + '72.0.3626.16', + '73.0.3638.1', + '73.0.3638.0', + '72.0.3626.15', + '71.0.3578.98', + '73.0.3635.2', + '71.0.3578.97', + '73.0.3637.1', + '73.0.3637.0', + '72.0.3626.14', + '71.0.3578.96', + '71.0.3578.95', + '72.0.3626.13', + '71.0.3578.94', + '73.0.3636.2', + '71.0.3578.93', + '73.0.3636.1', + '73.0.3636.0', + '72.0.3626.12', + '71.0.3578.92', + '73.0.3635.1', + '73.0.3635.0', + '72.0.3626.11', + '71.0.3578.91', + '73.0.3634.2', + '73.0.3634.1', + '73.0.3634.0', + '72.0.3626.10', + '71.0.3578.90', + '71.0.3578.89', + '73.0.3633.2', + '73.0.3633.1', + '73.0.3633.0', + '72.0.3610.4', + '72.0.3626.9', + '71.0.3578.88', + '73.0.3632.5', + '73.0.3632.4', + '73.0.3632.3', + '73.0.3632.2', + '73.0.3632.1', + '73.0.3632.0', + '72.0.3626.8', + '71.0.3578.87', + '73.0.3631.2', + '73.0.3631.1', + '73.0.3631.0', + '72.0.3626.7', + '71.0.3578.86', + '72.0.3626.6', + '73.0.3630.1', + '73.0.3630.0', + '72.0.3626.5', + '71.0.3578.85', + '72.0.3626.4', + '73.0.3628.3', + '73.0.3628.2', + '73.0.3629.1', + '73.0.3629.0', + '72.0.3626.3', + '71.0.3578.84', + '73.0.3628.1', + '73.0.3628.0', + '71.0.3578.83', + '73.0.3627.1', + '73.0.3627.0', + '72.0.3626.2', + '71.0.3578.82', + '71.0.3578.81', + '71.0.3578.80', + '72.0.3626.1', + '72.0.3626.0', + '71.0.3578.79', + '70.0.3538.124', + '71.0.3578.78', + '72.0.3623.4', + '72.0.3625.2', + '72.0.3625.1', + '72.0.3625.0', + '71.0.3578.77', + '70.0.3538.123', + '72.0.3624.4', + '72.0.3624.3', + '72.0.3624.2', + '71.0.3578.76', + '72.0.3624.1', + '72.0.3624.0', + '72.0.3623.3', + '71.0.3578.75', + '70.0.3538.122', + '71.0.3578.74', + '72.0.3623.2', + '72.0.3610.3', + '72.0.3623.1', + '72.0.3623.0', + '72.0.3622.3', + '72.0.3622.2', + '71.0.3578.73', + '70.0.3538.121', + '72.0.3622.1', + '72.0.3622.0', + '71.0.3578.72', + '70.0.3538.120', + '72.0.3621.1', + '72.0.3621.0', + '71.0.3578.71', + '70.0.3538.119', + '72.0.3620.1', + '72.0.3620.0', + '71.0.3578.70', + '70.0.3538.118', + '71.0.3578.69', + '72.0.3619.1', + '72.0.3619.0', + '71.0.3578.68', + '70.0.3538.117', + '71.0.3578.67', + '72.0.3618.1', + '72.0.3618.0', + '71.0.3578.66', + '70.0.3538.116', + '72.0.3617.1', + '72.0.3617.0', + '71.0.3578.65', + '70.0.3538.115', + '72.0.3602.3', + '71.0.3578.64', + '72.0.3616.1', + '72.0.3616.0', + '71.0.3578.63', + '70.0.3538.114', + '71.0.3578.62', + '72.0.3615.1', + '72.0.3615.0', + '71.0.3578.61', + '70.0.3538.113', + '72.0.3614.1', + '72.0.3614.0', + '71.0.3578.60', + '70.0.3538.112', + '72.0.3613.1', + '72.0.3613.0', + '71.0.3578.59', + '70.0.3538.111', + '72.0.3612.2', + '72.0.3612.1', + '72.0.3612.0', + '70.0.3538.110', + '71.0.3578.58', + '70.0.3538.109', + '72.0.3611.2', + '72.0.3611.1', + '72.0.3611.0', + '71.0.3578.57', + '70.0.3538.108', + '72.0.3610.2', + '71.0.3578.56', + '71.0.3578.55', + '72.0.3610.1', + '72.0.3610.0', + '71.0.3578.54', + '70.0.3538.107', + '71.0.3578.53', + '72.0.3609.3', + '71.0.3578.52', + '72.0.3609.2', + '71.0.3578.51', + '72.0.3608.5', + '72.0.3609.1', + '72.0.3609.0', + '71.0.3578.50', + '70.0.3538.106', + '72.0.3608.4', + '72.0.3608.3', + '72.0.3608.2', + '71.0.3578.49', + '72.0.3608.1', + '72.0.3608.0', + '70.0.3538.105', + '71.0.3578.48', + '72.0.3607.1', + '72.0.3607.0', + '71.0.3578.47', + '70.0.3538.104', + '72.0.3606.2', + '72.0.3606.1', + '72.0.3606.0', + '71.0.3578.46', + '70.0.3538.103', + '70.0.3538.102', + '72.0.3605.3', + '72.0.3605.2', + '72.0.3605.1', + '72.0.3605.0', + '71.0.3578.45', + '70.0.3538.101', + '71.0.3578.44', + '71.0.3578.43', + '70.0.3538.100', + '70.0.3538.99', + '71.0.3578.42', + '72.0.3604.1', + '72.0.3604.0', + '71.0.3578.41', + '70.0.3538.98', + '71.0.3578.40', + '72.0.3603.2', + '72.0.3603.1', + '72.0.3603.0', + '71.0.3578.39', + '70.0.3538.97', + '72.0.3602.2', + '71.0.3578.38', + '71.0.3578.37', + '72.0.3602.1', + '72.0.3602.0', + '71.0.3578.36', + '70.0.3538.96', + '72.0.3601.1', + '72.0.3601.0', + '71.0.3578.35', + '70.0.3538.95', + '72.0.3600.1', + '72.0.3600.0', + '71.0.3578.34', + '70.0.3538.94', + '72.0.3599.3', + '72.0.3599.2', + '72.0.3599.1', + '72.0.3599.0', + '71.0.3578.33', + '70.0.3538.93', + '72.0.3598.1', + '72.0.3598.0', + '71.0.3578.32', + '70.0.3538.87', + '72.0.3597.1', + '72.0.3597.0', + '72.0.3596.2', + '71.0.3578.31', + '70.0.3538.86', + '71.0.3578.30', + '71.0.3578.29', + '72.0.3596.1', + '72.0.3596.0', + '71.0.3578.28', + '70.0.3538.85', + '72.0.3595.2', + '72.0.3591.3', + '72.0.3595.1', + '72.0.3595.0', + '71.0.3578.27', + '70.0.3538.84', + '72.0.3594.1', + '72.0.3594.0', + '71.0.3578.26', + '70.0.3538.83', + '72.0.3593.2', + '72.0.3593.1', + '72.0.3593.0', + '71.0.3578.25', + '70.0.3538.82', + '72.0.3589.3', + '72.0.3592.2', + '72.0.3592.1', + '72.0.3592.0', + '71.0.3578.24', + '72.0.3589.2', + '70.0.3538.81', + '70.0.3538.80', + '72.0.3591.2', + '72.0.3591.1', + '72.0.3591.0', + '71.0.3578.23', + '70.0.3538.79', + '71.0.3578.22', + '72.0.3590.1', + '72.0.3590.0', + '71.0.3578.21', + '70.0.3538.78', + '70.0.3538.77', + '72.0.3589.1', + '72.0.3589.0', + '71.0.3578.20', + '70.0.3538.76', + '71.0.3578.19', + '70.0.3538.75', + '72.0.3588.1', + '72.0.3588.0', + '71.0.3578.18', + '70.0.3538.74', + '72.0.3586.2', + '72.0.3587.0', + '71.0.3578.17', + '70.0.3538.73', + '72.0.3586.1', + '72.0.3586.0', + '71.0.3578.16', + '70.0.3538.72', + '72.0.3585.1', + '72.0.3585.0', + '71.0.3578.15', + '70.0.3538.71', + '71.0.3578.14', + '72.0.3584.1', + '72.0.3584.0', + '71.0.3578.13', + '70.0.3538.70', + '72.0.3583.2', + '71.0.3578.12', + '72.0.3583.1', + '72.0.3583.0', + '71.0.3578.11', + '70.0.3538.69', + '71.0.3578.10', + '72.0.3582.0', + '72.0.3581.4', + '71.0.3578.9', + '70.0.3538.67', + '72.0.3581.3', + '72.0.3581.2', + '72.0.3581.1', + '72.0.3581.0', + '71.0.3578.8', + '70.0.3538.66', + '72.0.3580.1', + '72.0.3580.0', + '71.0.3578.7', + '70.0.3538.65', + '71.0.3578.6', + '72.0.3579.1', + '72.0.3579.0', + '71.0.3578.5', + '70.0.3538.64', + '71.0.3578.4', + '71.0.3578.3', + '71.0.3578.2', + '71.0.3578.1', + '71.0.3578.0', + '70.0.3538.63', + '69.0.3497.128', + '70.0.3538.62', + '70.0.3538.61', + '70.0.3538.60', + '70.0.3538.59', + '71.0.3577.1', + '71.0.3577.0', + '70.0.3538.58', + '69.0.3497.127', + '71.0.3576.2', + '71.0.3576.1', + '71.0.3576.0', + '70.0.3538.57', + '70.0.3538.56', + '71.0.3575.2', + '70.0.3538.55', + '69.0.3497.126', + '70.0.3538.54', + '71.0.3575.1', + '71.0.3575.0', + '71.0.3574.1', + '71.0.3574.0', + '70.0.3538.53', + '69.0.3497.125', + '70.0.3538.52', + '71.0.3573.1', + '71.0.3573.0', + '70.0.3538.51', + '69.0.3497.124', + '71.0.3572.1', + '71.0.3572.0', + '70.0.3538.50', + '69.0.3497.123', + '71.0.3571.2', + '70.0.3538.49', + '69.0.3497.122', + '71.0.3571.1', + '71.0.3571.0', + '70.0.3538.48', + '69.0.3497.121', + '71.0.3570.1', + '71.0.3570.0', + '70.0.3538.47', + '69.0.3497.120', + '71.0.3568.2', + '71.0.3569.1', + '71.0.3569.0', + '70.0.3538.46', + '69.0.3497.119', + '70.0.3538.45', + '71.0.3568.1', + '71.0.3568.0', + '70.0.3538.44', + '69.0.3497.118', + '70.0.3538.43', + '70.0.3538.42', + '71.0.3567.1', + '71.0.3567.0', + '70.0.3538.41', + '69.0.3497.117', + '71.0.3566.1', + '71.0.3566.0', + '70.0.3538.40', + '69.0.3497.116', + '71.0.3565.1', + '71.0.3565.0', + '70.0.3538.39', + '69.0.3497.115', + '71.0.3564.1', + '71.0.3564.0', + '70.0.3538.38', + '69.0.3497.114', + '71.0.3563.0', + '71.0.3562.2', + '70.0.3538.37', + '69.0.3497.113', + '70.0.3538.36', + '70.0.3538.35', + '71.0.3562.1', + '71.0.3562.0', + '70.0.3538.34', + '69.0.3497.112', + '70.0.3538.33', + '71.0.3561.1', + '71.0.3561.0', + '70.0.3538.32', + '69.0.3497.111', + '71.0.3559.6', + '71.0.3560.1', + '71.0.3560.0', + '71.0.3559.5', + '71.0.3559.4', + '70.0.3538.31', + '69.0.3497.110', + '71.0.3559.3', + '70.0.3538.30', + '69.0.3497.109', + '71.0.3559.2', + '71.0.3559.1', + '71.0.3559.0', + '70.0.3538.29', + '69.0.3497.108', + '71.0.3558.2', + '71.0.3558.1', + '71.0.3558.0', + '70.0.3538.28', + '69.0.3497.107', + '71.0.3557.2', + '71.0.3557.1', + '71.0.3557.0', + '70.0.3538.27', + '69.0.3497.106', + '71.0.3554.4', + '70.0.3538.26', + '71.0.3556.1', + '71.0.3556.0', + '70.0.3538.25', + '71.0.3554.3', + '69.0.3497.105', + '71.0.3554.2', + '70.0.3538.24', + '69.0.3497.104', + '71.0.3555.2', + '70.0.3538.23', + '71.0.3555.1', + '71.0.3555.0', + '70.0.3538.22', + '69.0.3497.103', + '71.0.3554.1', + '71.0.3554.0', + '70.0.3538.21', + '69.0.3497.102', + '71.0.3553.3', + '70.0.3538.20', + '69.0.3497.101', + '71.0.3553.2', + '69.0.3497.100', + '71.0.3553.1', + '71.0.3553.0', + '70.0.3538.19', + '69.0.3497.99', + '69.0.3497.98', + '69.0.3497.97', + '71.0.3552.6', + '71.0.3552.5', + '71.0.3552.4', + '71.0.3552.3', + '71.0.3552.2', + '71.0.3552.1', + '71.0.3552.0', + '70.0.3538.18', + '69.0.3497.96', + '71.0.3551.3', + '71.0.3551.2', + '71.0.3551.1', + '71.0.3551.0', + '70.0.3538.17', + '69.0.3497.95', + '71.0.3550.3', + '71.0.3550.2', + '71.0.3550.1', + '71.0.3550.0', + '70.0.3538.16', + '69.0.3497.94', + '71.0.3549.1', + '71.0.3549.0', + '70.0.3538.15', + '69.0.3497.93', + '69.0.3497.92', + '71.0.3548.1', + '71.0.3548.0', + '70.0.3538.14', + '69.0.3497.91', + '71.0.3547.1', + '71.0.3547.0', + '70.0.3538.13', + '69.0.3497.90', + '71.0.3546.2', + '69.0.3497.89', + '71.0.3546.1', + '71.0.3546.0', + '70.0.3538.12', + '69.0.3497.88', + '71.0.3545.4', + '71.0.3545.3', + '71.0.3545.2', + '71.0.3545.1', + '71.0.3545.0', + '70.0.3538.11', + '69.0.3497.87', + '71.0.3544.5', + '71.0.3544.4', + '71.0.3544.3', + '71.0.3544.2', + '71.0.3544.1', + '71.0.3544.0', + '69.0.3497.86', + '70.0.3538.10', + '69.0.3497.85', + '70.0.3538.9', + '69.0.3497.84', + '71.0.3543.4', + '70.0.3538.8', + '71.0.3543.3', + '71.0.3543.2', + '71.0.3543.1', + '71.0.3543.0', + '70.0.3538.7', + '69.0.3497.83', + '71.0.3542.2', + '71.0.3542.1', + '71.0.3542.0', + '70.0.3538.6', + '69.0.3497.82', + '69.0.3497.81', + '71.0.3541.1', + '71.0.3541.0', + '70.0.3538.5', + '69.0.3497.80', + '71.0.3540.1', + '71.0.3540.0', + '70.0.3538.4', + '69.0.3497.79', + '70.0.3538.3', + '71.0.3539.1', + '71.0.3539.0', + '69.0.3497.78', + '68.0.3440.134', + '69.0.3497.77', + '70.0.3538.2', + '70.0.3538.1', + '70.0.3538.0', + '69.0.3497.76', + '68.0.3440.133', + '69.0.3497.75', + '70.0.3537.2', + '70.0.3537.1', + '70.0.3537.0', + '69.0.3497.74', + '68.0.3440.132', + '70.0.3536.0', + '70.0.3535.5', + '70.0.3535.4', + '70.0.3535.3', + '69.0.3497.73', + '68.0.3440.131', + '70.0.3532.8', + '70.0.3532.7', + '69.0.3497.72', + '69.0.3497.71', + '70.0.3535.2', + '70.0.3535.1', + '70.0.3535.0', + '69.0.3497.70', + '68.0.3440.130', + '69.0.3497.69', + '68.0.3440.129', + '70.0.3534.4', + '70.0.3534.3', + '70.0.3534.2', + '70.0.3534.1', + '70.0.3534.0', + '69.0.3497.68', + '68.0.3440.128', + '70.0.3533.2', + '70.0.3533.1', + '70.0.3533.0', + '69.0.3497.67', + '68.0.3440.127', + '70.0.3532.6', + '70.0.3532.5', + '70.0.3532.4', + '69.0.3497.66', + '68.0.3440.126', + '70.0.3532.3', + '70.0.3532.2', + '70.0.3532.1', + '69.0.3497.60', + '69.0.3497.65', + '69.0.3497.64', + '70.0.3532.0', + '70.0.3531.0', + '70.0.3530.4', + '70.0.3530.3', + '70.0.3530.2', + '69.0.3497.58', + '68.0.3440.125', + '69.0.3497.57', + '69.0.3497.56', + '69.0.3497.55', + '69.0.3497.54', + '70.0.3530.1', + '70.0.3530.0', + '69.0.3497.53', + '68.0.3440.124', + '69.0.3497.52', + '70.0.3529.3', + '70.0.3529.2', + '70.0.3529.1', + '70.0.3529.0', + '69.0.3497.51', + '70.0.3528.4', + '68.0.3440.123', + '70.0.3528.3', + '70.0.3528.2', + '70.0.3528.1', + '70.0.3528.0', + '69.0.3497.50', + '68.0.3440.122', + '70.0.3527.1', + '70.0.3527.0', + '69.0.3497.49', + '68.0.3440.121', + '70.0.3526.1', + '70.0.3526.0', + '68.0.3440.120', + '69.0.3497.48', + '69.0.3497.47', + '68.0.3440.119', + '68.0.3440.118', + '70.0.3525.5', + '70.0.3525.4', + '70.0.3525.3', + '68.0.3440.117', + '69.0.3497.46', + '70.0.3525.2', + '70.0.3525.1', + '70.0.3525.0', + '69.0.3497.45', + '68.0.3440.116', + '70.0.3524.4', + '70.0.3524.3', + '69.0.3497.44', + '70.0.3524.2', + '70.0.3524.1', + '70.0.3524.0', + '70.0.3523.2', + '69.0.3497.43', + '68.0.3440.115', + '70.0.3505.9', + '69.0.3497.42', + '70.0.3505.8', + '70.0.3523.1', + '70.0.3523.0', + '69.0.3497.41', + '68.0.3440.114', + '70.0.3505.7', + '69.0.3497.40', + '70.0.3522.1', + '70.0.3522.0', + '70.0.3521.2', + '69.0.3497.39', + '68.0.3440.113', + '70.0.3505.6', + '70.0.3521.1', + '70.0.3521.0', + '69.0.3497.38', + '68.0.3440.112', + '70.0.3520.1', + '70.0.3520.0', + '69.0.3497.37', + '68.0.3440.111', + '70.0.3519.3', + '70.0.3519.2', + '70.0.3519.1', + '70.0.3519.0', + '69.0.3497.36', + '68.0.3440.110', + '70.0.3518.1', + '70.0.3518.0', + '69.0.3497.35', + '69.0.3497.34', + '68.0.3440.109', + '70.0.3517.1', + '70.0.3517.0', + '69.0.3497.33', + '68.0.3440.108', + '69.0.3497.32', + '70.0.3516.3', + '70.0.3516.2', + '70.0.3516.1', + '70.0.3516.0', + '69.0.3497.31', + '68.0.3440.107', + '70.0.3515.4', + '68.0.3440.106', + '70.0.3515.3', + '70.0.3515.2', + '70.0.3515.1', + '70.0.3515.0', + '69.0.3497.30', + '68.0.3440.105', + '68.0.3440.104', + '70.0.3514.2', + '70.0.3514.1', + '70.0.3514.0', + '69.0.3497.29', + '68.0.3440.103', + '70.0.3513.1', + '70.0.3513.0', + '69.0.3497.28', + ) + return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) + + std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0', + 'User-Agent': random_user_agent(), 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From c5606802474822887b75af7de23de6679264c0fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 29 Jun 2019 00:33:35 +0700 Subject: [PATCH 2001/2417] [soundcloud] Update client id --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 277c3c7b4..3a8626e02 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -221,7 +221,7 @@ class SoundcloudIE(InfoExtractor): } ] - _CLIENT_ID = 'FweeGBOOEOYJWLJN3oEyToGLKhmSz0I7' + _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' @staticmethod def _extract_urls(webpage): From 5e3da0d42b3d16465a95451276f021ecd0b7bd75 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 1 Jul 2019 08:37:21 +0100 Subject: [PATCH 2002/2417] [dailymotion] add support embed with DM.player js call --- youtube_dl/extractor/dailymotion.py | 12 +++++++++--- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 1a2c1308a..3d3d78041 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -137,10 +137,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor): @staticmethod def _extract_urls(webpage): + urls = [] # Look for embedded Dailymotion player - matches = re.findall( - r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) - return list(map(lambda m: unescapeHTML(m[1]), matches)) + # https://developer.dailymotion.com/player#player-parameters + for mobj in re.finditer( + r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage): + urls.append(unescapeHTML(mobj.group('url'))) + for mobj in re.finditer( + r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P[0-9a-zA-Z]+).+?}\s*\);', webpage): + urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id')) + return urls def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index eeb0d25f6..77e217460 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2104,6 +2104,23 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': ['Failed to download MPD manifest'], }, + { + # DailyMotion embed with DM.player + 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804', + 'info_dict': { + 'id': 'k6aKkGHd9FJs4mtJN39', + 'ext': 'mp4', + 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final', + 'description': 'This video is private.', + 'uploader_id': 'x1jf30l', + 'uploader': 'beIN SPORTS USA', + 'upload_date': '20190528', + 'timestamp': 1559062971, + }, + 'params': { + 'skip_download': True, + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject From 976e1ff7f9be76588f5e6d4a569a49694072e08b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 1 Jul 2019 12:05:18 +0100 Subject: [PATCH 2003/2417] [acast] add support for URLs with episode id(closes #21444) --- youtube_dl/extractor/acast.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index c4362be88..b17c792d2 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -7,6 +7,7 @@ import functools from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, float_or_none, int_or_none, try_get, @@ -27,7 +28,7 @@ class ACastIE(InfoExtractor): ''' _TESTS = [{ 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', - 'md5': 'a02393c74f3bdb1801c3ec2695577ce0', + 'md5': '16d936099ec5ca2d5869e3a813ee8dc4', 'info_dict': { 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', 'ext': 'mp3', @@ -46,28 +47,37 @@ class ACastIE(InfoExtractor): }, { 'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22', 'only_matching': True, + }, { + 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', + 'only_matching': True, }] def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() s = self._download_json( - 'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id), - display_id)['result'] + 'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id), + display_id) media_url = s['url'] + if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id): + episode_url = s.get('episodeUrl') + if episode_url: + display_id = episode_url + else: + channel, display_id = re.match(self._VALID_URL, s['link']).groups() cast_data = self._download_json( 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)['result'] e = cast_data['episode'] - title = e['name'] + title = e.get('name') or s['title'] return { 'id': compat_str(e['id']), 'display_id': display_id, 'url': media_url, 'title': title, - 'description': e.get('description') or e.get('summary'), + 'description': e.get('summary') or clean_html(e.get('description') or s.get('description')), 'thumbnail': e.get('image'), - 'timestamp': unified_timestamp(e.get('publishingDate')), - 'duration': float_or_none(s.get('duration') or e.get('duration')), + 'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')), + 'duration': float_or_none(e.get('duration') or s.get('duration')), 'filesize': int_or_none(e.get('contentLength')), 'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str), 'series': try_get(cast_data, lambda x: x['show']['name'], compat_str), From 4e2491f066f81ee9e941c48a910982ec6ac286b5 Mon Sep 17 00:00:00 2001 From: xyssy <52385286+xyssy@users.noreply.github.com> Date: Mon, 1 Jul 2019 12:05:51 -0500 Subject: [PATCH 2004/2417] [yourporn] Fix extraction (#21585) --- youtube_dl/extractor/yourporn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index b1d1eb6b6..8a2d5f63b 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -37,7 +37,7 @@ class YourPornIE(InfoExtractor): self._search_regex( r'data-vnfo=(["\'])(?P{.+?})\1', webpage, 'data info', group='data'), - video_id)[video_id]).replace('/cdn/', '/cdn4/') + video_id)[video_id]).replace('/cdn/', '/cdn5/') title = (self._search_regex( r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', From 918398092c5049a6edf940ebe3c2dd46916ee93c Mon Sep 17 00:00:00 2001 From: Fai <4016742+aicest@users.noreply.github.com> Date: Tue, 2 Jul 2019 01:10:55 +0800 Subject: [PATCH 2005/2417] [xiami] Update API base URL (#21575) --- youtube_dl/extractor/xiami.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index 8333fb534..618da8382 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -7,7 +7,7 @@ from ..utils import int_or_none class XiamiBaseIE(InfoExtractor): - _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id' + _API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id' def _download_webpage_handle(self, *args, **kwargs): webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs) From 9baf69af450a90ead36af6d205cd0afc87b79253 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Mon, 1 Jul 2019 18:11:38 +0100 Subject: [PATCH 2006/2417] [openload] Add support for oload.biz (#21574) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 237b0d8fb..11e92e471 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -365,6 +365,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', 'only_matching': True, + }, { + 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From d1e41164272a2993816548beebd0d5ef4effafe8 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Mon, 1 Jul 2019 19:13:23 +0200 Subject: [PATCH 2007/2417] [vevo] Add support for embed.vevo.com URLs (#21565) --- youtube_dl/extractor/vevo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 232e05816..4ea9f1b4b 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -34,6 +34,7 @@ class VevoIE(VevoBaseIE): (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| + https?://embed\.vevo\.com/.*?[?&]isrc=| vevo:) (?P[^&?#]+)''' @@ -144,6 +145,9 @@ class VevoIE(VevoBaseIE): # Geo-restricted to Netherlands/Germany 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', 'only_matching': True, + }, { + 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=', + 'only_matching': True, }] _VERSIONS = { 0: 'youtube', # only in AuthenticateVideo videoVersions From c8343f0a4331bd2f561fd67b9b272afb60147a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Jul 2019 01:07:54 +0700 Subject: [PATCH 2008/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4ae3d6c7c..9deeb884a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core ++ [utils] Introduce random_user_agent and use as default User-Agent (#21546) + +Extractors ++ [vevo] Add support for embed.vevo.com URLs (#21565) ++ [openload] Add support for oload.biz (#21574) +* [xiami] Update API base URL (#21575) +* [yourporn] Fix extraction (#21585) ++ [acast] Add support for URLs with episode id (#21444) ++ [dailymotion] Add support for DM.player embeds +* [soundcloud] Update client id + + version 2019.06.27 Extractors From 1335bf10f69b5d2c45b386d3faf71398b9662f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Jul 2019 01:09:59 +0700 Subject: [PATCH 2009/2417] release 2019.07.02 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d7c15e85a..fb0d33b8f 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.27 + [debug] youtube-dl version 2019.07.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 741862590..3c95565a6 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 4fb035ea4..7410776d7 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 73ed62012..cc52bcca6 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.27 + [debug] youtube-dl version 2019.07.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index a9d3653e2..bbd421b1a 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 9deeb884a..5ce78b07a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.02 Core + [utils] Introduce random_user_agent and use as default User-Agent (#21546) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 01896873d..78fe54326 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.06.27' +__version__ = '2019.07.02' From ff0f4cfeba73d17c74caa05b55da610d903ae4d3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 2 Jul 2019 22:07:01 +0100 Subject: [PATCH 2010/2417] [arte] clean extractor(closes #15583)(closes #21614) --- youtube_dl/extractor/arte.py | 330 +++-------------------------- youtube_dl/extractor/extractors.py | 9 - 2 files changed, 29 insertions(+), 310 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index ffc321821..2bd3bfe8a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -4,17 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_str, - compat_urllib_parse_urlparse, -) +from ..compat import compat_str from ..utils import ( ExtractorError, - find_xpath_attr, - get_element_by_attribute, int_or_none, - NO_DEFAULT, qualities, try_get, unified_strdate, @@ -25,59 +18,7 @@ from ..utils import ( # add tests. -class ArteTvIE(InfoExtractor): - _VALID_URL = r'https?://videos\.arte\.tv/(?Pfr|de|en|es)/.*-(?P.*?)\.html' - IE_NAME = 'arte.tv' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - lang = mobj.group('lang') - video_id = mobj.group('id') - - ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') - ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') - ref_xml_doc = self._download_xml( - ref_xml_url, video_id, note='Downloading metadata') - config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) - config_xml_url = config_node.attrib['ref'] - config = self._download_xml( - config_xml_url, video_id, note='Downloading configuration') - - formats = [{ - 'format_id': q.attrib['quality'], - # The playpath starts at 'mp4:', if we don't manually - # split the url, rtmpdump will incorrectly parse them - 'url': q.text.split('mp4:', 1)[0], - 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], - 'ext': 'flv', - 'quality': 2 if q.attrib['quality'] == 'hd' else 1, - } for q in config.findall('./urls/url')] - self._sort_formats(formats) - - title = config.find('.//name').text - thumbnail = config.find('.//firstThumbnailUrl').text - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } - - class ArteTVBaseIE(InfoExtractor): - @classmethod - def _extract_url_info(cls, url): - mobj = re.match(cls._VALID_URL, url) - lang = mobj.group('lang') - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - if 'vid' in query: - video_id = query['vid'][0] - else: - # This is not a real id, it can be for example AJT for the news - # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal - video_id = mobj.group('id') - return video_id, lang - def _extract_from_json_url(self, json_url, video_id, lang, title=None): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] @@ -108,13 +49,15 @@ class ArteTVBaseIE(InfoExtractor): 'upload_date': unified_strdate(upload_date_str), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) + qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ']) LANGS = { 'fr': 'F', 'de': 'A', 'en': 'E[ANG]', 'es': 'E[ESP]', + 'it': 'E[ITA]', + 'pl': 'E[POL]', } langcode = LANGS.get(lang, lang) @@ -126,8 +69,8 @@ class ArteTVBaseIE(InfoExtractor): l = re.escape(langcode) # Language preference from most to least priority - # Reference: section 5.6.3 of - # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf + # Reference: section 6.8 of + # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf PREFERENCES = ( # original version in requested language, without subtitles r'VO{0}$'.format(l), @@ -193,274 +136,59 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVPlus7IE(ArteTVBaseIE): IE_NAME = 'arte.tv:+7' - _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?Pfr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?Pfr|de|en|es|it|pl)/videos/(?P\d{6}-\d{3}-[AF])' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', - 'only_matching': True, - }, { - 'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', - 'only_matching': True, - }, { - 'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn', - 'only_matching': True, + 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', + 'info_dict': { + 'id': '088501-000-A', + 'ext': 'mp4', + 'title': 'Mexico: Stealing Petrol to Survive', + 'upload_date': '20190628', + }, }] - @classmethod - def suitable(cls, url): - return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url) - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - webpage = self._download_webpage(url, video_id) - return self._extract_from_webpage(webpage, video_id, lang) - - def _extract_from_webpage(self, webpage, video_id, lang): - patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']') - ids = (video_id, '') - # some pages contain multiple videos (like - # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), - # so we first try to look for json URLs that contain the video id from - # the 'vid' parameter. - patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] - json_url = self._html_search_regex( - patterns, webpage, 'json vp url', default=None) - if not json_url: - def find_iframe_url(webpage, default=NO_DEFAULT): - return self._html_search_regex( - r']+src=(["\'])(?P.+\bjson_url=.+?)\1', - webpage, 'iframe url', group='url', default=default) - - iframe_url = find_iframe_url(webpage, None) - if not iframe_url: - embed_url = self._html_search_regex( - r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None) - if embed_url: - player = self._download_json( - embed_url, video_id, 'Downloading player page') - iframe_url = find_iframe_url(player['html']) - # en and es URLs produce react-based pages with different layout (e.g. - # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) - if not iframe_url: - program = self._search_regex( - r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', - webpage, 'program', default=None) - if program: - embed_html = self._parse_json(program, video_id) - if embed_html: - iframe_url = find_iframe_url(embed_html['embed_html']) - if iframe_url: - json_url = compat_parse_qs( - compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] - if json_url: - title = self._search_regex( - r']+title=(["\'])(?P.+?)\1', - webpage, 'title', default=None, group='title') - return self._extract_from_json_url(json_url, video_id, lang, title=title) - # Different kind of embed URL (e.g. - # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) - entries = [ - self.url_result(url) - for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)] - return self.playlist_result(entries) - - -# It also uses the arte_vp_url url from the webpage to extract the information -class ArteTVCreativeIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:creative' - _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1', - 'info_dict': { - 'id': '057405-001-A', - 'ext': 'mp4', - 'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)', - 'upload_date': '20150716', - }, - }, { - 'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion', - 'playlist_count': 11, - 'add_ie': ['Youtube'], - }, { - 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', - 'only_matching': True, - }] - - -class ArteTVInfoIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:info' - _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', - 'info_dict': { - 'id': '067528-000-A', - 'ext': 'mp4', - 'title': 'Service civique, un cache misère ?', - 'upload_date': '20160403', - }, - }] - - -class ArteTVFutureIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:future' - _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses', - 'info_dict': { - 'id': '050940-028-A', - 'ext': 'mp4', - 'title': 'Les écrevisses aussi peuvent être anxieuses', - 'upload_date': '20140902', - }, - }, { - 'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable', - 'only_matching': True, - }] - - -class ArteTVDDCIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:ddc' - _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)' - - _TESTS = [] - - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - if lang == 'folge': - lang = 'de' - elif lang == 'emission': - lang = 'fr' - webpage = self._download_webpage(url, video_id) - scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage) - script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url') - javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') - json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') - return self._extract_from_json_url(json_url, video_id, lang) - - -class ArteTVConcertIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:concert' - _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', - 'md5': '9ea035b7bd69696b67aa2ccaaa218161', - 'info_dict': { - 'id': '186', - 'ext': 'mp4', - 'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"', - 'upload_date': '20140128', - 'description': 'md5:486eb08f991552ade77439fe6d82c305', - }, - }] - - -class ArteTVCinemaIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:cinema' - _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck', - 'md5': 'a5b9dd5575a11d93daf0e3f404f45438', - 'info_dict': { - 'id': '062494-000-A', - 'ext': 'mp4', - 'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck', - 'upload_date': '20150807', - }, - }] - - -class ArteTVMagazineIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:magazine' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - # Embedded via <iframe src="http://www.arte.tv/arte_vp/index.php?json_url=..." - 'url': 'http://www.arte.tv/magazine/trepalium/fr/entretien-avec-le-realisateur-vincent-lannoo-trepalium', - 'md5': '2a9369bcccf847d1c741e51416299f25', - 'info_dict': { - 'id': '065965-000-A', - 'ext': 'mp4', - 'title': 'Trepalium - Extrait Ep.01', - 'upload_date': '20160121', - }, - }, { - # Embedded via <iframe src="http://www.arte.tv/guide/fr/embed/054813-004-A/medium" - 'url': 'http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium', - 'md5': 'fedc64fc7a946110fe311634e79782ca', - 'info_dict': { - 'id': '054813-004_PLUS7-F', - 'ext': 'mp4', - 'title': 'Trepalium (4/6)', - 'description': 'md5:10057003c34d54e95350be4f9b05cb40', - 'upload_date': '20160218', - }, - }, { - 'url': 'http://www.arte.tv/magazine/metropolis/de/frank-woeste-german-paris-metropolis', - 'only_matching': True, - }] + lang, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_from_json_url( + 'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id), + video_id, lang) class ArteTVEmbedIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:embed' _VALID_URL = r'''(?x) - http://www\.arte\.tv - /(?:playerv2/embed|arte_vp/index)\.php\?json_url= + https://www\.arte\.tv + /player/v3/index\.php\?json_url= (?P<json_url> - http://arte\.tv/papi/tvguide/videos/stream/player/ - (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]* + https?://api\.arte\.tv/api/player/v1/config/ + (?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF]) ) ''' _TESTS = [] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - lang = mobj.group('lang') - json_url = mobj.group('json_url') + json_url, lang, video_id = re.match(self._VALID_URL, url).groups() return self._extract_from_json_url(json_url, video_id, lang) -class TheOperaPlatformIE(ArteTVPlus7IE): - IE_NAME = 'theoperaplatform' - _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello', - 'md5': '970655901fa2e82e04c00b955e9afe7b', - 'info_dict': { - 'id': '060338-009-A', - 'ext': 'mp4', - 'title': 'Verdi - OTELLO', - 'upload_date': '20160927', - }, - }] - - class ArteTVPlaylistIE(ArteTVBaseIE): IE_NAME = 'arte.tv:playlist' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV', + 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'info_dict': { - 'id': 'PL-013263', - 'title': 'Areva & Uramin', - 'description': 'md5:a1dc0312ce357c262259139cfd48c9bf', + 'id': 'RC-016954', + 'title': 'Earn a Living', + 'description': 'md5:d322c55011514b3a7241f7fb80d494c2', }, 'playlist_mincount': 6, - }, { - 'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV', - 'only_matching': True, }] def _real_extract(self, url): - playlist_id, lang = self._extract_url_info(url) + lang, playlist_id = re.match(self._VALID_URL, url).groups() collection = self._download_json( 'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' % (lang, playlist_id), playlist_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 530474f3f..02f17cf0d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -58,17 +58,8 @@ from .ard import ( ARDMediathekIE, ) from .arte import ( - ArteTvIE, ArteTVPlus7IE, - ArteTVCreativeIE, - ArteTVConcertIE, - ArteTVInfoIE, - ArteTVFutureIE, - ArteTVCinemaIE, - ArteTVDDCIE, - ArteTVMagazineIE, ArteTVEmbedIE, - TheOperaPlatformIE, ArteTVPlaylistIE, ) from .asiancrush import ( From e61ac1a09c215d9efb9a65ee798a6c1d6a0863cd Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 3 Jul 2019 13:31:47 +0100 Subject: [PATCH 2011/2417] [tvland] fix extraction(closes #21384) --- youtube_dl/extractor/tvland.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py index 957cf1ea2..791144128 100644 --- a/youtube_dl/extractor/tvland.py +++ b/youtube_dl/extractor/tvland.py @@ -1,32 +1,35 @@ # coding: utf-8 from __future__ import unicode_literals -from .mtv import MTVServicesInfoExtractor +from .spike import ParamountNetworkIE -class TVLandIE(MTVServicesInfoExtractor): +class TVLandIE(ParamountNetworkIE): IE_NAME = 'tvland.com' _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.tvland.com/feeds/mrss/' _TESTS = [{ # Geo-restricted. Without a proxy metadata are still there. With a # proxy it redirects to http://m.tvland.com/app/ - 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', + 'url': 'https://www.tvland.com/episodes/s04pzf/everybody-loves-raymond-the-dog-season-1-ep-19', 'info_dict': { - 'description': 'md5:80973e81b916a324e05c14a3fb506d29', - 'title': 'The Invasion', + 'description': 'md5:84928e7a8ad6649371fbf5da5e1ad75a', + 'title': 'The Dog', }, - 'playlist': [], + 'playlist_mincount': 5, }, { - 'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies', + 'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6', 'md5': 'e2c6389401cf485df26c79c247b08713', 'info_dict': { - 'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88', + 'id': '891f7d3c-5b5b-4753-b879-b7ba1a601757', 'ext': 'mp4', - 'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies', - 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269', - 'upload_date': '20151228', - 'timestamp': 1451289600, + 'title': 'Younger|April 30, 2019|6|NO-EPISODE#|A First Look at Younger Season 6', + 'description': 'md5:595ea74578d3a888ae878dfd1c7d4ab2', + 'upload_date': '20190430', + 'timestamp': 1556658000, + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301', From 313877c6a2b5ac8b880a9c47e8038ea0cdcf3deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 3 Jul 2019 23:16:40 +0700 Subject: [PATCH 2012/2417] [vzaar] Fix videos with empty title (closes #21606) --- youtube_dl/extractor/vzaar.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index 6000671c3..3336e6c15 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -32,6 +32,10 @@ class VzaarIE(InfoExtractor): 'ext': 'mp3', 'title': 'MP3', }, + }, { + # with null videoTitle + 'url': 'https://view.vzaar.com/20313539/download', + 'only_matching': True, }] @staticmethod @@ -45,7 +49,7 @@ class VzaarIE(InfoExtractor): video_data = self._download_json( 'http://view.vzaar.com/v2/%s/video' % video_id, video_id) - title = video_data['videoTitle'] + title = video_data.get('videoTitle') or video_id formats = [] From 2da4316e48475c344be862149f744c3a8a1ab2f1 Mon Sep 17 00:00:00 2001 From: David Caldwell <david+github@porkrind.org> Date: Wed, 3 Jul 2019 09:22:23 -0700 Subject: [PATCH 2013/2417] [twitch:vod] Actualize m3u8 URL (#21538, #21607) --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index dc5ff29c3..0500e33a6 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -317,7 +317,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'Downloading %s access token' % self._ITEM_TYPE) formats = self._extract_m3u8_formats( - '%s/vod/%s?%s' % ( + '%s/vod/%s.m3u8?%s' % ( self._USHER_BASE, item_id, compat_urllib_parse_urlencode({ 'allow_source': 'true', From cdb7c7d147b19f79512d541465cb5be9a54c7950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jul 2019 02:04:23 +0700 Subject: [PATCH 2014/2417] [ted] Restrict info regex (closes #21631) --- youtube_dl/extractor/ted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 9b60cc462..db5a4f44e 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -133,7 +133,7 @@ class TEDIE(InfoExtractor): def _extract_info(self, webpage): info_json = self._search_regex( - r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>', + r'(?s)q\(\s*"\w+.init"\s*,\s*({.+?})\)\s*</script>', webpage, 'info json') return json.loads(info_json) From 5ae9b8b3a3063c97730b79ea1dfd39bc19fd56c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jul 2019 03:57:11 +0700 Subject: [PATCH 2015/2417] [adobepass] Add support for AT&T U-verse (mso ATT) (closes #13938, closes #21016) --- youtube_dl/extractor/adobepass.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 1cf2dcbf3..38dca1b0a 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -25,6 +25,11 @@ MSO_INFO = { 'username_field': 'username', 'password_field': 'password', }, + 'ATT': { + 'name': 'AT&T U-verse', + 'username_field': 'userid', + 'password_field': 'password', + }, 'ATTOTT': { 'name': 'DIRECTV NOW', 'username_field': 'email', From a30c2f40550dd1ecc52c470db8ef77ea84bfe85b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jul 2019 04:01:30 +0700 Subject: [PATCH 2016/2417] [go] Add site info for disneynow (closes #21613) --- youtube_dl/extractor/go.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 5916f9a8f..03e48f4ea 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -34,9 +34,13 @@ class GoIE(AdobePassIE): 'watchdisneyxd': { 'brand': '009', 'resource_id': 'DisneyXD', + }, + 'disneynow': { + 'brand': '011', + 'resource_id': 'Disney', } } - _VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|disneynow)\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ + _VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|(?P<sub_domain_2>disneynow))\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', @@ -83,7 +87,9 @@ class GoIE(AdobePassIE): display_id)['video'] def _real_extract(self, url): - sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + mobj = re.match(self._VALID_URL, url) + sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2') + video_id, display_id = mobj.group('id', 'display_id') site_info = self._SITE_INFO.get(sub_domain, {}) brand = site_info.get('brand') if not video_id or not site_info: From c9fa84d88ef31c847d418223c0c6eb93651ccbec Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 4 Jul 2019 15:59:25 +0100 Subject: [PATCH 2017/2417] [lecturio] add support id based URLs(closes #21630) --- youtube_dl/extractor/lecturio.py | 113 +++++++++++++++++-------------- 1 file changed, 64 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py index 24f78d928..6ed7da4ab 100644 --- a/youtube_dl/extractor/lecturio.py +++ b/youtube_dl/extractor/lecturio.py @@ -6,8 +6,8 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, determine_ext, - extract_attributes, ExtractorError, float_or_none, int_or_none, @@ -19,6 +19,7 @@ from ..utils import ( class LecturioBaseIE(InfoExtractor): + _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' _LOGIN_URL = 'https://app.lecturio.com/en/login' _NETRC_MACHINE = 'lecturio' @@ -67,51 +68,56 @@ class LecturioIE(LecturioBaseIE): _VALID_URL = r'''(?x) https:// (?: - app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture| - (?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag + app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))| + (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag ) ''' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', - 'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870', + 'md5': '9a42cf1d8282a6311bf7211bbde26fde', 'info_dict': { 'id': '39634', 'ext': 'mp4', - 'title': 'Important Concepts and Terms – Introduction to Microbiology', + 'title': 'Important Concepts and Terms — Introduction to Microbiology', }, 'skip': 'Requires lecturio account credentials', }, { 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', 'only_matching': True, + }, { + 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', + 'only_matching': True, }] _CC_LANGS = { + 'Arabic': 'ar', + 'Bulgarian': 'bg', 'German': 'de', 'English': 'en', 'Spanish': 'es', + 'Persian': 'fa', 'French': 'fr', + 'Japanese': 'ja', 'Polish': 'pl', + 'Pashto': 'ps', 'Russian': 'ru', } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') or mobj.group('id_de') - - webpage = self._download_webpage( - 'https://app.lecturio.com/en/lecture/%s/player.html' % display_id, - display_id) - - lecture_id = self._search_regex( - r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id') - - api_url = self._search_regex( - r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, - 'api url', group='url') - - video = self._download_json(api_url, display_id) - + nt = mobj.group('nt') or mobj.group('nt_de') + lecture_id = mobj.group('id') + display_id = nt or lecture_id + api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json' + video = self._download_json( + self._API_BASE_URL + api_path, display_id) title = video['title'].strip() + if not lecture_id: + pid = video.get('productId') or video.get('uid') + if pid: + spid = pid.split('_') + if spid and len(spid) == 2: + lecture_id = spid[1] formats = [] for format_ in video['content']['media']: @@ -129,24 +135,30 @@ class LecturioIE(LecturioBaseIE): continue label = str_or_none(format_.get('label')) filesize = int_or_none(format_.get('fileSize')) - formats.append({ + f = { 'url': file_url, 'format_id': label, 'filesize': float_or_none(filesize, invscale=1000) - }) + } + if label: + mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label) + if mobj: + f.update({ + 'format_id': mobj.group(2), + 'height': int(mobj.group(1)), + }) + formats.append(f) self._sort_formats(formats) subtitles = {} automatic_captions = {} - cc = self._parse_json( - self._search_regex( - r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles', - default='{}'), display_id, fatal=False) - for cc_label, cc_url in cc.items(): - cc_url = url_or_none(cc_url) + captions = video.get('captions') or [] + for cc in captions: + cc_url = cc.get('url') if not cc_url: continue - lang = self._search_regex( + cc_label = cc.get('translatedCode') + lang = cc.get('languageCode') or self._search_regex( r'/([a-z]{2})_', cc_url, 'lang', default=cc_label.split()[0] if cc_label else 'en') original_lang = self._search_regex( @@ -160,7 +172,7 @@ class LecturioIE(LecturioBaseIE): }) return { - 'id': lecture_id, + 'id': lecture_id or nt, 'title': title, 'formats': formats, 'subtitles': subtitles, @@ -169,37 +181,40 @@ class LecturioIE(LecturioBaseIE): class LecturioCourseIE(LecturioBaseIE): - _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course' - _TEST = { + _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))' + _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', 'info_dict': { 'id': 'microbiology-introduction', 'title': 'Microbiology: Introduction', + 'description': 'md5:13da8500c25880c6016ae1e6d78c386a', }, 'playlist_count': 45, 'skip': 'Requires lecturio account credentials', - } + }, { + 'url': 'https://app.lecturio.com/#/course/c/6434', + 'only_matching': True, + }] def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - + nt, course_id = re.match(self._VALID_URL, url).groups() + display_id = nt or course_id + api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json' + course = self._download_json( + self._API_BASE_URL + api_path, display_id) entries = [] - for mobj in re.finditer( - r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>', - webpage): - params = extract_attributes(mobj.group(0)) - lecture_url = urljoin(url, params.get('data-url')) - lecture_id = params.get('data-id') + for lecture in course.get('lectures', []): + lecture_id = str_or_none(lecture.get('id')) + lecture_url = lecture.get('url') + if lecture_url: + lecture_url = urljoin(url, lecture_url) + else: + lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id) entries.append(self.url_result( lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) - - title = self._search_regex( - r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage, - 'title', default=None) - - return self.playlist_result(entries, display_id, title) + return self.playlist_result( + entries, display_id, course.get('title'), + clean_html(course.get('description'))) class LecturioDeCourseIE(LecturioBaseIE): From d1850c1a975de37b28c39afdce2e5ea56dec032a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hendrik=20Schr=C3=B6ter?= <Rikorose@users.noreply.github.com> Date: Fri, 5 Jul 2019 15:47:32 +0000 Subject: [PATCH 2018/2417] [mixer:vod] Relax _VALID_URL (closes #21657) (#21658) --- youtube_dl/extractor/beampro.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index e264a145f..86abdae00 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -99,7 +99,7 @@ class BeamProLiveIE(BeamProBaseIE): class BeamProVodIE(BeamProBaseIE): IE_NAME = 'Mixer:vod' - _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\w+)' + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)' _TESTS = [{ 'url': 'https://mixer.com/willow8714?vod=2259830', 'md5': 'b2431e6e8347dc92ebafb565d368b76b', @@ -122,6 +122,9 @@ class BeamProVodIE(BeamProBaseIE): }, { 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', 'only_matching': True, + }, { + 'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig', + 'only_matching': True, }] @staticmethod From d18003a1419517cad49d4c5e8acb8255dd5422df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jul 2019 00:42:54 +0700 Subject: [PATCH 2019/2417] [peertube] Detect embed URLs in generic extraction (closes #21666) --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index e03c3d1d3..b50543e32 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -168,7 +168,7 @@ class PeerTubeIE(InfoExtractor): @staticmethod def _extract_peertube_url(webpage, source_url): mobj = re.match( - r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)' + r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)' % PeerTubeIE._UUID_RE, source_url) if mobj and any(p in webpage for p in ( '<title>PeerTube<', From a6389abfd7fec786ed07031cd7f3a42d02910de3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jul 2019 23:16:38 +0700 Subject: [PATCH 2020/2417] [philharmoniedeparis] Relax _VALID_URL (closes #21672) --- youtube_dl/extractor/philharmoniedeparis.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index f723a2b3b..03da64b11 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -14,7 +14,7 @@ class PhilharmonieDeParisIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)| + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)| pad\.philharmoniedeparis\.fr/doc/CIMU/ ) (?P<id>\d+) @@ -40,6 +40,12 @@ class PhilharmonieDeParisIE(InfoExtractor): }, { 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, }] _LIVE_URL = 'https://live.philharmoniedeparis.fr' From 25d71fb058368e1d48c4ad9496d91d33378649f6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 9 Jul 2019 08:28:39 +0100 Subject: [PATCH 2021/2417] [packtpub] fix extraction(closes #21268) --- youtube_dl/extractor/packtpub.py | 111 ++++++++++++++----------------- 1 file changed, 51 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 1324137df..3d39d1b27 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -5,26 +5,27 @@ import re from .common import InfoExtractor from ..compat import ( - compat_str, + # compat_str, compat_HTTPError, ) from ..utils import ( clean_html, ExtractorError, - remove_end, + # remove_end, + str_or_none, strip_or_none, unified_timestamp, - urljoin, + # urljoin, ) class PacktPubBaseIE(InfoExtractor): - _PACKT_BASE = 'https://www.packtpub.com' - _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE + # _PACKT_BASE = 'https://www.packtpub.com' + _STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/' class PacktPubIE(PacktPubBaseIE): - _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)(?:/(?P<display_id>[^/?&#]+))?' _TESTS = [{ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', @@ -50,9 +51,9 @@ class PacktPubIE(PacktPubBaseIE): return try: self._TOKEN = self._download_json( - self._MAPT_REST + '/users/tokens', None, + 'https://services.packtpub.com/auth-v1/users/tokens', None, 'Downloading Authorization Token', data=json.dumps({ - 'email': username, + 'username': username, 'password': password, }).encode())['data']['access'] except ExtractorError as e: @@ -61,54 +62,40 @@ class PacktPubIE(PacktPubBaseIE): raise ExtractorError(message, expected=True) raise - def _handle_error(self, response): - if response.get('status') != 'success': - raise ExtractorError( - '% said: %s' % (self.IE_NAME, response['message']), - expected=True) - - def _download_json(self, *args, **kwargs): - response = super(PacktPubIE, self)._download_json(*args, **kwargs) - self._handle_error(response) - return response - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - course_id, chapter_id, video_id = mobj.group( - 'course_id', 'chapter_id', 'id') + course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups() headers = {} if self._TOKEN: headers['Authorization'] = 'Bearer ' + self._TOKEN - video = self._download_json( - '%s/users/me/products/%s/chapters/%s/sections/%s' - % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, - 'Downloading JSON video', headers=headers)['data'] + try: + video_url = self._download_json( + 'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id, + 'Downloading JSON video', headers=headers)['data'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + self.raise_login_required('This video is locked') + raise - content = video.get('content') - if not content: - self.raise_login_required('This video is locked') + # TODO: find a better way to avoid duplicating course requests + # metadata = self._download_json( + # '%s/products/%s/chapters/%s/sections/%s/metadata' + # % (self._MAPT_REST, course_id, chapter_id, video_id), + # video_id)['data'] - video_url = content['file'] - - metadata = self._download_json( - '%s/products/%s/chapters/%s/sections/%s/metadata' - % (self._MAPT_REST, course_id, chapter_id, video_id), - video_id)['data'] - - title = metadata['pageTitle'] - course_title = metadata.get('title') - if course_title: - title = remove_end(title, ' - %s' % course_title) - timestamp = unified_timestamp(metadata.get('publicationDate')) - thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) + # title = metadata['pageTitle'] + # course_title = metadata.get('title') + # if course_title: + # title = remove_end(title, ' - %s' % course_title) + # timestamp = unified_timestamp(metadata.get('publicationDate')) + # thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) return { 'id': video_id, 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'timestamp': timestamp, + 'title': display_id or video_id, # title, + # 'thumbnail': thumbnail, + # 'timestamp': timestamp, } @@ -119,6 +106,7 @@ class PacktPubCourseIE(PacktPubBaseIE): 'info_dict': { 'id': '9781787122215', 'title': 'Learn Nodejs by building 12 projects [Video]', + 'description': 'md5:489da8d953f416e51927b60a1c7db0aa', }, 'playlist_count': 90, }, { @@ -136,35 +124,38 @@ class PacktPubCourseIE(PacktPubBaseIE): url, course_id = mobj.group('url', 'id') course = self._download_json( - '%s/products/%s/metadata' % (self._MAPT_REST, course_id), - course_id)['data'] + self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id) + metadata = self._download_json( + self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id, + course_id, fatal=False) or {} entries = [] - for chapter_num, chapter in enumerate(course['tableOfContents'], 1): - if chapter.get('type') != 'chapter': - continue - children = chapter.get('children') - if not isinstance(children, list): + for chapter_num, chapter in enumerate(course['chapters'], 1): + chapter_id = str_or_none(chapter.get('id')) + sections = chapter.get('sections') + if not chapter_id or not isinstance(sections, list): continue chapter_info = { 'chapter': chapter.get('title'), 'chapter_number': chapter_num, - 'chapter_id': chapter.get('id'), + 'chapter_id': chapter_id, } - for section in children: - if section.get('type') != 'section': - continue - section_url = section.get('seoUrl') - if not isinstance(section_url, compat_str): + for section in sections: + section_id = str_or_none(section.get('id')) + if not section_id or section.get('contentType') != 'video': continue entry = { '_type': 'url_transparent', - 'url': urljoin(url + '/', section_url), + 'url': '/'.join([url, chapter_id, section_id]), 'title': strip_or_none(section.get('title')), 'description': clean_html(section.get('summary')), + 'thumbnail': metadata.get('coverImage'), + 'timestamp': unified_timestamp(metadata.get('publicationDate')), 'ie_key': PacktPubIE.ie_key(), } entry.update(chapter_info) entries.append(entry) - return self.playlist_result(entries, course_id, course.get('title')) + return self.playlist_result( + entries, course_id, metadata.get('title'), + clean_html(metadata.get('about'))) From c9b0564ac141bed3766fa65011274cb5c1c5bccb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 9 Jul 2019 11:56:16 +0100 Subject: [PATCH 2022/2417] [packtpub] Relax lesson _VALID_URL regex(closes #21695) --- youtube_dl/extractor/packtpub.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 3d39d1b27..11ad3b3b8 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -25,7 +25,7 @@ class PacktPubBaseIE(InfoExtractor): class PacktPubIE(PacktPubBaseIE): - _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)(?:/(?P<display_id>[^/?&#]+))?' + _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>[^/]+)/(?P<id>[^/]+)(?:/(?P<display_id>[^/?&#]+))?' _TESTS = [{ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', @@ -41,6 +41,9 @@ class PacktPubIE(PacktPubBaseIE): }, { 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro', 'only_matching': True, + }, { + 'url': 'https://subscription.packtpub.com/video/programming/9781838988906/p1/video1_1/business-card-project', + 'only_matching': True, }] _NETRC_MACHINE = 'packtpub' _TOKEN = None From 4b30282616c35b08308975b9d51614039b3f3d12 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 13:54:49 +0100 Subject: [PATCH 2023/2417] [funk] fix extraction(closes #17915) --- youtube_dl/extractor/funk.py | 171 +++++------------------------------ 1 file changed, 23 insertions(+), 148 deletions(-) diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py index 7e1af95e0..81d1949fd 100644 --- a/youtube_dl/extractor/funk.py +++ b/youtube_dl/extractor/funk.py @@ -1,89 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals -import itertools import re from .common import InfoExtractor from .nexx import NexxIE -from ..compat import compat_str from ..utils import ( int_or_none, - try_get, + str_or_none, ) -class FunkBaseIE(InfoExtractor): - _HEADERS = { - 'Accept': '*/*', - 'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8', - 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4', - } - _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4' - - @staticmethod - def _make_headers(referer): - headers = FunkBaseIE._HEADERS.copy() - headers['Referer'] = referer - return headers - - def _make_url_result(self, video): - return { - '_type': 'url_transparent', - 'url': 'nexx:741:%s' % video['sourceId'], - 'ie_key': NexxIE.ie_key(), - 'id': video['sourceId'], - 'title': video.get('title'), - 'description': video.get('description'), - 'duration': int_or_none(video.get('duration')), - 'season_number': int_or_none(video.get('seasonNr')), - 'episode_number': int_or_none(video.get('episodeNr')), - } - - -class FunkMixIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)' +class FunkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' _TESTS = [{ - 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten', - 'md5': '8edf617c2f2b7c9847dfda313f199009', - 'info_dict': { - 'id': '123748', - 'ext': 'mp4', - 'title': '"Die realste Kifferdoku aller Zeiten"', - 'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d', - 'timestamp': 1490274721, - 'upload_date': '20170323', - }, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - mix_id = mobj.group('id') - alias = mobj.group('alias') - - lists = self._download_json( - 'https://www.funk.net/api/v3.1/curation/curatedLists/', - mix_id, headers=self._make_headers(url), query={ - 'size': 100, - })['_embedded']['curatedListList'] - - metas = next( - l for l in lists - if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas'] - video = next( - meta['videoDataDelegate'] - for meta in metas - if try_get( - meta, lambda x: x['videoDataDelegate']['alias'], - compat_str) == alias) - - return self._make_url_result(video) - - -class FunkChannelIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2', + 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', + 'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81', 'info_dict': { 'id': '1155821', 'ext': 'mp4', @@ -92,83 +24,26 @@ class FunkChannelIE(FunkBaseIE): 'timestamp': 1514507395, 'upload_date': '20171229', }, - 'params': { - 'skip_download': True, - }, + }, { - # only available via byIdList API - 'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu', - 'info_dict': { - 'id': '205067', - 'ext': 'mp4', - 'title': 'Martin Sonneborn erklärt die EU', - 'description': 'md5:050f74626e4ed87edf4626d2024210c0', - 'timestamp': 1494424042, - 'upload_date': '20170510', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/', + 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - alias = mobj.group('alias') - - headers = self._make_headers(url) - - video = None - - # Id-based channels are currently broken on their side: webplayer - # tries to process them via byChannelAlias endpoint and fails - # predictably. - for page_num in itertools.count(): - by_channel_alias = self._download_json( - 'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s' - % channel_id, - 'Downloading byChannelAlias JSON page %d' % (page_num + 1), - headers=headers, query={ - 'filterFsk': 'false', - 'sort': 'creationDate,desc', - 'size': 100, - 'page': page_num, - }, fatal=False) - if not by_channel_alias: - break - video_list = try_get( - by_channel_alias, lambda x: x['_embedded']['videoList'], list) - if not video_list: - break - try: - video = next(r for r in video_list if r.get('alias') == alias) - break - except StopIteration: - pass - if not try_get( - by_channel_alias, lambda x: x['_links']['next']): - break - - if not video: - by_id_list = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/byIdList', - channel_id, 'Downloading byIdList JSON', headers=headers, - query={ - 'ids': alias, - }, fatal=False) - if by_id_list: - video = try_get(by_id_list, lambda x: x['result'][0], dict) - - if not video: - results = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/filter', - channel_id, 'Downloading filter JSON', headers=headers, query={ - 'channelId': channel_id, - 'size': 100, - })['result'] - video = next(r for r in results if r.get('alias') == alias) - - return self._make_url_result(video) + display_id, nexx_id = re.match(self._VALID_URL, url).groups() + video = self._download_json( + 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id) + return { + '_type': 'url_transparent', + 'url': 'nexx:741:' + nexx_id, + 'ie_key': NexxIE.ie_key(), + 'id': nexx_id, + 'title': video.get('title'), + 'description': video.get('description'), + 'duration': int_or_none(video.get('duration')), + 'channel_id': str_or_none(video.get('channelId')), + 'display_id': display_id, + 'tags': video.get('tags'), + 'thumbnail': video.get('imageUrlLandscape'), + } From 253289656f6c49f9bed7d043c9806ce4def4973f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 13:57:43 +0100 Subject: [PATCH 2024/2417] [extractors] update funk.net import --- youtube_dl/extractor/extractors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 02f17cf0d..68b9b9b25 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -395,10 +395,7 @@ from .frontendmasters import ( FrontendMastersCourseIE ) from .funimation import FunimationIE -from .funk import ( - FunkMixIE, - FunkChannelIE, -) +from .funk import FunkIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE From cfe781d4faf910b8f687ce39480456d97f0946cf Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 15:45:00 +0100 Subject: [PATCH 2025/2417] [gameinformer] fix extraction(closes #8895)(closes #15363)(closes #17206) --- youtube_dl/extractor/gameinformer.py | 34 ++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py index a2920a793..f1b96c172 100644 --- a/youtube_dl/extractor/gameinformer.py +++ b/youtube_dl/extractor/gameinformer.py @@ -1,12 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals +from .brightcove import BrightcoveNewIE from .common import InfoExtractor +from ..utils import ( + clean_html, + get_element_by_class, + get_element_by_id, +) class GameInformerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)' + _TESTS = [{ + # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url 'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx', 'md5': '292f26da1ab4beb4c9099f1304d2b071', 'info_dict': { @@ -18,16 +25,25 @@ class GameInformerIE(InfoExtractor): 'upload_date': '20150928', 'uploader_id': '694940074001', }, - } + }, { + # Brightcove id inside unique element with field--name-field-brightcove-video-id class + 'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue', + 'info_dict': { + 'id': '6057111913001', + 'ext': 'mp4', + 'title': 'New Gameplay Today – Streets Of Rogue', + 'timestamp': 1562699001, + 'upload_date': '20190709', + 'uploader_id': '694940074001', + + }, + }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s' def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage( url, display_id, headers=self.geo_verification_headers()) - brightcove_id = self._search_regex( - [r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"], - webpage, 'brightcove id') - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', - brightcove_id) + brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage)) + brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage) + return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id) From e4d53148f506cfcfab8559d86b40c72b7db87a6f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 10 Jul 2019 16:47:37 +0100 Subject: [PATCH 2026/2417] [funnyordie] move extraction to VoxMedia extractor and improve vox volume embed extraction(closes #16846) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/funnyordie.py | 162 ----------------------------- youtube_dl/extractor/voxmedia.py | 101 ++++++++++++------ 3 files changed, 67 insertions(+), 197 deletions(-) delete mode 100644 youtube_dl/extractor/funnyordie.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 68b9b9b25..555fadfaf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -396,7 +396,6 @@ from .frontendmasters import ( ) from .funimation import FunimationIE from .funk import FunkIE -from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE from .gaia import GaiaIE diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py deleted file mode 100644 index f85e7de14..000000000 --- a/youtube_dl/extractor/funnyordie.py +++ /dev/null @@ -1,162 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - unified_timestamp, -) - - -class FunnyOrDieIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|articles|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])' - _TESTS = [{ - 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', - 'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', - 'info_dict': { - 'id': '0732f586d7', - 'ext': 'mp4', - 'title': 'Heart-Shaped Box: Literal Video Version', - 'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', - 'thumbnail': r're:^http:.*\.jpg$', - 'uploader': 'DASjr', - 'timestamp': 1317904928, - 'upload_date': '20111006', - 'duration': 318.3, - }, - }, { - 'url': 'http://www.funnyordie.com/embed/e402820827', - 'info_dict': { - 'id': 'e402820827', - 'ext': 'mp4', - 'title': 'Please Use This Song (Jon Lajoie)', - 'description': 'Please use this to sell something. www.jonlajoie.com', - 'thumbnail': r're:^http:.*\.jpg$', - 'timestamp': 1398988800, - 'upload_date': '20140502', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - - links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage) - if not links: - raise ExtractorError('No media links available for %s' % video_id) - - links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0) - - m3u8_url = self._search_regex( - r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\1', - webpage, 'm3u8 url', group='url') - - formats = [] - - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False) - source_formats = list(filter( - lambda f: f.get('vcodec') != 'none', m3u8_formats)) - - bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] - bitrates.sort() - - if source_formats: - self._sort_formats(source_formats) - - for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)): - for path, ext in links: - ff = f.copy() - if ff: - if ext != 'mp4': - ff = dict( - [(k, v) for k, v in ff.items() - if k in ('height', 'width', 'format_id')]) - ff.update({ - 'format_id': ff['format_id'].replace('hls', ext), - 'ext': ext, - 'protocol': 'http', - }) - else: - ff.update({ - 'format_id': '%s-%d' % (ext, bitrate), - 'vbr': bitrate, - }) - ff['url'] = self._proto_relative_url( - '%s%d.%s' % (path, bitrate, ext)) - formats.append(ff) - self._check_formats(formats, video_id) - - formats.extend(m3u8_formats) - self._sort_formats( - formats, field_preference=('height', 'width', 'tbr', 'format_id')) - - subtitles = {} - for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage): - subtitles[src_lang] = [{ - 'ext': src.split('/')[-1], - 'url': 'http://www.funnyordie.com%s' % src, - }] - - timestamp = unified_timestamp(self._html_search_meta( - 'uploadDate', webpage, 'timestamp', default=None)) - - uploader = self._html_search_regex( - r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h', - webpage, 'uploader', default=None) - - title, description, thumbnail, duration = [None] * 4 - - medium = self._parse_json( - self._search_regex( - r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium', - default='{}'), - video_id, fatal=False) - if medium: - title = medium.get('title') - duration = float_or_none(medium.get('duration')) - if not timestamp: - timestamp = unified_timestamp(medium.get('publishDate')) - - post = self._parse_json( - self._search_regex( - r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details', - default='{}'), - video_id, fatal=False) - if post: - if not title: - title = post.get('name') - description = post.get('description') - thumbnail = post.get('picture') - - if not title: - title = self._og_search_title(webpage) - if not description: - description = self._og_search_description(webpage) - if not duration: - duration = int_or_none(self._html_search_meta( - ('video:duration', 'duration'), webpage, 'duration', default=False)) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index c7a0a88fe..b318e15d4 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from .once import OnceIE from ..compat import compat_urllib_parse_unquote -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, +) class VoxMediaVolumeIE(OnceIE): @@ -13,18 +16,43 @@ class VoxMediaVolumeIE(OnceIE): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id) + + setup = self._parse_json(self._search_regex( + r'setup\s*=\s*({.+});', webpage, 'setup'), video_id) + video_data = setup.get('video') or {} + info = { + 'id': video_id, + 'title': video_data.get('title_short'), + 'description': video_data.get('description_long') or video_data.get('description_short'), + 'thumbnail': video_data.get('brightcove_thumbnail') + } + asset = setup.get('asset') or setup.get('params') or {} + + formats = [] + hls_url = asset.get('hls_url') + if hls_url: + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + mp4_url = asset.get('mp4_url') + if mp4_url: + tbr = self._search_regex(r'-(\d+)k\.', mp4_url, 'bitrate', default=None) + format_id = 'http' + if tbr: + format_id += '-' + tbr + formats.append({ + 'format_id': format_id, + 'url': mp4_url, + 'tbr': int_or_none(tbr), + }) + if formats: + self._sort_formats(formats) + info['formats'] = formats + return info + for provider_video_type in ('ooyala', 'youtube', 'brightcove'): provider_video_id = video_data.get('%s_id' % provider_video_type) if not provider_video_id: continue - info = { - 'id': video_id, - 'title': video_data.get('title_short'), - 'description': video_data.get('description_long') or video_data.get('description_short'), - 'thumbnail': video_data.get('brightcove_thumbnail') - } if provider_video_type == 'brightcove': info['formats'] = self._extract_once_formats(provider_video_id) self._sort_formats(info['formats']) @@ -39,46 +67,49 @@ class VoxMediaVolumeIE(OnceIE): class VoxMediaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked|funnyordie)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)' _TESTS = [{ + # Volume embed, Youtube 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', 'info_dict': { - 'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe', + 'id': 'j4mLW6x17VM', 'ext': 'mp4', - 'title': 'Google\'s new material design direction', - 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', + 'title': 'Material world: how Google discovered what software is made of', + 'description': 'md5:dfc17e7715e3b542d66e33a109861382', + 'upload_date': '20190710', + 'uploader_id': 'TheVerge', + 'uploader': 'The Verge', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Youtube'], }, { - # data-ooyala-id + # Volume embed, Youtube 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet', - 'md5': 'd744484ff127884cd2ba09e3fa604e4b', + 'md5': '4c8f4a0937752b437c3ebc0ed24802b5', 'info_dict': { - 'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B', + 'id': 'Gy8Md3Eky38', 'ext': 'mp4', 'title': 'The Nexus 6: hands-on with Google\'s phablet', - 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', + 'description': 'md5:d9f0216e5fb932dd2033d6db37ac3f1d', + 'uploader_id': 'TheVerge', + 'upload_date': '20141021', + 'uploader': 'The Verge', }, - 'add_ie': ['Ooyala'], - 'skip': 'Video Not Found', + 'add_ie': ['Youtube'], + 'skip': 'similar to the previous test', }, { - # volume embed + # Volume embed, Youtube 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', 'info_dict': { - 'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b', + 'id': 'YCjDnX-Xzhg', 'ext': 'mp4', - 'title': 'The new frontier of LGBTQ civil rights, explained', - 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', + 'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination", + 'description': 'md5:fc1317922057de31cd74bce91eb1c66c', + 'uploader_id': 'voxdotcom', + 'upload_date': '20150915', + 'uploader': 'Vox', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], + 'add_ie': ['Youtube'], + 'skip': 'similar to the previous test', }, { # youtube embed 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance', @@ -93,6 +124,7 @@ class VoxMediaIE(InfoExtractor): 'uploader': 'Vox', }, 'add_ie': ['Youtube'], + 'skip': 'Page no longer contain videos', }, { # SBN.VideoLinkset.entryGroup multiple ooyala embeds 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', @@ -118,10 +150,11 @@ class VoxMediaIE(InfoExtractor): 'description': 'md5:e02d56b026d51aa32c010676765a690d', }, }], + 'skip': 'Page no longer contain videos', }, { # volume embed, Brightcove Once 'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya', - 'md5': '01571a896281f77dc06e084138987ea2', + 'md5': '2dbc77b8b0bff1894c2fce16eded637d', 'info_dict': { 'id': '1231c973d', 'ext': 'mp4', From 5fc0896168a9ff155475bfb0b7b66504c7077605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Jul 2019 23:37:09 +0700 Subject: [PATCH 2027/2417] [beeg] Add support for api/v6 v2 URLs without t argument (closes #21701) --- youtube_dl/extractor/beeg.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c15a0ac8f..5788d13ba 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -32,6 +32,10 @@ class BeegIE(InfoExtractor): # api/v6 v2 'url': 'https://beeg.com/1941093077?t=911-1391', 'only_matching': True, + }, { + # api/v6 v2 w/o t + 'url': 'https://beeg.com/1277207756', + 'only_matching': True, }, { 'url': 'https://beeg.porn/video/5416503', 'only_matching': True, @@ -49,14 +53,17 @@ class BeegIE(InfoExtractor): r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', default='1546225636701') - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - t = qs.get('t', [''])[0].split('-') - if len(t) > 1: + if len(video_id) >= 10: query = { 'v': 2, - 's': t[0], - 'e': t[1], } + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query.update({ + 's': t[0], + 'e': t[1], + }) else: query = {'v': 1} From 4dcd4b7b163feddc07959ca34cbb29815b354c25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 00:04:25 +0700 Subject: [PATCH 2028/2417] [mgtv] Pass Referer HTTP header for format URLs (closes #21726) --- youtube_dl/extractor/mgtv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 84137df50..7ae2e3c3b 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -79,6 +79,9 @@ class MGTVIE(InfoExtractor): 'ext': 'mp4', 'tbr': tbr, 'protocol': 'm3u8_native', + 'http_headers': { + 'Referer': url, + }, }) self._sort_formats(formats) From 7612406bf9de825e569b9d2d1faee3d82fd60f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 00:34:03 +0700 Subject: [PATCH 2029/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5ce78b07a..ad99a64d6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +version <unreleased> + +Core ++ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016) + +Extractors ++ [mgtv] Pass Referer HTTP header for format URLs (#21726) ++ [beeg] Add support for api/v6 v2 URLs without t argument (#21701) +* [voxmedia:volume] Improvevox embed extraction (#16846) +* [funnyordie] Move extraction to VoxMedia extractor (#16846) +* [gameinformer] Fix extraction (#8895, #15363, #17206) +* [funk] Fix extraction (#17915) +* [packtpub] Relax lesson URL regular expression (#21695) +* [packtpub] Fix extraction (#21268) +* [philharmoniedeparis] Relax URL regular expression (#21672) +* [peertube] Detect embed URLs in generic extraction (#21666) +* [mixer:vod] Relax URL regular expression (#21657, #21658) ++ [lecturio] Add support id based URLs (#21630) ++ [go] Add site info for disneynow (#21613) +* [ted] Restrict info regular expression (#21631) +* [twitch:vod] Actualize m3u8 URL (#21538, #21607) +* [vzaar] Fix videos with empty title (#21606) +* [tvland] Fix extraction (#21384) +* [arte] Clean extractor (#15583, #21614) + + version 2019.07.02 Core From 0d1f4af39dab36138a3809e31a8c09ee588e1b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 00:43:54 +0700 Subject: [PATCH 2030/2417] release 2019.07.12 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 13 +------------ youtube_dl/version.py | 2 +- 8 files changed, 15 insertions(+), 26 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index fb0d33b8f..fcfadeb1f 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.02 + [debug] youtube-dl version 2019.07.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 3c95565a6..7e1a3d1c0 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 7410776d7..71782a104 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index cc52bcca6..6bcfde1f8 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com --> - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.02 + [debug] youtube-dl version 2019.07.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index bbd421b1a..89d9c63aa 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' <!-- Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: -- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. +- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.12. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Finally, put x into all relevant boxes (like this [x]) --> - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've verified that I'm running youtube-dl version **2019.07.12** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index ad99a64d6..45cb2746e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.07.12 Core + [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 55ae43144..4e664336d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -58,16 +58,8 @@ - **ARD:mediathek** - **ARDBetaMediathek** - **Arkena** - - **arte.tv** - **arte.tv:+7** - - **arte.tv:cinema** - - **arte.tv:concert** - - **arte.tv:creative** - - **arte.tv:ddc** - **arte.tv:embed** - - **arte.tv:future** - - **arte.tv:info** - - **arte.tv:magazine** - **arte.tv:playlist** - **AsianCrush** - **AsianCrushPlaylist** @@ -313,9 +305,7 @@ - **FrontendMastersCourse** - **FrontendMastersLesson** - **Funimation** - - **FunkChannel** - - **FunkMix** - - **FunnyOrDie** + - **Funk** - **Fusion** - **Fux** - **FXNetworks** @@ -896,7 +886,6 @@ - **TF1** - **TFO** - **TheIntercept** - - **theoperaplatform** - **ThePlatform** - **ThePlatformFeed** - **TheScene** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 78fe54326..18bcb33e2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.02' +__version__ = '2019.07.12' From baf67a604d912722b0fe03a40e9dc5349a2208cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 02:26:05 +0700 Subject: [PATCH 2031/2417] [youtube] Fix authentication (closes #11270) --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b570d5bae..9f661a84f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -116,6 +116,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'f.req': json.dumps(f_req), 'flowName': 'GlifWebSignIn', 'flowEntry': 'ServiceLogin', + # TODO: reverse actual botguard identifier generation algo + 'bgRequest': '["identifier",""]', }) return self._download_json( url, None, note=note, errnote=errnote, From 27019dbb4b4829b5e1910c6b714f904ce8fad680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jul 2019 03:45:58 +0700 Subject: [PATCH 2032/2417] [youtube] Fix is_live extraction (closes #21734) --- youtube_dl/extractor/youtube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9f661a84f..8a3c502ba 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -27,6 +27,7 @@ from ..compat import ( compat_str, ) from ..utils import ( + bool_or_none, clean_html, dict_get, error_to_compat_str, @@ -1890,6 +1891,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if view_count is None and video_details: view_count = int_or_none(video_details.get('viewCount')) + if is_live is None: + is_live = bool_or_none(dict_get( + video_details, ('isLive', 'isLiveContent'), + skip_false_values=False)) + # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True) From 0dd58a523fffd06c126c006722850bab36bd3aa2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 11 Jul 2019 23:09:09 +0100 Subject: [PATCH 2033/2417] [fivetv] relax video URL regex and support https URLs --- youtube_dl/extractor/fivetv.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py index 9f9863746..c4c0f1b3d 100644 --- a/youtube_dl/extractor/fivetv.py +++ b/youtube_dl/extractor/fivetv.py @@ -9,7 +9,7 @@ from ..utils import int_or_none class FiveTVIE(InfoExtractor): _VALID_URL = r'''(?x) - http:// + https?:// (?:www\.)?5-tv\.ru/ (?: (?:[^/]+/)+(?P<id>\d+)| @@ -39,6 +39,7 @@ class FiveTVIE(InfoExtractor): 'duration': 180, }, }, { + # redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/ 'url': 'http://www.5-tv.ru/glavnoe/#itemDetails', 'info_dict': { 'id': 'glavnoe', @@ -46,6 +47,7 @@ class FiveTVIE(InfoExtractor): 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$', 'thumbnail': r're:^https?://.*\.jpg$', }, + 'skip': 'redirect to «Известия. Главное» project page', }, { 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/', 'only_matching': True, @@ -70,7 +72,7 @@ class FiveTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - [r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"', + [r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"', r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'], webpage, 'video url') From 16d3672ad73802043a9cccd1505909949e2ce71f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 11 Jul 2019 23:37:34 +0100 Subject: [PATCH 2034/2417] [espn] fix fivethirtyeight.com extraction --- youtube_dl/extractor/abcnews.py | 9 ++++++--- youtube_dl/extractor/espn.py | 16 ++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index cd29aca77..8b407bf9c 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -15,10 +15,13 @@ class AbcNewsVideoIE(AMPIE): IE_NAME = 'abcnews:video' _VALID_URL = r'''(?x) https?:// - abcnews\.go\.com/ (?: - [^/]+/video/(?P<display_id>[0-9a-z-]+)-| - video/embed\?.*?\bid= + abcnews\.go\.com/ + (?: + [^/]+/video/(?P<display_id>[0-9a-z-]+)-| + video/embed\?.*?\bid= + )| + fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/ ) (?P<id>\d+) ''' diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 8cc9bd165..6cf05e6da 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -216,17 +216,14 @@ class FiveThirtyEightIE(InfoExtractor): _TEST = { 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', 'info_dict': { - 'id': '21846851', - 'ext': 'mp4', + 'id': '56032156', + 'ext': 'flv', 'title': 'FiveThirtyEight: The Raiders can still make the playoffs', 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', - 'timestamp': 1513960621, - 'upload_date': '20171222', }, 'params': { 'skip_download': True, }, - 'expected_warnings': ['Unable to download f4m manifest'], } def _real_extract(self, url): @@ -234,9 +231,8 @@ class FiveThirtyEightIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_id = self._search_regex( - r'data-video-id=["\'](?P<id>\d+)', - webpage, 'video id', group='id') + embed_url = self._search_regex( + r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)', + webpage, 'embed url') - return self.url_result( - 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) + return self.url_result(embed_url, 'AbcNewsVideo') From d4ece5d359bfe5c6b87e0ef19f2b351e408e510c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 21:56:49 +0100 Subject: [PATCH 2035/2417] [bleacherreport] fix Bleacher Report CMS extraction --- youtube_dl/extractor/bleacherreport.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py index e829974ff..dc60224d0 100644 --- a/youtube_dl/extractor/bleacherreport.py +++ b/youtube_dl/extractor/bleacherreport.py @@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor): video = article_data.get('video') if video: video_type = video['type'] - if video_type == 'cms.bleacherreport.com': + if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] elif video_type == 'ooyala.com': info['url'] = 'ooyala:%s' % video['id'] @@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor): class BleacherReportCMSIE(AMPIE): - _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})' + _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})' _TESTS = [{ - 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', + 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms', 'md5': '2e4b0a997f9228ffa31fada5c53d1ed1', 'info_dict': { 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', @@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE): def _real_extract(self, url): video_id = self._match_id(url) - info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id) + info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) info['id'] = video_id return info From 82f68e4a0113f00144b55c5d2a1951793ac78818 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 22:02:06 +0100 Subject: [PATCH 2036/2417] [facebook] fallback to twitter:image meta for thumbnail extraction(closes #21224) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 789dd79d5..a3dcdca3e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -428,7 +428,7 @@ class FacebookIE(InfoExtractor): timestamp = int_or_none(self._search_regex( r'<abbr[^>]+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) - thumbnail = self._og_search_thumbnail(webpage) + thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage) view_count = parse_count(self._search_regex( r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', From 0441d6266ca275b1b4a2ad4efdb4b3f54e318e88 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 22:31:11 +0100 Subject: [PATCH 2037/2417] [rudo] remove extractor(closes #18430)(closes #18474) Covered by generic extractor --- youtube_dl/extractor/biobiochiletv.py | 19 ++++++---- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/rudo.py | 53 --------------------------- 3 files changed, 12 insertions(+), 61 deletions(-) delete mode 100644 youtube_dl/extractor/rudo.py diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py index b92031c8a..dc86c57c5 100644 --- a/youtube_dl/extractor/biobiochiletv.py +++ b/youtube_dl/extractor/biobiochiletv.py @@ -6,7 +6,6 @@ from ..utils import ( ExtractorError, remove_end, ) -from .rudo import RudoIE class BioBioChileTVIE(InfoExtractor): @@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor): }, { 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', 'info_dict': { - 'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos', + 'id': 'b4xd0LK3SK', 'ext': 'mp4', - 'uploader': '(none)', - 'upload_date': '20160708', - 'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos', + # TODO: fix url_transparent information overriding + # 'uploader': 'Juan Pablo Echenique', + 'title': 'Comentario Oscar Cáceres', + }, + 'params': { + # empty m3u8 manifest + 'skip_download': True, }, }, { 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', @@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - rudo_url = RudoIE._extract_url(webpage) + rudo_url = self._search_regex( + r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage, 'embed URL', None, group='url') if not rudo_url: raise ExtractorError('No videos found') @@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( - r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', + r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', webpage, 'uploader', fatal=False) return { diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 555fadfaf..e88ad34a8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -967,7 +967,6 @@ from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rtvs import RTVSIE -from .rudo import RudoIE from .ruhd import RUHDIE from .rutube import ( RutubeIE, diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py deleted file mode 100644 index f036f6757..000000000 --- a/youtube_dl/extractor/rudo.py +++ /dev/null @@ -1,53 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - js_to_json, - get_element_by_class, - unified_strdate, -) - - -class RudoIE(InfoExtractor): - _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' - - _TEST = { - 'url': 'http://rudo.video/vod/oTzw0MGnyG', - 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', - 'info_dict': { - 'id': 'oTzw0MGnyG', - 'ext': 'mp4', - 'title': 'Comentario Tomás Mosciatti', - 'upload_date': '20160617', - }, - } - - @classmethod - def _extract_url(cls, webpage): - mobj = re.search( - r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', - webpage) - if mobj: - return mobj.group('url') - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') - - jwplayer_data = self._parse_json(self._search_regex( - r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, - transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) - - info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') - - info_dict.update({ - 'title': self._og_search_title(webpage), - 'upload_date': unified_strdate(get_element_by_class('date', webpage)), - }) - - return info_dict From 57227618fe2708f9e4f1c87fdd08c49c7122c13b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 22:50:37 +0100 Subject: [PATCH 2038/2417] [spike] fix Bellator extraction --- youtube_dl/extractor/spike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 21b93a5b3..7c11ea7aa 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -22,7 +22,7 @@ class BellatorIE(MTVServicesInfoExtractor): 'only_matching': True, }] - _FEED_URL = 'http://www.spike.com/feeds/mrss/' + _FEED_URL = 'http://www.bellator.com/feeds/mrss/' _GEO_COUNTRIES = ['US'] From 272355c17265e8dc921d7f1518606b15fd800112 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 12 Jul 2019 23:26:46 +0100 Subject: [PATCH 2039/2417] [dbtv] fix extraction --- youtube_dl/extractor/dbtv.py | 51 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index f232f0dc5..aaedf2e3d 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -7,50 +7,51 @@ from .common import InfoExtractor class DBTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?' + _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})' _TESTS = [{ - 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', - 'md5': '2e24f67936517b143a234b4cadf792ec', + 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/', + 'md5': 'b8f850ba1860adbda668d367f9b77699', 'info_dict': { - 'id': '3649835190001', - 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', + 'id': 'PynxJnNWChE', 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', - 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', + 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', 'thumbnail': r're:https?://.*\.jpg', - 'timestamp': 1404039863, - 'upload_date': '20140629', - 'duration': 69.544, - 'uploader_id': '1027729757001', + 'upload_date': '20160916', + 'duration': 69, + 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', + 'uploader': 'Dagbladet', }, - 'add_ie': ['BrightcoveNew'] + 'add_ie': ['Youtube'] }, { - 'url': 'http://dbtv.no/3649835190001', + 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 'only_matching': True, }, { - 'url': 'http://www.dbtv.no/lazyplayer/4631135248001', - 'only_matching': True, - }, { - 'url': 'http://dbtv.no/vice/5000634109001', - 'only_matching': True, - }, { - 'url': 'http://dbtv.no/filmtrailer/3359293614001', + 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', 'only_matching': True, }] @staticmethod def _extract_urls(webpage): return [url for _, url in re.findall( - r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1', + r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1', webpage)] def _real_extract(self, url): - video_id, display_id = re.match(self._VALID_URL, url).groups() - - return { + display_id, video_id = re.match(self._VALID_URL, url).groups() + info = { '_type': 'url_transparent', - 'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id, 'id': video_id, 'display_id': display_id, - 'ie_key': 'BrightcoveNew', } + if len(video_id) == 11: + info.update({ + 'url': video_id, + 'ie_key': 'Youtube', + }) + else: + info.update({ + 'url': 'jwplatform:' + video_id, + 'ie_key': 'JWPlatform', + }) + return info From c72dc20d099bbe1dc4ede83e8f94a7bc42d81532 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 13 Jul 2019 10:13:07 +0100 Subject: [PATCH 2040/2417] [roosterteeth] fix free episode extraction(#16094) --- youtube_dl/extractor/roosterteeth.py | 97 ++++++++++++++-------------- 1 file changed, 47 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 857434540..d3eeeba62 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -4,11 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( ExtractorError, int_or_none, - strip_or_none, - unescapeHTML, + str_or_none, urlencode_postdata, ) @@ -21,15 +24,14 @@ class RoosterTeethIE(InfoExtractor): 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'md5': 'e2bd7764732d785ef797700a2489f212', 'info_dict': { - 'id': '26576', + 'id': '9156', 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'ext': 'mp4', - 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement', - 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', + 'title': 'Million Dollars, But... The Game Announcement', + 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', 'thumbnail': r're:^https?://.*\.png$', 'series': 'Million Dollars, But...', 'episode': 'Million Dollars, But... The Game Announcement', - 'comment_count': int, }, }, { 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', @@ -89,60 +91,55 @@ class RoosterTeethIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id - webpage = self._download_webpage(url, display_id) - - episode = strip_or_none(unescapeHTML(self._search_regex( - (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', - r'<title>(?P<title>[^<]+)'), webpage, 'title', - default=None, group='title'))) - - title = strip_or_none(self._og_search_title( - webpage, default=None)) or episode - - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?Phttp.+?\.m3u8.*?)\1', - webpage, 'm3u8 url', default=None, group='url') - - if not m3u8_url: - if re.search(r']+class=["\']non-sponsor', webpage): - self.raise_login_required( - '%s is only available for FIRST members' % display_id) - - if re.search(r']+class=["\']golive-gate', webpage): - self.raise_login_required('%s is not available yet' % display_id) - - raise ExtractorError('Unable to extract m3u8 URL') + try: + m3u8_url = self._download_json( + api_episode_url + '/videos', display_id, + 'Downloading video JSON metadata')['data'][0]['attributes']['url'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: + self.raise_login_required( + '%s is only available for FIRST members' % display_id) + raise formats = self._extract_m3u8_formats( - m3u8_url, display_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls') + m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') self._sort_formats(formats) - description = strip_or_none(self._og_search_description(webpage)) - thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage)) + episode = self._download_json( + api_episode_url, display_id, + 'Downloading episode JSON metadata')['data'][0] + attributes = episode['attributes'] + title = attributes.get('title') or attributes['display_title'] + video_id = compat_str(episode['id']) - series = self._search_regex( - (r'

    More ([^<]+)

    ', r']+>See All ([^<]+) Videos<'), - webpage, 'series', fatal=False) - - comment_count = int_or_none(self._search_regex( - r'>Comments \((\d+)\)<', webpage, - 'comment count', fatal=False)) - - video_id = self._search_regex( - (r'containerId\s*=\s*["\']episode-(\d+)\1', - r' Date: Sat, 13 Jul 2019 12:47:02 +0100 Subject: [PATCH 2041/2417] [livejournal] Add new extractor(closes #21526) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/livejournal.py | 42 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/livejournal.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e88ad34a8..75a53f54b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -579,6 +579,7 @@ from .linkedin import ( ) from .linuxacademy import LinuxAcademyIE from .litv import LiTVIE +from .livejournal import LiveJournalIE from .liveleak import ( LiveLeakIE, LiveLeakEmbedIE, diff --git a/youtube_dl/extractor/livejournal.py b/youtube_dl/extractor/livejournal.py new file mode 100644 index 000000000..3a9f4553f --- /dev/null +++ b/youtube_dl/extractor/livejournal.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class LiveJournalIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P\d+)' + _TEST = { + 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272', + 'md5': 'adaf018388572ced8a6f301ace49d4b2', + 'info_dict': { + 'id': '1263729', + 'ext': 'mp4', + 'title': 'Истребители против БПЛА', + 'upload_date': '20190624', + 'timestamp': 1561406715, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + record = self._parse_json(self._search_regex( + r'Site\.page\s*=\s*({.+?});', webpage, + 'page data'), video_id)['video']['record'] + storage_id = compat_str(record['storageid']) + title = record.get('name') + if title: + # remove filename extension(.mp4, .mov, etc...) + title = title.rsplit('.', 1)[0] + return { + '_type': 'url_transparent', + 'id': video_id, + 'title': title, + 'thumbnail': record.get('thumbnail'), + 'timestamp': int_or_none(record.get('timecreate')), + 'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id, + 'ie_key': 'EaglePlatform', + } From 4a71ef6da677bfd08b39d5cb6f584be9f6b2fc27 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Jul 2019 13:08:19 +0100 Subject: [PATCH 2042/2417] [dlive] Add new extractor(closes #18080) --- youtube_dl/extractor/dlive.py | 94 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 ++ 2 files changed, 98 insertions(+) create mode 100644 youtube_dl/extractor/dlive.py diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py new file mode 100644 index 000000000..b81eaecce --- /dev/null +++ b/youtube_dl/extractor/dlive.py @@ -0,0 +1,94 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class DLiveVODIE(InfoExtractor): + IE_NAME = 'dlive:vod' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[a-zA-Z0-9]+)' + _TEST = { + 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR', + 'info_dict': { + 'id': '3mTzOl4WR', + 'ext': 'mp4', + 'title': 'Minecraft with james charles epic', + 'upload_date': '20190701', + 'timestamp': 1562011015, + 'uploader_id': 'pdp', + } + } + + def _real_extract(self, url): + uploader_id, vod_id = re.match(self._VALID_URL, url).groups() + broadcast = self._download_json( + 'https://graphigo.prd.dlive.tv/', vod_id, + data=json.dumps({'query': '''query { + pastBroadcast(permlink:"%s+%s") { + content + createdAt + length + playbackUrl + title + thumbnailUrl + viewCount + } +}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast'] + title = broadcast['title'] + formats = self._extract_m3u8_formats( + broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native') + self._sort_formats(formats) + return { + 'id': vod_id, + 'title': title, + 'uploader_id': uploader_id, + 'formats': formats, + 'description': broadcast.get('content'), + 'thumbnail': broadcast.get('thumbnailUrl'), + 'timestamp': int_or_none(broadcast.get('createdAt'), 1000), + 'view_count': int_or_none(broadcast.get('viewCount')), + } + + +class DLiveStreamIE(InfoExtractor): + IE_NAME = 'dlive:stream' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?P[\w.-]+)' + + def _real_extract(self, url): + display_name = self._match_id(url) + user = self._download_json( + 'https://graphigo.prd.dlive.tv/', display_name, + data=json.dumps({'query': '''query { + userByDisplayName(displayname:"%s") { + livestream { + content + createdAt + title + thumbnailUrl + watchingCount + } + username + } +}''' % display_name}).encode())['data']['userByDisplayName'] + livestream = user['livestream'] + title = livestream['title'] + username = user['username'] + formats = self._extract_m3u8_formats( + 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username, + display_name, 'mp4') + self._sort_formats(formats) + return { + 'id': display_name, + 'title': self._live_title(title), + 'uploader': display_name, + 'uploader_id': username, + 'formats': formats, + 'description': livestream.get('content'), + 'thumbnail': livestream.get('thumbnailUrl'), + 'is_live': True, + 'timestamp': int_or_none(livestream.get('createdAt'), 1000), + 'view_count': int_or_none(livestream.get('watchingCount')), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 75a53f54b..15f54a214 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1255,6 +1255,10 @@ from .udn import UDNEmbedIE from .ufctv import UFCTVIE from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE +from .dlive import ( + DLiveVODIE, + DLiveStreamIE, +) from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE From b99f11a56b64b647366a00c335a4a55f3e9e1854 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Jul 2019 14:11:57 +0100 Subject: [PATCH 2043/2417] [dlive] restrict DLive Stream _VALID_URL regex --- youtube_dl/extractor/dlive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py index b81eaecce..8787f15a6 100644 --- a/youtube_dl/extractor/dlive.py +++ b/youtube_dl/extractor/dlive.py @@ -55,7 +55,7 @@ class DLiveVODIE(InfoExtractor): class DLiveStreamIE(InfoExtractor): IE_NAME = 'dlive:stream' - _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?P[\w.-]+)' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P[\w.-]+)' def _real_extract(self, url): display_name = self._match_id(url) From 5f562bd4bbc780e535e187efb36659247b41d6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 00:09:39 +0700 Subject: [PATCH 2044/2417] [spankbang] Fix extraction (closes #21763, closes #21764) --- youtube_dl/extractor/spankbang.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index f11d728ca..eb0919e3a 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -106,6 +106,8 @@ class SpankBangIE(InfoExtractor): for format_id, format_url in stream.items(): if format_id.startswith(STREAM_URL_PREFIX): + if format_url and isinstance(format_url, list): + format_url = format_url[0] extract_format( format_id[len(STREAM_URL_PREFIX):], format_url) From f9eeeda31c1a643aced8283440983f3a45208840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 00:21:39 +0700 Subject: [PATCH 2045/2417] [spankbang] Fix and improve metadata extraction --- youtube_dl/extractor/spankbang.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index eb0919e3a..e040ada29 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + merge_dicts, orderedSet, parse_duration, parse_resolution, @@ -26,6 +27,8 @@ class SpankBangIE(InfoExtractor): 'description': 'dillion harper masturbates on a bed', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', + 'timestamp': 1422571989, + 'upload_date': '20150129', 'age_limit': 18, } }, { @@ -113,26 +116,29 @@ class SpankBangIE(InfoExtractor): self._sort_formats(formats) + info = self._search_json_ld(webpage, video_id, default={}) + title = self._html_search_regex( - r'(?s)]*>(.+?)', webpage, 'title') + r'(?s)]*>(.+?)', webpage, 'title', default=None) description = self._search_regex( r']+\bclass=["\']bottom[^>]+>\s*

    [^<]*

    \s*

    ([^<]+)', - webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) - uploader = self._search_regex( - r'class="user"[^>]*>]+>([^<]+)', + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail(webpage, default=None) + uploader = self._html_search_regex( + (r'(?s)]+class=["\']profile[^>]+>(.+?)', + r'class="user"[^>]*>]+>([^<]+)'), webpage, 'uploader', default=None) duration = parse_duration(self._search_regex( r']+\bclass=["\']right_side[^>]+>\s*([^<]+)', - webpage, 'duration', fatal=False)) + webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( - r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False)) + r'([\d,.]+)\s+plays', webpage, 'view count', default=None)) age_limit = self._rta_search(webpage) - return { + return merge_dicts({ 'id': video_id, - 'title': title, + 'title': title or video_id, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, @@ -140,7 +146,8 @@ class SpankBangIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': age_limit, - } + }, info + ) class SpankBangPlaylistIE(InfoExtractor): From b7ef93f0ab2963047953be1472a5a108d92b621c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 01:19:17 +0700 Subject: [PATCH 2046/2417] [twitter] Improve uploader id extraction (closes #21705) --- youtube_dl/extractor/twitter.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 41d0b6be8..cebb6238c 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -428,11 +428,22 @@ class TwitterIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, { + 'url': 'https://twitter.com/foobar/status/1087791357756956680', + 'info_dict': { + 'id': '1087791357756956680', + 'ext': 'mp4', + 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'md5:66d493500c013e3e2d434195746a7f78', + 'uploader': 'Twitter', + 'uploader_id': 'Twitter', + 'duration': 61.567, + }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - user_id = mobj.group('user_id') twid = mobj.group('id') webpage, urlh = self._download_webpage_handle( @@ -441,8 +452,13 @@ class TwitterIE(InfoExtractor): if 'twitter.com/account/suspended' in urlh.geturl(): raise ExtractorError('Account suspended by Twitter.', expected=True) - if user_id is None: - mobj = re.match(self._VALID_URL, urlh.geturl()) + user_id = None + + redirect_mobj = re.match(self._VALID_URL, urlh.geturl()) + if redirect_mobj: + user_id = redirect_mobj.group('user_id') + + if not user_id: user_id = mobj.group('user_id') username = remove_end(self._og_search_title(webpage), ' on Twitter') From ba036333bf4ebcba70deb51e77e81dca723fb54d Mon Sep 17 00:00:00 2001 From: geditorit <52565706+geditorit@users.noreply.github.com> Date: Sun, 14 Jul 2019 01:23:22 +0700 Subject: [PATCH 2047/2417] [youtube] Add more invidious instances to _VALID_URL (#21694) --- youtube_dl/extractor/youtube.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8a3c502ba..762611b89 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -371,10 +371,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| + # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances (?:(?:www|dev)\.)?invidio\.us/| - (?:www\.)?invidiou\.sh/| - (?:www\.)?invidious\.snopyta\.org/| + (?:(?:www|no)\.)?invidiou\.sh/| + (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/| (?:www\.)?invidious\.kabi\.tk/| + (?:www\.)?invidious\.enkirton\.net/| + (?:www\.)?invidious\.13ad\.de/| + (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls From d89a0a8026e0010a96a1309d70f8fcc2164dd5a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 01:43:31 +0700 Subject: [PATCH 2048/2417] [lynda] Handle missing subtitles (closes #20490, closes #20513) --- youtube_dl/extractor/lynda.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 3084c6dff..b3d8653d0 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -117,6 +117,10 @@ class LyndaIE(LyndaBaseIE): }, { 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html', 'only_matching': True, + }, { + # Status="NotFound", Message="Transcript not found" + 'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html', + 'only_matching': True, }] def _raise_unavailable(self, video_id): @@ -247,12 +251,17 @@ class LyndaIE(LyndaBaseIE): def _get_subtitles(self, video_id): url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id - subs = self._download_json(url, None, False) + subs = self._download_webpage( + url, video_id, 'Downloading subtitles JSON', fatal=False) + if not subs or 'Status="NotFound"' in subs: + return {} + subs = self._parse_json(subs, video_id, fatal=False) + if not subs: + return {} fixed_subs = self._fix_subtitles(subs) if fixed_subs: return {'en': [{'ext': 'srt', 'data': fixed_subs}]} - else: - return {} + return {} class LyndaCourseIE(LyndaBaseIE): From c452790a796730113dd62db0e743b11045606e27 Mon Sep 17 00:00:00 2001 From: aerworker Date: Sat, 13 Jul 2019 22:38:47 +0300 Subject: [PATCH 2049/2417] [yandexmusic] Add support for multi disk albums and extract track number and disk number (closes #21420) (#21421) * [yandexmusic] extract tracks from all volumes of an album (closes #21420) * [yandexmusic] extract genre, disk_number and track_number * [yandexmusic] extract decomposed artist names * Update yandexmusic.py * Update yandexmusic.py * Update yandexmusic.py --- youtube_dl/extractor/yandexmusic.py | 63 +++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 1dfee59e9..fea817419 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -51,23 +51,43 @@ class YandexMusicTrackIE(YandexMusicBaseIE): IE_DESC = 'Яндекс.Музыка - Трек' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/track/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508/track/4878838', 'md5': 'f496818aa2f60b6c0062980d2e00dc20', 'info_dict': { 'id': '4878838', 'ext': 'mp3', - 'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', + 'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', 'filesize': 4628061, 'duration': 193.04, 'track': 'Gypsy Eyes 1', 'album': 'Gypsy Soul', 'album_artist': 'Carlo Ambrosio', - 'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari', + 'artist': 'Carlo Ambrosio & Fabio Di Bari', 'release_year': 2009, }, 'skip': 'Travis CI servers blocked by YandexMusic', - } + }, { + # multiple disks + 'url': 'http://music.yandex.ru/album/3840501/track/705105', + 'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e', + 'info_dict': { + 'id': '705105', + 'ext': 'mp3', + 'title': 'Hooverphonic - Sometimes', + 'filesize': 5743386, + 'duration': 239.27, + 'track': 'Sometimes', + 'album': 'The Best of Hooverphonic', + 'album_artist': 'Hooverphonic', + 'artist': 'Hooverphonic', + 'release_year': 2016, + 'genre': 'pop', + 'disc_number': 2, + 'track_number': 9, + }, + 'skip': 'Travis CI servers blocked by YandexMusic', + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -110,9 +130,21 @@ class YandexMusicTrackIE(YandexMusicBaseIE): 'abr': int_or_none(download_data.get('bitrate')), } + def extract_artist_name(artist): + decomposed = artist.get('decomposed') + if not isinstance(decomposed, list): + return artist['name'] + parts = [artist['name']] + for element in decomposed: + if isinstance(element, dict) and element.get('name'): + parts.append(element['name']) + elif isinstance(element, compat_str): + parts.append(element) + return ''.join(parts) + def extract_artist(artist_list): if artist_list and isinstance(artist_list, list): - artists_names = [a['name'] for a in artist_list if a.get('name')] + artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')] if artists_names: return ', '.join(artists_names) @@ -121,10 +153,17 @@ class YandexMusicTrackIE(YandexMusicBaseIE): album = albums[0] if isinstance(album, dict): year = album.get('year') + disc_number = int_or_none(try_get( + album, lambda x: x['trackPosition']['volume'])) + track_number = int_or_none(try_get( + album, lambda x: x['trackPosition']['index'])) track_info.update({ 'album': album.get('title'), 'album_artist': extract_artist(album.get('artists')), 'release_year': int_or_none(year), + 'genre': album.get('genre'), + 'disc_number': disc_number, + 'track_number': track_number, }) track_artist = extract_artist(track.get('artists')) @@ -152,7 +191,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): IE_DESC = 'Яндекс.Музыка - Альбом' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/?(\?|$)' - _TEST = { + _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508', 'info_dict': { 'id': '540508', @@ -160,7 +199,15 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): }, 'playlist_count': 50, 'skip': 'Travis CI servers blocked by YandexMusic', - } + }, { + 'url': 'https://music.yandex.ru/album/3840501', + 'info_dict': { + 'id': '3840501', + 'title': 'Hooverphonic - The Best of Hooverphonic (2016)', + }, + 'playlist_count': 33, + 'skip': 'Travis CI servers blocked by YandexMusic', + }] def _real_extract(self, url): album_id = self._match_id(url) @@ -169,7 +216,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id, album_id, 'Downloading album JSON') - entries = self._build_playlist(album['volumes'][0]) + entries = self._build_playlist([track for volume in album['volumes'] for track in volume]) title = '%s - %s' % (album['artists'][0]['name'], album['title']) year = album.get('year') From 2fe074a960773c2ec6f0a94a8c5fab5af8714651 Mon Sep 17 00:00:00 2001 From: hrimfaxi Date: Sun, 14 Jul 2019 03:57:44 +0800 Subject: [PATCH 2050/2417] [porn91] Fix extraction (#21312) --- youtube_dl/extractor/porn91.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 24c3600fe..20eac647a 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -39,7 +39,12 @@ class Porn91IE(InfoExtractor): r'

    ([^<]+)
    ', webpage, 'title') title = title.replace('\n', '') - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + video_link_url = self._search_regex( + r']+id=["\']fm-video_link[^>]+>([^<]+)', + webpage, 'video link') + videopage = self._download_webpage(video_link_url, video_id) + + info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0] duration = parse_duration(self._search_regex( r'时长:\s*\s*(\d+:\d+)', webpage, 'duration', fatal=False)) From 364a2cb658a0db069a746ca5e25c8b589b3c509d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 03:07:02 +0700 Subject: [PATCH 2051/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 45cb2746e..bc722b73c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +version + +Extractors +* [porn91] Fix extraction (#21312) ++ [yandexmusic] Extract track number and disk number (#21421) ++ [yandexmusic] Add support for multi disk albums (#21420, #21421) +* [lynda] Handle missing subtitles (#20490, #20513) ++ [youtube] Add more invidious instances to URL regular expression (#21694) +* [twitter] Improve uploader id extraction (#21705) +* [spankbang] Fix and improve metadata extraction +* [spankbang] Fix extraction (#21763, #21764) ++ [dlive] Add support for dlive.tv (#18080) ++ [livejournal] Add support for livejournal.com (#21526) +* [roosterteeth] Fix free episode extraction (#16094) +* [dbtv] Fix extraction +* [bellator] Fix extraction +- [rudo] Remove extractor (#18430, #18474) +* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224) +* [bleacherreport] Fix Bleacher Report CMS extraction +* [espn] Fix fivethirtyeight.com extraction +* [5tv] Relax video URL regular expression and support https URLs +* [youtube] Fix is_live extraction (#21734) +* [youtube] Fix authentication (#11270) + + version 2019.07.12 Core From 0250161c5272e2794f33085f9f8c3f464d8ee996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 03:09:16 +0700 Subject: [PATCH 2052/2417] [yandexmusic] Add missing import --- youtube_dl/extractor/yandexmusic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index fea817419..08d35e04c 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -10,6 +10,7 @@ from ..utils import ( ExtractorError, int_or_none, float_or_none, + try_get, ) From ce80cacefd70a2c268de2fb1d5838ce66ac9a683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 03:10:49 +0700 Subject: [PATCH 2053/2417] release 2019.07.14 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 4 +++- youtube_dl/version.py | 2 +- 8 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index fcfadeb1f..80ca6d5f1 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.12 + [debug] youtube-dl version 2019.07.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 7e1a3d1c0..a4f3c4dd9 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 71782a104..9d82e1cd9 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 6bcfde1f8..ff82a7435 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.12 + [debug] youtube-dl version 2019.07.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 89d9c63aa..f692c663d 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.12** +- [ ] I've verified that I'm running youtube-dl version **2019.07.14** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index bc722b73c..be1606586 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.14 Extractors * [porn91] Fix extraction (#21312) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 4e664336d..9ae6e5c96 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -223,6 +223,8 @@ - **DiscoveryNetworksDe** - **DiscoveryVR** - **Disney** + - **dlive:stream** + - **dlive:vod** - **Dotsub** - **DouyuShow** - **DouyuTV**: 斗鱼 @@ -448,6 +450,7 @@ - **linkedin:learning:course** - **LinuxAcademy** - **LiTV** + - **LiveJournal** - **LiveLeak** - **LiveLeakEmbed** - **livestream** @@ -754,7 +757,6 @@ - **rtve.es:television** - **RTVNH** - **RTVS** - - **Rudo** - **RUHD** - **rutube**: Rutube videos - **rutube:channel**: Rutube channels diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 18bcb33e2..8a7f4d733 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.12' +__version__ = '2019.07.14' From 898238e9f82e29ef139ff934f6949ddf574bd4d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jul 2019 20:30:05 +0700 Subject: [PATCH 2054/2417] [youtube] Restrict is_live extraction (closes #21782) --- youtube_dl/extractor/youtube.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 762611b89..43a3fad9f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1896,9 +1896,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): view_count = int_or_none(video_details.get('viewCount')) if is_live is None: - is_live = bool_or_none(dict_get( - video_details, ('isLive', 'isLiveContent'), - skip_false_values=False)) + is_live = bool_or_none(video_details.get('isLive')) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: From 2adedc477ee4c87709ca8d1c9bdfac3c31b1a57b Mon Sep 17 00:00:00 2001 From: Gary <35942108+LameLemon@users.noreply.github.com> Date: Mon, 15 Jul 2019 18:53:20 +0300 Subject: [PATCH 2055/2417] [gfycat] Extend _VALID_URL (closes #21779) (#21780) --- youtube_dl/extractor/gfycat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index eb6f85836..bbe3cb283 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -11,7 +11,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P[^-/?#]+)' + _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#]+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor): 'categories': list, 'age_limit': 0, } + }, { + 'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish', + 'only_matching': True }, { 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull', 'only_matching': True From 791d2e81172826ef645b62c6961c65f8c2cb2a4f Mon Sep 17 00:00:00 2001 From: geditorit <52565706+geditorit@users.noreply.github.com> Date: Mon, 15 Jul 2019 22:54:22 +0700 Subject: [PATCH 2056/2417] [youtube] Add support for invidious.mastodon.host (#21777) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 43a3fad9f..a87a46b3b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -378,6 +378,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?invidious\.kabi\.tk/| (?:www\.)?invidious\.enkirton\.net/| (?:www\.)?invidious\.13ad\.de/| + (?:www\.)?invidious\.mastodon\.host/| (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains From f2a213d02596b603dea5be65f4778591101db5a2 Mon Sep 17 00:00:00 2001 From: tlonic Date: Mon, 15 Jul 2019 11:58:55 -0400 Subject: [PATCH 2057/2417] [einthusan] Add support for einthusan.com (closes #21748) (#21775) --- youtube_dl/extractor/einthusan.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 4485bf8c1..1fb00c9b0 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..compat import ( @@ -18,7 +19,7 @@ from ..utils import ( class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?Peinthusan\.(?:tv|com))/movie/watch/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://einthusan.tv/movie/watch/9097/', 'md5': 'ff0f7f2065031b8a2cf13a933731c035', @@ -32,6 +33,9 @@ class EinthusanIE(InfoExtractor): }, { 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi', 'only_matching': True, + }, { + 'url': 'https://einthusan.com/movie/watch/9097/', + 'only_matching': True, }] # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js @@ -41,7 +45,9 @@ class EinthusanIE(InfoExtractor): )).decode('utf-8'), video_id) def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) @@ -53,7 +59,7 @@ class EinthusanIE(InfoExtractor): page_id = self._html_search_regex( ']+data-pageid="([^"]+)"', webpage, 'page ID') video_data = self._download_json( - 'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id, + 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id, data=urlencode_postdata({ 'xEvent': 'UIVideoPlayer.PingOutcome', 'xJson': json.dumps({ From 7d4dd3e5b444c43c1cc19b53689514e8deaf3849 Mon Sep 17 00:00:00 2001 From: chien-yu <32920873+chien-yu@users.noreply.github.com> Date: Mon, 15 Jul 2019 09:03:03 -0700 Subject: [PATCH 2058/2417] [ctsnews] Fix YouTube embeds extraction (#21678) --- youtube_dl/extractor/ctsnews.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index d565335cf..dcda7e89d 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import unified_timestamp - +from .youtube import YoutubeIE class CtsNewsIE(InfoExtractor): IE_DESC = '華視新聞' @@ -14,8 +14,8 @@ class CtsNewsIE(InfoExtractor): 'info_dict': { 'id': '201501291578109', 'ext': 'mp4', - 'title': '以色列.真主黨交火 3人死亡', - 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...', + 'title': '以色列.真主黨交火 3人死亡 - 華視新聞網', + 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...', 'timestamp': 1422528540, 'upload_date': '20150129', } @@ -26,7 +26,7 @@ class CtsNewsIE(InfoExtractor): 'info_dict': { 'id': '201309031304098', 'ext': 'mp4', - 'title': '韓國31歲童顏男 貌如十多歲小孩', + 'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網', 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1378205880, @@ -62,8 +62,7 @@ class CtsNewsIE(InfoExtractor): video_url = mp4_feed['source_url'] else: self.to_screen('Not CTSPlayer video, trying Youtube...') - youtube_url = self._search_regex( - r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url') + youtube_url = YoutubeIE._extract_url(page) return self.url_result(youtube_url, ie='Youtube') From 799756a3b3c794284ca52b9af482e1f03fc46833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jul 2019 23:47:10 +0700 Subject: [PATCH 2059/2417] [kaltura] Check source format URL (#21290) --- youtube_dl/extractor/kaltura.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 639d73837..0a733424c 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -103,6 +103,11 @@ class KalturaIE(InfoExtractor): { 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', 'only_matching': True, + }, + { + # unavailable source format + 'url': 'kaltura:513551:1_66x4rg7o', + 'only_matching': True, } ] @@ -306,12 +311,17 @@ class KalturaIE(InfoExtractor): f['fileExt'] = 'mp4' video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) + format_id = '%(fileExt)s-%(bitrate)s' % f + # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o) + if f.get('isOriginal') is True and not self._is_valid_url( + video_url, entry_id, format_id): + continue # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g # -f mp4-56) vcodec = 'none' if 'videoCodecId' not in f and f.get( 'frameRate') == 0 else f.get('videoCodecId') formats.append({ - 'format_id': '%(fileExt)s-%(bitrate)s' % f, + 'format_id': format_id, 'ext': f.get('fileExt'), 'tbr': int_or_none(f['bitrate']), 'fps': int_or_none(f.get('frameRate')), From f61496863d2207718a2a6cb1591dcbc7abc282de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jul 2019 23:56:05 +0700 Subject: [PATCH 2060/2417] [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv (closes #21281, closes #21290) --- youtube_dl/extractor/asiancrush.py | 80 +++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py index 6d71c5ad5..0348e680c 100644 --- a/youtube_dl/extractor/asiancrush.py +++ b/youtube_dl/extractor/asiancrush.py @@ -5,14 +5,12 @@ import re from .common import InfoExtractor from .kaltura import KalturaIE -from ..utils import ( - extract_attributes, - remove_end, -) +from ..utils import extract_attributes class AsianCrushIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P\d+)v\b' + _VALID_URL_BASE = r'https?://(?:www\.)?(?P(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))' + _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P\d+)v\b' % _VALID_URL_BASE _TESTS = [{ 'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/', 'md5': 'c3b740e48d0ba002a42c0b72857beae6', @@ -20,7 +18,7 @@ class AsianCrushIE(InfoExtractor): 'id': '1_y4tmjm5r', 'ext': 'mp4', 'title': 'Women Who Flirt', - 'description': 'md5:3db14e9186197857e7063522cb89a805', + 'description': 'md5:7e986615808bcfb11756eb503a751487', 'timestamp': 1496936429, 'upload_date': '20170608', 'uploader_id': 'craig@crifkin.com', @@ -28,10 +26,27 @@ class AsianCrushIE(InfoExtractor): }, { 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', 'only_matching': True, + }, { + 'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/', + 'only_matching': True, + }, { + 'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/video/010400v/drifters/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/', + 'only_matching': True, + }, { + 'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) @@ -51,7 +66,7 @@ class AsianCrushIE(InfoExtractor): r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id') player = self._download_webpage( - 'https://api.asiancrush.com/embeddedVideoPlayer', video_id, + 'https://api.%s/embeddedVideoPlayer' % host, video_id, query={'id': entry_id}) kaltura_id = self._search_regex( @@ -63,15 +78,23 @@ class AsianCrushIE(InfoExtractor): r'/p(?:artner_id)?/(\d+)', player, 'partner id', default='513551') - return self.url_result( - 'kaltura:%s:%s' % (partner_id, kaltura_id), - ie=KalturaIE.ie_key(), video_id=kaltura_id, - video_title=title) + description = self._html_search_regex( + r'(?s)]+\bclass=["\']description["\'][^>]*>(.+?)', + webpage, 'description', fatal=False) + + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), + 'ie_key': KalturaIE.ie_key(), + 'id': video_id, + 'title': title, + 'description': description, + } class AsianCrushPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P\d+)s\b' - _TEST = { + _VALID_URL = r'%s/series/0+(?P\d+)s\b' % AsianCrushIE._VALID_URL_BASE + _TESTS = [{ 'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/', 'info_dict': { 'id': '12481', @@ -79,7 +102,16 @@ class AsianCrushPlaylistIE(InfoExtractor): 'description': 'md5:7addd7c5132a09fd4741152d96cce886', }, 'playlist_count': 20, - } + }, { + 'url': 'https://www.yuyutv.com/series/013920s/peep-show/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/series/016375s/mononoke/', + 'only_matching': True, + }, { + 'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/', + 'only_matching': True, + }] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -96,15 +128,15 @@ class AsianCrushPlaylistIE(InfoExtractor): entries.append(self.url_result( mobj.group('url'), ie=AsianCrushIE.ie_key())) - title = remove_end( - self._html_search_regex( - r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, - 'title', default=None) or self._og_search_title( - webpage, default=None) or self._html_search_meta( - 'twitter:title', webpage, 'title', - default=None) or self._search_regex( - r'([^<]+)', webpage, 'title', fatal=False), - ' | AsianCrush') + title = self._html_search_regex( + r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, + 'title', default=None) or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', + default=None) or self._search_regex( + r'([^<]+)', webpage, 'title', fatal=False) + if title: + title = re.sub(r'\s*\|\s*.+?$', '', title) description = self._og_search_description( webpage, default=None) or self._html_search_meta( From 8b4a0ebf10376e89daa9da3cd9570a3f16f8f375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jul 2019 23:59:23 +0700 Subject: [PATCH 2061/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index be1606586..a67b0595a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +version + +Extractors ++ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv + (#21281, #21290) +* [kaltura] Check source format URL (#21290) +* [ctsnews] Fix YouTube embeds extraction (#21678) ++ [einthusan] Add support for einthusan.com (#21748, #21775) ++ [youtube] Add support for invidious.mastodon.host (#21777) ++ [gfycat] Extend URL regular expression (#21779, #21780) +* [youtube] Restrict is_live extraction (#21782) + + version 2019.07.14 Extractors From 2f1991ff1461b11134d2b09d6e8d681ce51d93d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Jul 2019 00:01:46 +0700 Subject: [PATCH 2062/2417] release 2019.07.16 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 80ca6d5f1..89001802b 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.14 + [debug] youtube-dl version 2019.07.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index a4f3c4dd9..4cc58fa42 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 9d82e1cd9..f38760b77 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index ff82a7435..e4133dc4e 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.14 + [debug] youtube-dl version 2019.07.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index f692c663d..0bb6543e3 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.14** +- [ ] I've verified that I'm running youtube-dl version **2019.07.16** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index a67b0595a..fe0ca7164 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.16 Extractors + [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8a7f4d733..b0f5a6b47 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.14' +__version__ = '2019.07.16' From 1824bfdcdff1af4bfc4f7f6ed885d45ee7e8c376 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Jul 2019 22:51:10 +0100 Subject: [PATCH 2063/2417] [vrv] fix CMS signing query extraction(closes #21809) --- youtube_dl/extractor/vrv.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index c814a8a4a..6e51469b0 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -64,7 +64,15 @@ class VRVBaseIE(InfoExtractor): def _call_cms(self, path, video_id, note): if not self._CMS_SIGNING: - self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing'] + index = self._call_api('index', video_id, 'CMS Signing') + self._CMS_SIGNING = index.get('cms_signing') or {} + if not self._CMS_SIGNING: + for signing_policy in index.get('signing_policies', []): + signing_path = signing_policy.get('path') + if signing_path and signing_path.startswith('/cms/'): + name, value = signing_policy.get('name'), signing_policy.get('value') + if name and value: + self._CMS_SIGNING[name] = value return self._download_json( self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) From 5e1c39ac853bfe4da7feda2a48544cb5811873d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Wed, 17 Jul 2019 17:47:53 +0200 Subject: [PATCH 2064/2417] [extractor/common] Fix typo in thumbnails resolution description (#21817) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c3e9eec6..859786617 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -220,7 +220,7 @@ class InfoExtractor(object): * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) - * "resolution" (optional, string "{width}x{height"}, + * "resolution" (optional, string "{width}x{height}", deprecated) * "filesize" (optional, int) thumbnail: Full URL to a video thumbnail image. From 9c1da4a9f9fc17cffc2fa2261030c66d2a032a58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 20 Jul 2019 23:08:26 +0700 Subject: [PATCH 2065/2417] [extractor/generic] Restrict --default-search schemeless URLs detection pattern (closes #21842) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 77e217460..d34fc4b15 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2226,7 +2226,7 @@ class GenericIE(InfoExtractor): default_search = 'fixup_error' if default_search in ('auto', 'auto_warning', 'fixup_error'): - if '/' in url: + if re.match(r'^[^\s/]+\.[^\s/]+/', url): self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) elif default_search != 'fixup_error': From 2e18adec98a44ca839cbaaed7ce27d8d07f54cfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 20 Jul 2019 23:46:34 +0700 Subject: [PATCH 2066/2417] [youtube:playlist] Relax _VIDEO_RE (closes #21844) --- youtube_dl/extractor/youtube.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a87a46b3b..aa316ba88 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2432,7 +2432,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (%(playlist_id)s) )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' - _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})&[^"]*?index=(?P\d+)(?:[^>]+>(?P[^<]+))?' + _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?' IE_NAME = 'youtube:playlist' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', @@ -2556,6 +2556,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'noplaylist': True, 'skip_download': True, }, + }, { + # https://github.com/ytdl-org/youtube-dl/issues/21844 + 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'info_dict': { + 'title': 'Data Analysis with Dr Mike Pound', + 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'uploader_id': 'Computerphile', + 'uploader': 'Computerphile', + }, + 'playlist_mincount': 11, }, { 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21', 'only_matching': True, From 13a75688a55f32cde316b0f7d5992ff4a1f6d279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Jul 2019 00:01:46 +0700 Subject: [PATCH 2067/2417] [youtube] Fix some tests --- youtube_dl/extractor/youtube.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index aa316ba88..b2c714505 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2455,6 +2455,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': '29C3: Not my department', 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', + 'uploader': 'Christiaan008', + 'uploader_id': 'ChRiStIaAn008', }, 'playlist_count': 95, }, { @@ -2463,6 +2465,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': '[OLD]Team Fortress 2 (Class-based LP)', 'id': 'PLBB231211A4F62143', + 'uploader': 'Wickydoo', + 'uploader_id': 'Wickydoo', }, 'playlist_mincount': 26, }, { @@ -2471,6 +2475,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': 'Uploads from Cauchemar', 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', + 'uploader': 'Cauchemar', + 'uploader_id': 'Cauchemar89', }, 'playlist_mincount': 799, }, { @@ -2488,13 +2494,17 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': 'JODA15', 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', + 'uploader': 'milan', + 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw', } }, { 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', 'playlist_mincount': 485, 'info_dict': { - 'title': '2017 華語最新單曲 (2/24更新)', + 'title': '2018 Chinese New Singles (11/6 updated)', 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', + 'uploader': 'LBK', + 'uploader_id': 'sdragonfang', } }, { 'note': 'Embedded SWF player', @@ -2503,13 +2513,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'title': 'JODA7', 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ', - } + }, + 'skip': 'This playlist does not exist', }, { 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', 'info_dict': { 'title': 'Uploads from Interstellar Movie', 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', + 'uploader': 'Interstellar Movie', + 'uploader_id': 'InterstellarMovie1', }, 'playlist_mincount': 21, }, { @@ -2534,6 +2547,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This video is not available.', 'add_ie': [YoutubeIE.ie_key()], }, { 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5', @@ -2545,7 +2559,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'uploader_id': 'backuspagemuseum', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum', 'upload_date': '20161008', - 'license': 'Standard YouTube License', 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a', 'categories': ['Nonprofits & Activism'], 'tags': list, @@ -2732,6 +2745,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w', 'title': 'Uploads from lex will', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', } }, { 'note': 'Age restricted channel', @@ -2741,6 +2756,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'info_dict': { 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', + 'uploader': 'Deus Ex', + 'uploader_id': 'DeusExOfficial', }, }, { 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', @@ -2825,6 +2842,8 @@ class YoutubeUserIE(YoutubeChannelIE): 'info_dict': { 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ', 'title': 'Uploads from The Linux Foundation', + 'uploader': 'The Linux Foundation', + 'uploader_id': 'TheLinuxFoundation', } }, { # Only available via https://www.youtube.com/c/12minuteathlete/videos @@ -2834,6 +2853,8 @@ class YoutubeUserIE(YoutubeChannelIE): 'info_dict': { 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ', 'title': 'Uploads from 12 Minute Athlete', + 'uploader': '12 Minute Athlete', + 'uploader_id': 'the12minuteathlete', } }, { 'url': 'ytuser:phihag', @@ -2927,7 +2948,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): 'playlist_mincount': 4, 'info_dict': { 'id': 'ThirstForScience', - 'title': 'Thirst for Science', + 'title': 'ThirstForScience', }, }, { # with "Load more" button @@ -2944,6 +2965,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): 'id': 'UCiU1dHvZObB2iP6xkJ__Icw', 'title': 'Chem Player', }, + 'skip': 'Blocked', }] From 3b446ab3519948980630e3328b971385826ffba8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 20 Jul 2019 20:20:30 +0100 Subject: [PATCH 2068/2417] [discovery] add support go.discovery.com URLs --- youtube_dl/extractor/discovery.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index b70c307a7..9003545ce 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -19,9 +19,9 @@ from ..compat import compat_HTTPError class DiscoveryIE(DiscoveryGoBaseIE): _VALID_URL = r'''(?x)https?:// (?P<site> + (?:(?:www|go)\.)?discovery| (?:www\.)? (?: - discovery| investigationdiscovery| discoverylife| animalplanet| @@ -56,6 +56,9 @@ class DiscoveryIE(DiscoveryGoBaseIE): }, { 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', 'only_matching': True, + }, { + 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', + 'only_matching': True, }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False From ab794a553c36ddd690e2243450653c3ede43e606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Jul 2019 13:20:21 +0700 Subject: [PATCH 2069/2417] [ctsnews] PEP 8 --- youtube_dl/extractor/ctsnews.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index dcda7e89d..679f1d92e 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..utils import unified_timestamp from .youtube import YoutubeIE + class CtsNewsIE(InfoExtractor): IE_DESC = '華視新聞' _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' From 608b8a4300fab7792e637fd9b7045adf1c0cb2aa Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Mon, 22 Jul 2019 02:59:36 +0900 Subject: [PATCH 2070/2417] [yahoo:japannews] Add extractor (closes #21698) (#21265) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/yahoo.py | 131 +++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 15f54a214..06de556b7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1448,6 +1448,7 @@ from .yahoo import ( YahooSearchIE, YahooGyaOPlayerIE, YahooGyaOIE, + YahooJapanNewsIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic import ( diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index a3b5f00c8..e5ebdd180 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals +import hashlib import itertools import json import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( + compat_str, compat_urllib_parse, compat_urlparse, ) @@ -18,7 +20,9 @@ from ..utils import ( int_or_none, mimetype2ext, smuggle_url, + try_get, unescapeHTML, + url_or_none, ) from .brightcove import ( @@ -556,3 +560,130 @@ class YahooGyaOIE(InfoExtractor): 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'), YahooGyaOPlayerIE.ie_key(), video_id)) return self.playlist_result(entries, program_id) + + +class YahooJapanNewsIE(InfoExtractor): + IE_NAME = 'yahoo:japannews' + IE_DESC = 'Yahoo! Japan News' + _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?' + _GEO_COUNTRIES = ['JP'] + _TESTS = [{ + 'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int', + 'info_dict': { + 'id': '1736242', + 'ext': 'mp4', + 'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース', + 'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))', + 'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$', + }, + 'params': { + 'skip_download': True, + }, + }, { + # geo restricted + 'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04', + 'only_matching': True, + }, { + 'url': 'https://headlines.yahoo.co.jp/videonews/', + 'only_matching': True, + }, { + 'url': 'https://news.yahoo.co.jp', + 'only_matching': True, + }, { + 'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/', + 'only_matching': True, + }, { + 'url': 'https://news.yahoo.co.jp/feature/1356', + 'only_matching': True + }] + + def _extract_formats(self, json_data, content_id): + formats = [] + + video_data = try_get( + json_data, + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) + for vid in video_data or []: + delivery = vid.get('delivery') + url = url_or_none(vid.get('Url')) + if not delivery or not url: + continue + elif delivery == 'hls': + formats.extend( + self._extract_m3u8_formats( + url, content_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': url, + 'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')), + 'height': int_or_none(vid.get('height')), + 'width': int_or_none(vid.get('width')), + 'tbr': int_or_none(vid.get('bitrate')), + }) + self._remove_duplicate_formats(formats) + self._sort_formats(formats) + + return formats + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + display_id = mobj.group('id') or host + + webpage = self._download_webpage(url, display_id) + + title = self._html_search_meta( + ['og:title', 'twitter:title'], webpage, 'title', default=None + ) or self._html_search_regex('<title>([^<]+)', webpage, 'title') + + if display_id == host: + # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...) + stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage) + entries = [ + self.url_result( + smuggle_url( + 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id, + {'geo_countries': ['JP']}), + ie='BrightcoveNew', video_id=plist_id) + for plist_id in stream_plists] + return self.playlist_result(entries, playlist_title=title) + + # Article page + description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'twitter:image', webpage, 'thumbnail', default=None) + space_id = self._search_regex([ + r']+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)', + r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)', + r' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.16 + [debug] youtube-dl version 2019.07.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 4cc58fa42..aeca69974 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index f38760b77..e232df726 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index e4133dc4e..8608a085c 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.16 + [debug] youtube-dl version 2019.07.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 0bb6543e3..64864a3b7 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.16** +- [ ] I've verified that I'm running youtube-dl version **2019.07.27** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 08e58524e..32070b8d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.27 Extractors + [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9ae6e5c96..7cf60eefe 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1117,6 +1117,7 @@ - **Yahoo**: Yahoo screen and movies - **yahoo:gyao** - **yahoo:gyao:player** + - **yahoo:japannews**: Yahoo! Japan News - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b0f5a6b47..e3e37b8c5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.16' +__version__ = '2019.07.27' From 8dbf751aa241475dd8a7a6d3040713b5874fd057 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 30 Jul 2019 00:13:33 +0100 Subject: [PATCH 2074/2417] [youtube] improve title and description extraction(closes #21934) --- youtube_dl/extractor/youtube.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b2c714505..9a182fcf6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1820,16 +1820,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_details = try_get( player_response, lambda x: x['videoDetails'], dict) or {} - # title - if 'title' in video_info: - video_title = video_info['title'][0] - elif 'title' in player_response: - video_title = video_details['title'] - else: + video_title = video_info.get('title', [None])[0] or video_details.get('title') + if not video_title: self._downloader.report_warning('Unable to extract video title') video_title = '_' - # description description_original = video_description = get_element_by_id("eow-description", video_webpage) if video_description: @@ -1854,11 +1849,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ''', replace_url, video_description) video_description = clean_html(video_description) else: - fd_mobj = re.search(r' Date: Tue, 30 Jul 2019 09:41:23 +0700 Subject: [PATCH 2075/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 32070b8d5..0dbfc4dbf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [youtube] Fix and improve title and description extraction (#21934) + + version 2019.07.27 Extractors From 85c2c4b4abea4618be8013d41f6ba9e95c4e5e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Jul 2019 09:43:47 +0700 Subject: [PATCH 2076/2417] release 2019.07.30 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 06322bb2f..ccd033716 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.27 + [debug] youtube-dl version 2019.07.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index aeca69974..8709937ad 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index e232df726..c3a555ed3 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8608a085c..07042a466 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.27 + [debug] youtube-dl version 2019.07.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 64864a3b7..4cf75a2eb 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.27** +- [ ] I've verified that I'm running youtube-dl version **2019.07.30** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 0dbfc4dbf..f6f1f7e38 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.07.30 Extractors * [youtube] Fix and improve title and description extraction (#21934) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e3e37b8c5..04dc83605 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.27' +__version__ = '2019.07.30' From c2d125d99f81aa33429b2158acd9a90524575378 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Jul 2019 00:14:08 +0700 Subject: [PATCH 2077/2417] [youtube] Improve metadata extraction for age gate content (closes #21943) --- youtube_dl/extractor/youtube.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9a182fcf6..1aee0e465 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1700,6 +1700,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_token(v_info): return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token')) + def extract_player_response(player_response, video_id): + pl_response = str_or_none(player_response) + if not pl_response: + return + pl_response = self._parse_json(pl_response, video_id, fatal=False) + if isinstance(pl_response, dict): + add_dash_mpd_pr(pl_response) + return pl_response + player_response = {} # Get video info @@ -1722,7 +1731,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): note='Refetching age-gated info webpage', errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) + pl_response = video_info.get('player_response', [None])[0] + player_response = extract_player_response(pl_response, video_id) add_dash_mpd(video_info) + view_count = extract_view_count(video_info) else: age_gate = False video_info = None @@ -1745,11 +1757,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): is_live = True sts = ytplayer_config.get('sts') if not player_response: - pl_response = str_or_none(args.get('player_response')) - if pl_response: - pl_response = self._parse_json(pl_response, video_id, fatal=False) - if isinstance(pl_response, dict): - player_response = pl_response + player_response = extract_player_response(args.get('player_response'), video_id) if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): add_dash_mpd_pr(player_response) # We also try looking in get_video_info since it may contain different dashmpd @@ -1781,9 +1789,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): get_video_info = compat_parse_qs(video_info_webpage) if not player_response: pl_response = get_video_info.get('player_response', [None])[0] - if isinstance(pl_response, dict): - player_response = pl_response - add_dash_mpd_pr(player_response) + player_response = extract_player_response(pl_response, video_id) add_dash_mpd(get_video_info) if view_count is None: view_count = extract_view_count(get_video_info) From 2c8b1a21e8901904ab674264f5eda118bca992a5 Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Tue, 30 Jul 2019 19:40:50 +0100 Subject: [PATCH 2078/2417] [openload] Add support for oload.best (#21913) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 11e92e471..030355257 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P @@ -368,6 +368,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', 'only_matching': True, + }, { + 'url': 'https://oload.best/embed/kkz9JgVZeWc/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From 07ab44c420a79d1faae09d00323242746e522c4c Mon Sep 17 00:00:00 2001 From: CeruleanSky Date: Tue, 30 Jul 2019 14:43:49 -0400 Subject: [PATCH 2079/2417] [dlive] Relax _VALID_URL (#21909) --- youtube_dl/extractor/dlive.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py index 8787f15a6..d95c67a5b 100644 --- a/youtube_dl/extractor/dlive.py +++ b/youtube_dl/extractor/dlive.py @@ -9,8 +9,8 @@ from ..utils import int_or_none class DLiveVODIE(InfoExtractor): IE_NAME = 'dlive:vod' - _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[a-zA-Z0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[^/?#&]+)' + _TESTS = [{ 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR', 'info_dict': { 'id': '3mTzOl4WR', @@ -20,7 +20,10 @@ class DLiveVODIE(InfoExtractor): 'timestamp': 1562011015, 'uploader_id': 'pdp', } - } + }, { + 'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg', + 'only_matching': True, + }] def _real_extract(self, url): uploader_id, vod_id = re.match(self._VALID_URL, url).groups() From 72791634127cc3093592c807225ec684af1cfcc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Jul 2019 02:31:19 +0700 Subject: [PATCH 2080/2417] [tvn24] Fix metadata extraction (closes #21833, closes #21834) --- youtube_dl/extractor/tvn24.py | 42 +++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 6590e1fd0..39f57ae6b 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + NO_DEFAULT, unescapeHTML, ) @@ -20,6 +21,18 @@ class TVN24IE(InfoExtractor): 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', 'thumbnail': 're:https?://.*[.]jpeg', } + }, { + # different layout + 'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html', + 'info_dict': { + 'id': '1771763', + 'ext': 'mp4', + 'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)', + 'thumbnail': 're:https?://.*', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html', 'only_matching': True, @@ -35,18 +48,21 @@ class TVN24IE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) - title = self._og_search_title(webpage) + title = self._og_search_title( + webpage, default=None) or self._search_regex( + r']+class=["\']magazineItemHeader[^>]+>(.+?)(?!\1).+?)\1' % attr, webpage, - name, group='json', fatal=fatal) or '{}', - video_id, transform_source=unescapeHTML, fatal=fatal) + name, group='json', default=default, fatal=fatal) or '{}', + display_id, transform_source=unescapeHTML, fatal=fatal) quality_data = extract_json('data-quality', 'formats') @@ -59,16 +75,24 @@ class TVN24IE(InfoExtractor): }) self._sort_formats(formats) - description = self._og_search_description(webpage) + description = self._og_search_description(webpage, default=None) thumbnail = self._og_search_thumbnail( webpage, default=None) or self._html_search_regex( r'\bdata-poster=(["\'])(?P(?!\1).+?)\1', webpage, 'thumbnail', group='url') + video_id = None + share_params = extract_json( - 'data-share-params', 'share params', fatal=False) + 'data-share-params', 'share params', default=None) if isinstance(share_params, dict): - video_id = share_params.get('id') or video_id + video_id = share_params.get('id') + + if not video_id: + video_id = self._search_regex( + r'data-vid-id=["\'](\d+)', webpage, 'video id', + default=None) or self._search_regex( + r',(\d+)\.html', url, 'video id', default=display_id) return { 'id': video_id, From 766c4f6090fdea635f50597a3c5d60643e3a2913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Jul 2019 02:32:02 +0700 Subject: [PATCH 2081/2417] [tvn24] Fix test --- youtube_dl/extractor/tvn24.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 39f57ae6b..de0fb5063 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor): 'id': '1584444', 'ext': 'mp4', 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', - 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', + 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.', 'thumbnail': 're:https?://.*[.]jpeg', } }, { From 9a37ff82f17383336251afcd80821620dd86ee95 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Wed, 31 Jul 2019 13:45:02 -0700 Subject: [PATCH 2082/2417] [mgtv] Extract format_note (#21881) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit format_note should now show 标清, 高清, 超清, 蓝光, etc. --- youtube_dl/extractor/mgtv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 7ae2e3c3b..71fc3ec56 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -82,6 +82,7 @@ class MGTVIE(InfoExtractor): 'http_headers': { 'Referer': url, }, + 'format_note': stream.get('name'), }) self._sort_formats(formats) From 826dcff99cd0a44ec5fa94f0e0201f5115d097ef Mon Sep 17 00:00:00 2001 From: cantandwont <52587695+cantandwont@users.noreply.github.com> Date: Thu, 1 Aug 2019 06:54:39 +1000 Subject: [PATCH 2083/2417] Output batch filename when it could not be read (#21915) --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 165c975dd..9a659fc65 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -94,7 +94,7 @@ def _real_main(argv=None): if opts.verbose: write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: - sys.exit('ERROR: batch file could not be read') + sys.exit('ERROR: batch file %s could not be read' % opts.batchfile) all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] From 535111657b507d4f4454160aaf2587e7ce6b9936 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Aug 2019 22:44:38 +0100 Subject: [PATCH 2084/2417] [discovery] use API call for video data extraction(#21808) --- youtube_dl/extractor/discovery.py | 59 ++++++++++++++----------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 9003545ce..c4b90cd90 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -5,14 +5,8 @@ import re import string from .discoverygo import DiscoveryGoBaseIE -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) -from ..utils import ( - ExtractorError, - try_get, -) +from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError from ..compat import compat_HTTPError @@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE): cookingchanneltv| motortrend ) - )\.com(?P/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+))''' + )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+)''' _TESTS = [{ - 'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley', + 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', 'info_dict': { - 'id': '5a2d9b4d6b66d17a5026e1fd', + 'id': '5a2f35ce6b66d17a5026e29e', 'ext': 'mp4', - 'title': 'Dave Foley', - 'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f', - 'duration': 608, + 'title': 'Riding with Matthew Perry', + 'description': 'md5:a34333153e79bc4526019a5129e7f878', + 'duration': 84, }, 'params': { 'skip_download': True, # requires ffmpeg @@ -62,17 +56,10 @@ class DiscoveryIE(DiscoveryGoBaseIE): }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False + _API_BASE_URL = 'https://api.discovery.com/v1/' def _real_extract(self, url): - site, path, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) - - react_data = self._parse_json(self._search_regex( - r'window\.__reactTransmitPacket\s*=\s*({.+?});', - webpage, 'react data'), display_id) - content_blocks = react_data['layout'][path]['contentBlocks'] - video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0] - video_id = video['id'] + site, display_id = re.match(self._VALID_URL, url).groups() access_token = None cookies = self._get_cookies(url) @@ -82,27 +69,33 @@ class DiscoveryIE(DiscoveryGoBaseIE): if auth_storage_cookie and auth_storage_cookie.value: auth_storage = self._parse_json(compat_urllib_parse_unquote( compat_urllib_parse_unquote(auth_storage_cookie.value)), - video_id, fatal=False) or {} + display_id, fatal=False) or {} access_token = auth_storage.get('a') or auth_storage.get('access_token') if not access_token: access_token = self._download_json( - 'https://%s.com/anonymous' % site, display_id, query={ + 'https://%s.com/anonymous' % site, display_id, + 'Downloading token JSON metadata', query={ 'authRel': 'authorization', - 'client_id': try_get( - react_data, lambda x: x['application']['apiClientId'], - compat_str) or '3020a40c2356a645b4b4', + 'client_id': '3020a40c2356a645b4b4', 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site, })['access_token'] - try: - headers = self.geo_verification_headers() - headers['Authorization'] = 'Bearer ' + access_token + headers = self.geo_verification_headers() + headers['Authorization'] = 'Bearer ' + access_token + try: + video = self._download_json( + self._API_BASE_URL + 'content/videos', + display_id, 'Downloading content JSON metadata', + headers=headers, query={ + 'slug': display_id, + })[0] + video_id = video['id'] stream = self._download_json( - 'https://api.discovery.com/v1/streaming/video/' + video_id, - display_id, headers=headers) + self._API_BASE_URL + 'streaming/video/' + video_id, + display_id, 'Downloading streaming JSON metadata', headers=headers) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): e_description = self._parse_json( From 07f3a05c87619d01c195cad8cd57ec72291ad78d Mon Sep 17 00:00:00 2001 From: Kyle <40903431+kylepw@users.noreply.github.com> Date: Fri, 2 Aug 2019 06:49:01 +0900 Subject: [PATCH 2085/2417] [CONTRIBUTING.md] Add some more coding conventions (#21939) --- CONTRIBUTING.md | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd9ccbe96..d0e0a5637 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -366,3 +366,67 @@ duration = float_or_none(video.get('durationMs'), scale=1000) view_count = int_or_none(video.get('views')) ``` +### Inline values + +Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. + +#### Example + +Correct: + +```python +title = self._html_search_regex(r'([^<]+)', webpage, 'title') +``` + +Incorrect: + +```python +TITLE_RE = r'([^<]+)' +# ...some lines of code... +title = self._html_search_regex(TITLE_RE, webpage, 'title') +``` + +### Collapse fallbacks + +Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of meta values. + +#### Example + +Good: + +```python +description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) +``` + +Unwieldy: + +```python +description = ( + self._og_search_description(webpage, default=None) + or self._html_search_meta('description', webpage, default=None) + or self._html_search_meta('twitter:description', webpage, default=None)) +``` + +### Trailing parentheses + +Always move trailing parentheses after the last argument. + +#### Example + +Correct: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) +``` + +Incorrect: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list, +) +``` + From 33b529fabd282a371d3a4c21ee861badd20dae28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:03:25 +0700 Subject: [PATCH 2086/2417] [yandexvideo] Add support for DASH formats (#21971) --- youtube_dl/extractor/yandexvideo.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yandexvideo.py b/youtube_dl/extractor/yandexvideo.py index 1aea95383..46529be05 100644 --- a/youtube_dl/extractor/yandexvideo.py +++ b/youtube_dl/extractor/yandexvideo.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, url_or_none, ) @@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor): # episode, sports 'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d', 'only_matching': True, + }, { + # DASH with DRM + 'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8', + 'only_matching': True, }] def _real_extract(self, url): @@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor): 'disable_trackings': 1, })['content'] - m3u8_url = url_or_none(content.get('content_url')) or url_or_none( + content_url = url_or_none(content.get('content_url')) or url_or_none( content['streams'][0]['url']) title = content.get('title') or content.get('computed_title') - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + ext = determine_ext(content_url) + + if ext == 'm3u8': + formats = self._extract_m3u8_formats( + content_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + elif ext == 'mpd': + formats = self._extract_mpd_formats( + content_url, video_id, mpd_id='dash') + else: + formats = [{'url': content_url}] + self._sort_formats(formats) description = content.get('description') From be306d6a313903a3ebdb8a8ff055bb6b58c9f818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:25:01 +0700 Subject: [PATCH 2087/2417] [tvigle] Fix extraction and add support for HLS and DASH formats (closes #21967) --- youtube_dl/extractor/tvigle.py | 53 +++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index 3475ef4c3..180259aba 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -9,6 +9,8 @@ from ..utils import ( float_or_none, int_or_none, parse_age_limit, + try_get, + url_or_none, ) @@ -23,11 +25,10 @@ class TvigleIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.tvigle.ru/video/sokrat/', - 'md5': '36514aed3657d4f70b4b2cef8eb520cd', 'info_dict': { 'id': '1848932', 'display_id': 'sokrat', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Сократ', 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17', 'duration': 6586, @@ -37,7 +38,6 @@ class TvigleIE(InfoExtractor): }, { 'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/', - 'md5': 'e7efe5350dd5011d0de6550b53c3ba7b', 'info_dict': { 'id': '5142516', 'ext': 'flv', @@ -62,7 +62,7 @@ class TvigleIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._html_search_regex( (r']+class=["\']player["\'][^>]+id=["\'](\d+)', - r'var\s+cloudId\s*=\s*["\'](\d+)', + r'cloudId\s*=\s*["\'](\d+)', r'class="video-preview current_playing" id="(\d+)"'), webpage, 'video id') @@ -90,21 +90,40 @@ class TvigleIE(InfoExtractor): age_limit = parse_age_limit(item.get('ageRestrictions')) formats = [] - for vcodec, fmts in item['videos'].items(): + for vcodec, url_or_fmts in item['videos'].items(): if vcodec == 'hls': - continue - for format_id, video_url in fmts.items(): - if format_id == 'm3u8': + m3u8_url = url_or_none(url_or_fmts) + if not m3u8_url: continue - height = self._search_regex( - r'^(\d+)[pP]$', format_id, 'height', default=None) - formats.append({ - 'url': video_url, - 'format_id': '%s-%s' % (vcodec, format_id), - 'vcodec': vcodec, - 'height': int_or_none(height), - 'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)), - }) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif vcodec == 'dash': + mpd_url = url_or_none(url_or_fmts) + if not mpd_url: + continue + formats.extend(self._extract_mpd_formats( + mpd_url, video_id, mpd_id='dash', fatal=False)) + else: + if not isinstance(url_or_fmts, dict): + continue + for format_id, video_url in url_or_fmts.items(): + if format_id == 'm3u8': + continue + video_url = url_or_none(video_url) + if not video_url: + continue + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) + filesize = int_or_none(try_get( + item, lambda x: x['video_files_size'][vcodec][format_id])) + formats.append({ + 'url': video_url, + 'format_id': '%s-%s' % (vcodec, format_id), + 'vcodec': vcodec, + 'height': int_or_none(height), + 'filesize': filesize, + }) self._sort_formats(formats) return { From 2e9522b06173f2c5cfb2ba020958242d2a93feb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:36:32 +0700 Subject: [PATCH 2088/2417] [ChangeLog] Actualize [ci skip] --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index f6f1f7e38..c650e25d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +version + +Extractors ++ [tvigle] Add support for HLS and DASH formats (#21967) +* [tvigle] Fix extraction (#21967) ++ [yandexvideo] Add support for DASH formats (#21971) +* [discovery] Use API call for video data extraction (#21808) ++ [mgtv] Extract format_note (#21881) +* [tvn24] Fix metadata extraction (#21833, #21834) +* [dlive] Relax URL regular expression (#21909) ++ [openload] Add support for oload.best (#21913) +* [youtube] Improve metadata extraction for age gate content (#21943) + + version 2019.07.30 Extractors From 4f2d735803f723a8d8d6ffbbb1dd6b203f71af58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Aug 2019 05:37:54 +0700 Subject: [PATCH 2089/2417] release 2019.08.02 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +- .../ISSUE_TEMPLATE/2_site_support_request.md | 4 +- .../ISSUE_TEMPLATE/3_site_feature_request.md | 4 +- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 +- CONTRIBUTING.md | 64 ------------------- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 8 files changed, 14 insertions(+), 78 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index ccd033716..4d3894ad3 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.30 + [debug] youtube-dl version 2019.08.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 8709937ad..796e11e54 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index c3a555ed3..aa2348548 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 07042a466..5b2501a65 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.07.30 + [debug] youtube-dl version 2019.08.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 4cf75a2eb..d1758a95c 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.07.30** +- [ ] I've verified that I'm running youtube-dl version **2019.08.02** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d0e0a5637..cd9ccbe96 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -366,67 +366,3 @@ duration = float_or_none(video.get('durationMs'), scale=1000) view_count = int_or_none(video.get('views')) ``` -### Inline values - -Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. - -#### Example - -Correct: - -```python -title = self._html_search_regex(r'([^<]+)', webpage, 'title') -``` - -Incorrect: - -```python -TITLE_RE = r'([^<]+)' -# ...some lines of code... -title = self._html_search_regex(TITLE_RE, webpage, 'title') -``` - -### Collapse fallbacks - -Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of meta values. - -#### Example - -Good: - -```python -description = self._html_search_meta( - ['og:description', 'description', 'twitter:description'], - webpage, 'description', default=None) -``` - -Unwieldy: - -```python -description = ( - self._og_search_description(webpage, default=None) - or self._html_search_meta('description', webpage, default=None) - or self._html_search_meta('twitter:description', webpage, default=None)) -``` - -### Trailing parentheses - -Always move trailing parentheses after the last argument. - -#### Example - -Correct: - -```python - lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], - list) -``` - -Incorrect: - -```python - lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], - list, -) -``` - diff --git a/ChangeLog b/ChangeLog index c650e25d5..7db147498 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.08.02 Extractors + [tvigle] Add support for HLS and DASH formats (#21967) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 04dc83605..0f7fdb23d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.07.30' +__version__ = '2019.08.02' From d9d3a5a816253f14ee33623662690293365013e0 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Fri, 2 Aug 2019 05:54:56 +0700 Subject: [PATCH 2090/2417] [README.md] Move code from #21939 to the right place --- README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/README.md b/README.md index 8c48a3012..c39b13616 100644 --- a/README.md +++ b/README.md @@ -1216,6 +1216,72 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` +### Inline values + +Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. + +#### Example + +Correct: + +```python +title = self._html_search_regex(r'([^<]+)', webpage, 'title') +``` + +Incorrect: + +```python +TITLE_RE = r'([^<]+)' +# ...some lines of code... +title = self._html_search_regex(TITLE_RE, webpage, 'title') +``` + +### Collapse fallbacks + +Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns. + +#### Example + +Good: + +```python +description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) +``` + +Unwieldy: + +```python +description = ( + self._og_search_description(webpage, default=None) + or self._html_search_meta('description', webpage, default=None) + or self._html_search_meta('twitter:description', webpage, default=None)) +``` + +Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`. + +### Trailing parentheses + +Always move trailing parentheses after the last argument. + +#### Example + +Correct: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) +``` + +Incorrect: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list, +) +``` + ### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. From 995f319b0605188d145c78b88319d38b69130132 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 2 Aug 2019 18:08:26 +0100 Subject: [PATCH 2091/2417] [discovery] limit video data by show slug(closes #21980) --- youtube_dl/extractor/discovery.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index c4b90cd90..6287ca685 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -34,7 +34,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): cookingchanneltv| motortrend ) - )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+)''' + )\.com/tv-shows/(?P[^/]+)/(?:video|full-episode)s/(?P[^./?#]+)''' _TESTS = [{ 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', 'info_dict': { @@ -53,13 +53,17 @@ class DiscoveryIE(DiscoveryGoBaseIE): }, { 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', 'only_matching': True, + }, { + # using `show_slug` is important to get the correct video data + 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special', + 'only_matching': True, }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False _API_BASE_URL = 'https://api.discovery.com/v1/' def _real_extract(self, url): - site, display_id = re.match(self._VALID_URL, url).groups() + site, show_slug, display_id = re.match(self._VALID_URL, url).groups() access_token = None cookies = self._get_cookies(url) @@ -91,6 +95,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): display_id, 'Downloading content JSON metadata', headers=headers, query={ 'slug': display_id, + 'show_slug': show_slug, })[0] video_id = video['id'] stream = self._download_json( From 5efbc1366f4e4d9d4969cbfb404657349a5b3f99 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 2 Aug 2019 19:38:35 +0100 Subject: [PATCH 2092/2417] [roosterteeth] add support for watch URLs --- youtube_dl/extractor/roosterteeth.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index d3eeeba62..8d88ee499 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -17,7 +17,7 @@ from ..utils import ( class RoosterTeethIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P[^/?#&]+)' _LOGIN_URL = 'https://roosterteeth.com/login' _NETRC_MACHINE = 'roosterteeth' _TESTS = [{ @@ -49,6 +49,9 @@ class RoosterTeethIE(InfoExtractor): # only available for FIRST members 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', 'only_matching': True, + }, { + 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'only_matching': True, }] def _login(self): From eb9c9c74a6a2f9e13d0efaef304416b30354e5a3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 3 Aug 2019 10:29:20 +0100 Subject: [PATCH 2093/2417] [vimeo] fix album extraction closes #1933 closes #15704 closes #15855 closes #18967 closes #21986 --- youtube_dl/extractor/vimeo.py | 58 +++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b5b44a79a..ddf375c6c 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -2,12 +2,14 @@ from __future__ import unicode_literals import base64 +import functools import json import re import itertools from .common import InfoExtractor from ..compat import ( + compat_kwargs, compat_HTTPError, compat_str, compat_urlparse, @@ -19,6 +21,7 @@ from ..utils import ( int_or_none, merge_dicts, NO_DEFAULT, + OnDemandPagedList, parse_filesize, qualities, RegexNotFoundError, @@ -98,6 +101,13 @@ class VimeoBaseInfoExtractor(InfoExtractor): webpage, 'vuid', group='vuid') return xsrft, vuid + def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): + vimeo_config = self._search_regex( + r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', + webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) + if vimeo_config: + return self._parse_json(vimeo_config, video_id) + def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) @@ -253,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor): \. )? vimeo(?Ppro)?\.com/ - (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) + (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:.*?/)? (?: (?: @@ -580,11 +590,9 @@ class VimeoIE(VimeoBaseInfoExtractor): # and latter we extract those that are Vimeo specific. self.report_extraction(video_id) - vimeo_config = self._search_regex( - r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage, - 'vimeo config', default=None) + vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) if vimeo_config: - seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {}) + seed_status = vimeo_config.get('seed_status', {}) if seed_status.get('state') == 'failed': raise ExtractorError( '%s said: %s' % (self.IE_NAME, seed_status['title']), @@ -905,7 +913,7 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'https://vimeo\.com/album/(?P\d+)(?:$|[?#]|/(?!video))' + _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P\d+)(?:$|[?#]|/(?!video))' _TITLE_RE = r'