
Merge branch 'master' of https://github.com/speakerender/youtube-dl into directvnow-auth-fix

SpeakerEnder 2019-05-26 19:30:53 -04:00
commit b729ea0ad5
85 changed files with 841 additions and 932 deletions

View File

@@ -18,7 +18,7 @@ title: ''
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.04.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
 - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 - [ ] I'm reporting a broken site support
-- [ ] I've verified that I'm running youtube-dl version **2019.04.30**
+- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've searched the bugtracker for similar issues including closed ones
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2019.04.30
+[debug] youtube-dl version 2019.05.20
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

View File

@@ -19,7 +19,7 @@ labels: 'site-support-request'
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.04.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
 - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 - [ ] I'm reporting a new site support request
-- [ ] I've verified that I'm running youtube-dl version **2019.04.30**
+- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that none of provided URLs violate any copyrights
 - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@@ -18,13 +18,13 @@ title: ''
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.04.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Finally, put x into all relevant boxes (like this [x])
 -->
 - [ ] I'm reporting a site feature request
-- [ ] I've verified that I'm running youtube-dl version **2019.04.30**
+- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
 - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@@ -18,7 +18,7 @@ title: ''
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.04.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
 - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 - [ ] I'm reporting a broken site support issue
-- [ ] I've verified that I'm running youtube-dl version **2019.04.30**
+- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2019.04.30
+[debug] youtube-dl version 2019.05.20
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

View File

@@ -19,13 +19,13 @@ labels: 'request'
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.04.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Finally, put x into all relevant boxes (like this [x])
 -->
 - [ ] I'm reporting a feature request
-- [ ] I've verified that I'm running youtube-dl version **2019.04.30**
+- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
 - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@@ -9,6 +9,7 @@ python:
 - "3.6"
 - "pypy"
 - "pypy3"
+dist: trusty
 env:
 - YTDL_TEST_SET=core
 - YTDL_TEST_SET=download

View File

@@ -1,3 +1,45 @@
+version 2019.05.20
+
+Core
++ [extractor/common] Move workaround for applying first Set-Cookie header
+  into a separate _apply_first_set_cookie_header method
+
+Extractors
+* [safari] Fix authentication (#21090)
+* [vk] Use _apply_first_set_cookie_header
+* [vrt] Fix extraction (#20527)
++ [canvas] Add support for vrtnieuws and sporza site ids and extract
+  AES HLS formats
++ [vrv] Extract captions (#19238)
+* [tele5] Improve video id extraction
+* [tele5] Relax URL regular expression (#21020, #21063)
+* [svtplay] Update API URL (#21075)
++ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071)
+
+
+version 2019.05.11
+
+Core
+* [utils] Transliterate "þ" as "th" (#20897)
+
+Extractors
++ [cloudflarestream] Add support for videodelivery.net (#21049)
++ [byutv] Add support for DVR videos (#20574, #20676)
++ [gfycat] Add support for URLs with tags (#20696, #20731)
++ [openload] Add support for verystream.com (#20701, #20967)
+* [youtube] Use sp field value for signature field name (#18841, #18927,
+  #21028)
++ [yahoo:gyao] Extend URL regular expression (#21008)
+* [youtube] Fix channel id extraction (#20982, #21003)
++ [sky] Add support for news.sky.com (#13055)
++ [youtube:entrylistbase] Retry on 5xx HTTP errors (#20965)
++ [francetvinfo] Extend video id extraction (#20619, #20740)
+* [4tube] Update token hosts (#20918)
+* [hotstar] Move to API v2 (#20931)
+* [fox] Fix API error handling under python 2 (#20925)
++ [redbulltv] Extend URL regular expression (#20922)
+
+
 version 2019.04.30
 
 Extractors

View File

@@ -45,12 +45,12 @@ for test in gettestcases():
         RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)

-        if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
-                       test['info_dict']['age_limit'] != 18):
+        if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
+                       or test['info_dict']['age_limit'] != 18):
             print('\nPotential missing age_limit check: {0}'.format(test['name']))

-        elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
-                             test['info_dict']['age_limit'] == 18):
+        elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
+                             and test['info_dict']['age_limit'] == 18):
             print('\nPotential false negative: {0}'.format(test['name']))

         else:

View File

@@ -805,6 +805,7 @@
 - **ShowRoomLive**
 - **Sina**
 - **SkylineWebcams**
+- **SkyNews**
 - **skynewsarabia:article**
 - **skynewsarabia:video**
 - **SkySports**
@@ -999,6 +1000,7 @@
 - **Vbox7**
 - **VeeHD**
 - **Veoh**
+- **verystream**
 - **Vessel**
 - **Vesti**: Вести.Ru
 - **Vevo**
@@ -1069,7 +1071,7 @@
 - **VoxMediaVolume**
 - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **Vrak**
-- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
+- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
 - **VrtNU**: VrtNU.be
 - **vrv**
 - **vrv:series**

View File

@@ -3,4 +3,4 @@ universal = True
 [flake8]
 exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
-ignore = E402,E501,E731,E741
+ignore = E402,E501,E731,E741,W503
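
Note: the new W503 entry disables flake8's "line break before binary operator" check, and that is what licenses the bulk of this commit: every wrapped boolean or arithmetic expression below moves its operator from the end of a line to the start of the continuation line. A minimal illustration of the two styles, using hypothetical variables rather than code from the repo:

import os

path = '/tmp/example.part'  # illustrative values, not from the diff
force = False

# Before this commit: the operator ends the line (the W504 style,
# chosen because flake8 previously flagged the alternative as W503).
stale = (not os.path.exists(path) or
         force)

# After this commit: the operator starts the continuation line,
# matching PEP 8's current advice; W503 must therefore be ignored.
stale = (not os.path.exists(path)
         or force)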

View File

@@ -44,16 +44,16 @@ class TestAES(unittest.TestCase):
     def test_decrypt_text(self):
         password = intlist_to_bytes(self.key).decode('utf-8')
         encrypted = base64.b64encode(
-            intlist_to_bytes(self.iv[:8]) +
-            b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+            intlist_to_bytes(self.iv[:8])
+            + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
         ).decode('utf-8')
         decrypted = (aes_decrypt_text(encrypted, password, 16))
         self.assertEqual(decrypted, self.secret_msg)

         password = intlist_to_bytes(self.key).decode('utf-8')
         encrypted = base64.b64encode(
-            intlist_to_bytes(self.iv[:8]) +
-            b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+            intlist_to_bytes(self.iv[:8])
+            + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
         ).decode('utf-8')
         decrypted = (aes_decrypt_text(encrypted, password, 32))
         self.assertEqual(decrypted, self.secret_msg)

View File

@@ -34,8 +34,8 @@ def _make_testfunc(testfile):
     def test_func(self):
         as_file = os.path.join(TEST_DIR, testfile)
         swf_file = os.path.join(TEST_DIR, test_id + '.swf')
-        if ((not os.path.exists(swf_file)) or
-                os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
+        if ((not os.path.exists(swf_file))
+                or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
             # Recompile
             try:
                 subprocess.check_call([

View File

@@ -73,6 +73,7 @@ from youtube_dl.utils import (
     smuggle_url,
     str_to_int,
     strip_jsonp,
+    strip_or_none,
     timeconvert,
     unescapeHTML,
     unified_strdate,
@@ -183,7 +184,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_filename(
             'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
-            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')

     def test_sanitize_ids(self):
         self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
@@ -752,6 +753,18 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, {'status': 'success'})

+    def test_strip_or_none(self):
+        self.assertEqual(strip_or_none(' abc'), 'abc')
+        self.assertEqual(strip_or_none('abc '), 'abc')
+        self.assertEqual(strip_or_none(' abc '), 'abc')
+        self.assertEqual(strip_or_none('\tabc\t'), 'abc')
+        self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
+        self.assertEqual(strip_or_none('abc'), 'abc')
+        self.assertEqual(strip_or_none(''), '')
+        self.assertEqual(strip_or_none(None), None)
+        self.assertEqual(strip_or_none(42), None)
+        self.assertEqual(strip_or_none([]), None)
+
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape(''), '')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
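
For orientation, a strip_or_none consistent with every assertion in the new test above would look roughly like this; a sketch inferred from the tests, not necessarily the exact utils.py implementation:

def strip_or_none(v):
    # Strings lose their surrounding whitespace; any non-string input
    # (None, 42, [], ...) maps to None, matching the assertions above.
    return v.strip() if isinstance(v, str) else None

assert strip_or_none('\n\tabc\n\t') == 'abc'
assert strip_or_none(42) is None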

View File

@@ -400,9 +400,9 @@ class YoutubeDL(object):
             else:
                 raise

-        if (sys.platform != 'win32' and
-                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
-                not params.get('restrictfilenames', False)):
+        if (sys.platform != 'win32'
+                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+                and not params.get('restrictfilenames', False)):
             # Unicode filesystem API will throw errors (#1474, #13027)
             self.report_warning(
                 'Assuming --restrict-filenames since file system encoding '
@@ -440,9 +440,9 @@ class YoutubeDL(object):
                     if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
                 if idxs:
                     correct_argv = (
-                        ['youtube-dl'] +
-                        [a for i, a in enumerate(argv) if i not in idxs] +
-                        ['--'] + [argv[i] for i in idxs]
+                        ['youtube-dl']
+                        + [a for i, a in enumerate(argv) if i not in idxs]
+                        + ['--'] + [argv[i] for i in idxs]
                     )
                     self.report_warning(
                         'Long argument string detected. '
@@ -850,8 +850,8 @@ class YoutubeDL(object):
         if result_type in ('url', 'url_transparent'):
             ie_result['url'] = sanitize_url(ie_result['url'])
         extract_flat = self.params.get('extract_flat', False)
-        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
-                extract_flat is True):
+        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
+                or extract_flat is True):
             if self.params.get('forcejson', False):
                 self.to_stdout(json.dumps(ie_result))
             return ie_result
@@ -1619,9 +1619,9 @@ class YoutubeDL(object):
                     # https://github.com/ytdl-org/youtube-dl/issues/10083).
                     incomplete_formats = (
                         # All formats are video-only or
-                        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
+                        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
                         # all formats are audio-only
-                        all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
+                        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

                     ctx = {
                         'formats': formats,
@@ -1947,8 +1947,8 @@ class YoutubeDL(object):
             else:
                 assert fixup_policy in ('ignore', 'never')

-        if (info_dict.get('requested_formats') is None and
-                info_dict.get('container') == 'm4a_dash'):
+        if (info_dict.get('requested_formats') is None
+                and info_dict.get('container') == 'm4a_dash'):
             if fixup_policy == 'warn':
                 self.report_warning(
                     '%s: writing DASH m4a. '
@@ -1967,9 +1967,9 @@ class YoutubeDL(object):
             else:
                 assert fixup_policy in ('ignore', 'never')

-        if (info_dict.get('protocol') == 'm3u8_native' or
-                info_dict.get('protocol') == 'm3u8' and
-                self.params.get('hls_prefer_native')):
+        if (info_dict.get('protocol') == 'm3u8_native'
+                or info_dict.get('protocol') == 'm3u8'
+                and self.params.get('hls_prefer_native')):
             if fixup_policy == 'warn':
                 self.report_warning('%s: malformed AAC bitstream detected.' % (
                     info_dict['id']))
@@ -1995,10 +1995,10 @@ class YoutubeDL(object):
     def download(self, url_list):
         """Download a given list of URLs."""
         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-        if (len(url_list) > 1 and
-                outtmpl != '-' and
-                '%' not in outtmpl and
-                self.params.get('max_downloads') != 1):
+        if (len(url_list) > 1
+                and outtmpl != '-'
+                and '%' not in outtmpl
+                and self.params.get('max_downloads') != 1):
             raise SameFileError(outtmpl)

         for url in url_list:
@@ -2143,8 +2143,8 @@ class YoutubeDL(object):
             if res:
                 res += ', '
             res += '%s container' % fdict['container']
-        if (fdict.get('vcodec') is not None and
-                fdict.get('vcodec') != 'none'):
+        if (fdict.get('vcodec') is not None
+                and fdict.get('vcodec') != 'none'):
             if res:
                 res += ', '
             res += fdict['vcodec']

View File

@@ -230,14 +230,14 @@ def _real_main(argv=None):
     if opts.allsubtitles and not opts.writeautomaticsub:
         opts.writesubtitles = True

-    outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
-               (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
-               (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
-               (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
-               (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
-               (opts.useid and '%(id)s.%(ext)s') or
-               (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
-               DEFAULT_OUTTMPL)
+    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
+               or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
+               or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
+               or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
+               or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
+               or (opts.useid and '%(id)s.%(ext)s')
+               or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
+               or DEFAULT_OUTTMPL)
     if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
         parser.error('Cannot download a video and extract audio into the same'
                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'

View File

@@ -2649,9 +2649,9 @@ else:
     try:
        args = shlex.split('中文')
-        assert (isinstance(args, list) and
-                isinstance(args[0], compat_str) and
-                args[0] == '中文')
+        assert (isinstance(args, list)
+                and isinstance(args[0], compat_str)
+                and args[0] == '中文')
        compat_shlex_split = shlex.split
     except (AssertionError, UnicodeEncodeError):
         # Working around shlex issue with unicode strings on some python 2

View File

@@ -330,15 +330,15 @@ class FileDownloader(object):
         """

         nooverwrites_and_exists = (
-            self.params.get('nooverwrites', False) and
-            os.path.exists(encodeFilename(filename))
+            self.params.get('nooverwrites', False)
+            and os.path.exists(encodeFilename(filename))
         )

         if not hasattr(filename, 'write'):
             continuedl_and_exists = (
-                self.params.get('continuedl', True) and
-                os.path.isfile(encodeFilename(filename)) and
-                not self.params.get('nopart', False)
+                self.params.get('continuedl', True)
+                and os.path.isfile(encodeFilename(filename))
+                and not self.params.get('nopart', False)
             )

             # Check file already present

View File

@@ -238,8 +238,8 @@ def write_metadata_tag(stream, metadata):


 def remove_encrypted_media(media):
-    return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
-                                 'drmAdditionalHeaderSetId' not in e.attrib,
+    return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib
+                                 and 'drmAdditionalHeaderSetId' not in e.attrib,
                        media))
@@ -267,8 +267,8 @@ class F4mFD(FragmentFD):
         media = doc.findall(_add_ns('media'))
         if not media:
             self.report_error('No media found')
-        for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
-                  doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+        for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+                  + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
             # If id attribute is missing it's valid for all media nodes
             # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
             if 'id' not in e.attrib:

View File

@@ -219,8 +219,8 @@ class FragmentFD(FileDownloader):
             frag_total_bytes = s.get('total_bytes') or 0
             if not ctx['live']:
                 estimated_size = (
-                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
-                    (state['fragment_index'] + 1) * total_frags)
+                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
+                    / (state['fragment_index'] + 1) * total_frags)
                 state['total_bytes_estimate'] = estimated_size

             if s['status'] == 'finished':

View File

@@ -76,12 +76,12 @@ class HlsFD(FragmentFD):
             return fd.real_download(filename, info_dict)

         def is_ad_fragment_start(s):
-            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
-                    s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
+            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

         def is_ad_fragment_end(s):
-            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or
-                    s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
+            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
+                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

         media_frags = 0
         ad_frags = 0

View File

@@ -46,8 +46,8 @@ class HttpFD(FileDownloader):

         is_test = self.params.get('test', False)
         chunk_size = self._TEST_FILE_SIZE if is_test else (
-            info_dict.get('downloader_options', {}).get('http_chunk_size') or
-            self.params.get('http_chunk_size') or 0)
+            info_dict.get('downloader_options', {}).get('http_chunk_size')
+            or self.params.get('http_chunk_size') or 0)

         ctx.open_mode = 'wb'
         ctx.resume_len = 0
@@ -123,11 +123,11 @@ class HttpFD(FileDownloader):
                             content_len = int_or_none(content_range_m.group(3))
                             accept_content_len = (
                                 # Non-chunked download
-                                not ctx.chunk_size or
+                                not ctx.chunk_size
                                 # Chunked download and requested piece or
                                 # its part is promised to be served
-                                content_range_end == range_end or
-                                content_len < range_end)
+                                or content_range_end == range_end
+                                or content_len < range_end)
                             if accept_content_len:
                                 ctx.data_len = content_len
                                 return
@@ -152,8 +152,8 @@ class HttpFD(FileDownloader):
                         raise
                 else:
                     # Examine the reported length
-                    if (content_length is not None and
-                            (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
+                    if (content_length is not None
+                            and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
                         # The file had already been fully downloaded.
                         # Explanation to the above condition: in issue #175 it was revealed that
                         # YouTube sometimes adds or removes a few bytes from the end of the file,

View File

@@ -59,9 +59,9 @@ class AddAnimeIE(InfoExtractor):
                 parsed_url = compat_urllib_parse_urlparse(url)
                 av_val = av_res + len(parsed_url.netloc)
                 confirm_url = (
-                    parsed_url.scheme + '://' + parsed_url.netloc +
-                    action + '?' +
-                    compat_urllib_parse_urlencode({
+                    parsed_url.scheme + '://' + parsed_url.netloc
+                    + action + '?'
+                    + compat_urllib_parse_urlencode({
                         'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
                 self._download_webpage(
                     confirm_url, video_id,

View File

@@ -65,8 +65,9 @@ class BitChuteIE(InfoExtractor):
             webpage, default=None) or self._html_search_meta(
             'twitter:image:src', webpage, 'thumbnail')
         uploader = self._html_search_regex(
-            r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
-            'uploader', fatal=False)
+            (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
+             r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
+            webpage, 'uploader', fatal=False)

         return {
             'id': video_id,

View File

@@ -32,8 +32,8 @@ class BlinkxIE(InfoExtractor):
         video_id = self._match_id(url)
         display_id = video_id[:8]

-        api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
-                   'video=%s' % video_id)
+        api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
+                   + 'video=%s' % video_id)
         data_json = self._download_webpage(api_url, display_id)
         data = json.loads(data_json)['api']['results'][0]
         duration = None

View File

@@ -3,11 +3,13 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import parse_duration


 class BYUtvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
     _TESTS = [{
+        # ooyalaVOD
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
         'info_dict': {
             'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
@@ -22,6 +24,20 @@ class BYUtvIE(InfoExtractor):
             'skip_download': True,
         },
         'add_ie': ['Ooyala'],
+    }, {
+        # dvr
+        'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
+        'info_dict': {
+            'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451',
+            'display_id': 'byu-softball-pacific-vs-byu-41219---game-2',
+            'ext': 'mp4',
+            'title': 'Pacific vs. BYU (4/12/19)',
+            'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3',
+            'duration': 11645,
+        },
+        'params': {
+            'skip_download': True
+        },
     }, {
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
         'only_matching': True,
@@ -35,24 +51,42 @@ class BYUtvIE(InfoExtractor):
         video_id = mobj.group('id')
         display_id = mobj.group('display_id') or video_id

-        ep = self._download_json(
-            'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id,
-            query={
+        info = self._download_json(
+            'https://api.byutv.org/api3/catalog/getvideosforcontent',
+            display_id, query={
                 'contentid': video_id,
                 'channel': 'byutv',
                 'x-byutv-context': 'web$US',
             }, headers={
                 'x-byutv-context': 'web$US',
                 'x-byutv-platformkey': 'xsaaw9c7y5',
-            })['ooyalaVOD']
+            })

+        ep = info.get('ooyalaVOD')
+        if ep:
+            return {
+                '_type': 'url_transparent',
+                'ie_key': 'Ooyala',
+                'url': 'ooyala:%s' % ep['providerId'],
+                'id': video_id,
+                'display_id': display_id,
+                'title': ep.get('title'),
+                'description': ep.get('description'),
+                'thumbnail': ep.get('imageThumbnail'),
+            }
+
+        ep = info['dvr']
+        title = ep['title']
+        formats = self._extract_m3u8_formats(
+            ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+        self._sort_formats(formats)
         return {
-            '_type': 'url_transparent',
-            'ie_key': 'Ooyala',
-            'url': 'ooyala:%s' % ep['providerId'],
             'id': video_id,
             'display_id': display_id,
-            'title': ep.get('title'),
+            'title': title,
             'description': ep.get('description'),
             'thumbnail': ep.get('imageThumbnail'),
+            'duration': parse_duration(ep.get('length')),
+            'formats': formats,
         }

View File

@@ -17,7 +17,7 @@ from ..utils import (

 class CanvasIE(InfoExtractor):
-    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
         'md5': '90139b746a0a9bd7bb631283f6e2a64e',
@@ -35,6 +35,10 @@ class CanvasIE(InfoExtractor):
         'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
         'only_matching': True,
     }]
+    _HLS_ENTRY_PROTOCOLS_MAP = {
+        'HLS': 'm3u8_native',
+        'HLS_AES': 'm3u8',
+    }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -52,9 +56,9 @@ class CanvasIE(InfoExtractor):
             format_url, format_type = target.get('url'), target.get('type')
             if not format_url or not format_type:
                 continue
-            if format_type == 'HLS':
+            if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
                 formats.extend(self._extract_m3u8_formats(
-                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
                     m3u8_id=format_type, fatal=False))
             elif format_type == 'HDS':
                 formats.extend(self._extract_f4m_formats(

View File

@@ -10,8 +10,8 @@ class CloudflareStreamIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:watch\.)?cloudflarestream\.com/|
-                            embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=
+                            (?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/|
+                            embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=
                         )
                         (?P<id>[\da-f]+)
                     '''
@@ -31,6 +31,9 @@ class CloudflareStreamIE(InfoExtractor):
     }, {
         'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
         'only_matching': True,
+    }, {
+        'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
+        'only_matching': True,
     }]

     @staticmethod
@@ -38,7 +41,7 @@ class CloudflareStreamIE(InfoExtractor):
         return [
             mobj.group('url')
             for mobj in re.finditer(
-                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
+                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
                 webpage)]

     def _real_extract(self, url):

View File

@@ -67,6 +67,7 @@ from ..utils import (
     sanitized_Request,
     sanitize_filename,
     str_or_none,
+    strip_or_none,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
@@ -117,7 +118,7 @@ class InfoExtractor(object):
                                  unfragmented media)
                                - URL of the MPD manifest or base URL
                                  representing the media if MPD manifest
-                                 is parsed froma string (in case of
+                                 is parsed from a string (in case of
                                  fragmented media)
                     for MSS - URL of the ISM manifest.
         * manifest_url
@@ -542,11 +543,11 @@ class InfoExtractor(object):
             raise ExtractorError('An extractor error has occurred.', cause=e)

     def __maybe_fake_ip_and_retry(self, countries):
-        if (not self._downloader.params.get('geo_bypass_country', None) and
-                self._GEO_BYPASS and
-                self._downloader.params.get('geo_bypass', True) and
-                not self._x_forwarded_for_ip and
-                countries):
+        if (not self._downloader.params.get('geo_bypass_country', None)
+                and self._GEO_BYPASS
+                and self._downloader.params.get('geo_bypass', True)
+                and not self._x_forwarded_for_ip
+                and countries):
             country_code = random.choice(countries)
             self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
             if self._x_forwarded_for_ip:
@@ -682,8 +683,8 @@ class InfoExtractor(object):

     def __check_blocked(self, content):
         first_block = content[:512]
-        if ('<title>Access to this site is blocked</title>' in content and
-                'Websense' in first_block):
+        if ('<title>Access to this site is blocked</title>' in content
+                and 'Websense' in first_block):
             msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
             blocked_iframe = self._html_search_regex(
                 r'<iframe src="([^"]+)"', content,
@@ -701,8 +702,8 @@ class InfoExtractor(object):
             if block_msg:
                 msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
             raise ExtractorError(msg, expected=True)
-        if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
-                'blocklist.rkn.gov.ru' in content):
+        if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
+                and 'blocklist.rkn.gov.ru' in content):
             raise ExtractorError(
                 'Access to this webpage has been blocked by decision of the Russian government. '
                 'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
@@ -1709,8 +1710,8 @@ class InfoExtractor(object):
                     continue
             else:
                 tbr = float_or_none(
-                    last_stream_inf.get('AVERAGE-BANDWIDTH') or
-                    last_stream_inf.get('BANDWIDTH'), scale=1000)
+                    last_stream_inf.get('AVERAGE-BANDWIDTH')
+                    or last_stream_inf.get('BANDWIDTH'), scale=1000)
                 format_id = []
                 if m3u8_id:
                     format_id.append(m3u8_id)
@@ -2480,7 +2481,7 @@ class InfoExtractor(object):
                 'subtitles': {},
             }
             media_attributes = extract_attributes(media_tag)
-            src = media_attributes.get('src')
+            src = strip_or_none(media_attributes.get('src'))
             if src:
                 _, formats = _media_formats(src, media_type)
                 media_info['formats'].extend(formats)
@@ -2490,7 +2491,7 @@ class InfoExtractor(object):
                     s_attr = extract_attributes(source_tag)
                     # data-video-src and data-src are non standard but seen
                     # several times in the wild
-                    src = dict_get(s_attr, ('src', 'data-video-src', 'data-src'))
+                    src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
                     if not src:
                         continue
                     f = parse_content_type(s_attr.get('type'))
@@ -2504,8 +2505,8 @@ class InfoExtractor(object):
                         if str_or_none(s_attr.get(lbl))
                     ]
                     width = int_or_none(s_attr.get('width'))
-                    height = (int_or_none(s_attr.get('height')) or
-                              int_or_none(s_attr.get('res')))
+                    height = (int_or_none(s_attr.get('height'))
+                              or int_or_none(s_attr.get('res')))
                     if not width or not height:
                         for lbl in labels:
                             resolution = parse_resolution(lbl)
@@ -2533,7 +2534,7 @@ class InfoExtractor(object):
                 track_attributes = extract_attributes(track_tag)
                 kind = track_attributes.get('kind')
                 if not kind or kind in ('subtitles', 'captions'):
-                    src = track_attributes.get('src')
+                    src = strip_or_none(track_attributes.get('src'))
                     if not src:
                         continue
                     lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
@@ -2817,6 +2818,33 @@ class InfoExtractor(object):
         self._downloader.cookiejar.add_cookie_header(req)
         return compat_cookies.SimpleCookie(req.get_header('Cookie'))

+    def _apply_first_set_cookie_header(self, url_handle, cookie):
+        """
+        Apply first Set-Cookie header instead of the last. Experimental.
+
+        Some sites (e.g. [1-3]) may serve two cookies under the same name
+        in Set-Cookie header and expect the first (old) one to be set rather
+        than second (new). However, as of RFC6265 the newer one cookie
+        should be set into cookie store what actually happens.
+        We will workaround this issue by resetting the cookie to
+        the first one manually.
+
+        1. https://new.vk.com/
+        2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
+        3. https://learning.oreilly.com/
+        """
+        for header, cookies in url_handle.headers.items():
+            if header.lower() != 'set-cookie':
+                continue
+            if sys.version_info[0] >= 3:
+                cookies = cookies.encode('iso-8859-1')
+            cookies = cookies.decode('utf-8')
+            cookie_value = re.search(
+                r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
+            if cookie_value:
+                value, domain = cookie_value.groups()
+                self._set_cookie(domain, cookie, value)
+                break
+
     def get_testcases(self, include_onlymatching=False):
         t = getattr(self, '_TEST', None)
         if t:
@@ -2847,8 +2875,8 @@ class InfoExtractor(object):
         return not any_restricted

     def extract_subtitles(self, *args, **kwargs):
-        if (self._downloader.params.get('writesubtitles', False) or
-                self._downloader.params.get('listsubtitles')):
+        if (self._downloader.params.get('writesubtitles', False)
+                or self._downloader.params.get('listsubtitles')):
             return self._get_subtitles(*args, **kwargs)
         return {}
@@ -2873,8 +2901,8 @@ class InfoExtractor(object):
         return ret

     def extract_automatic_captions(self, *args, **kwargs):
-        if (self._downloader.params.get('writeautomaticsub', False) or
-                self._downloader.params.get('listsubtitles')):
+        if (self._downloader.params.get('writeautomaticsub', False)
+                or self._downloader.params.get('listsubtitles')):
             return self._get_automatic_captions(*args, **kwargs)
         return {}
@@ -2882,9 +2910,9 @@ class InfoExtractor(object):
         raise NotImplementedError('This method must be implemented by subclasses')

     def mark_watched(self, *args, **kwargs):
-        if (self._downloader.params.get('mark_watched', False) and
-                (self._get_login_info()[0] is not None or
-                 self._downloader.params.get('cookiefile') is not None)):
+        if (self._downloader.params.get('mark_watched', False)
+                and (self._get_login_info()[0] is not None
+                     or self._downloader.params.get('cookiefile') is not None)):
             self._mark_watched(*args, **kwargs)

     def _mark_watched(self, *args, **kwargs):
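
Usage-wise, an extractor hit by the duplicate-cookie problem would call the new helper right after fetching the page. A rough sketch of a method inside such an extractor class; the cookie name, URL, and method body are illustrative, not taken from this diff:

    def _login(self):
        # _download_webpage_handle returns the page plus the urllib
        # response object, whose headers carry the raw Set-Cookie lines
        # that _apply_first_set_cookie_header re-parses.
        login_page, url_handle = self._download_webpage_handle(
            'https://new.vk.com', None, 'Downloading login page')
        # Keep the first cookie of this name instead of the last one
        # that RFC 6265 processing would normally leave in the jar.
        self._apply_first_set_cookie_header(url_handle, 'remixlhk')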

View File

@@ -32,19 +32,19 @@ class CommonMistakesIE(InfoExtractor):

 class UnicodeBOMIE(InfoExtractor):
-        IE_DESC = False
-        _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+    IE_DESC = False
+    _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'

-        # Disable test for python 3.2 since BOM is broken in re in this version
-        # (see https://github.com/ytdl-org/youtube-dl/issues/9751)
-        _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
-            'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
-            'only_matching': True,
-        }]
+    # Disable test for python 3.2 since BOM is broken in re in this version
+    # (see https://github.com/ytdl-org/youtube-dl/issues/9751)
+    _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
+        'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+        'only_matching': True,
+    }]

-        def _real_extract(self, url):
-            real_url = self._match_id(url)
-            self.report_warning(
-                'Your URL starts with a Byte Order Mark (BOM). '
-                'Removing the BOM and looking for "%s" ...' % real_url)
-            return self.url_result(real_url)
+    def _real_extract(self, url):
+        real_url = self._match_id(url)
+        self.report_warning(
+            'Your URL starts with a Byte Order Mark (BOM). '
+            'Removing the BOM and looking for "%s" ...' % real_url)
+        return self.url_result(real_url)

View File

@@ -1,39 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class CriterionIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
-    _TEST = {
-        'url': 'http://www.criterion.com/films/184-le-samourai',
-        'md5': 'bc51beba55685509883a9a7830919ec3',
-        'info_dict': {
-            'id': '184',
-            'ext': 'mp4',
-            'title': 'Le Samouraï',
-            'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        final_url = self._search_regex(
-            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
-        title = self._og_search_title(webpage)
-        description = self._html_search_meta('description', webpage)
-        thumbnail = self._search_regex(
-            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
-            webpage, 'thumbnail url')
-
-        return {
-            'id': video_id,
-            'url': final_url,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-        }

View File

@@ -45,8 +45,8 @@ class DailyMailIE(InfoExtractor):
         sources_url = (try_get(
             video_data,
             (lambda x: x['plugins']['sources']['url'],
-             lambda x: x['sources']['url']), compat_str) or
-            'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
+             lambda x: x['sources']['url']), compat_str)
+            or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)

         video_sources = self._download_json(sources_url, video_id)
         body = video_sources.get('body')

View File

@@ -70,8 +70,8 @@ class DctpTvIE(InfoExtractor):
             endpoint = next(
                 server['endpoint']
                 for server in servers
-                if url_or_none(server.get('endpoint')) and
-                'cloudfront' in server['endpoint'])
+                if url_or_none(server.get('endpoint'))
+                and 'cloudfront' in server['endpoint'])
         else:
             endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'

View File

@@ -82,8 +82,8 @@ class ExpressenIE(InfoExtractor):
         title = info.get('titleRaw') or data['title']
         description = info.get('descriptionRaw')
         thumbnail = info.get('socialMediaImage') or data.get('image')
-        duration = int_or_none(info.get('videoTotalSecondsDuration') or
-                               data.get('totalSecondsDuration'))
+        duration = int_or_none(info.get('videoTotalSecondsDuration')
+                               or data.get('totalSecondsDuration'))
         timestamp = unified_timestamp(info.get('publishDate'))

         return {

View File

@@ -240,7 +240,6 @@ from .condenast import CondeNastIE
 from .corus import CorusIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
-from .criterion import CriterionIE
 from .crooksandliars import CrooksAndLiarsIE
 from .crunchyroll import (
     CrunchyrollIE,
@@ -772,13 +771,6 @@ from .nova import (
     NovaEmbedIE,
     NovaIE,
 )
-from .novamov import (
-    AuroraVidIE,
-    CloudTimeIE,
-    NowVideoIE,
-    VideoWeedIE,
-    WholeCloudIE,
-)
 from .nowness import (
     NownessIE,
     NownessPlaylistIE,
@@ -833,7 +825,10 @@ from .ooyala import (
     OoyalaIE,
     OoyalaExternalIE,
 )
-from .openload import OpenloadIE
+from .openload import (
+    OpenloadIE,
+    VerystreamIE,
+)
 from .ora import OraTVIE
 from .orf import (
     ORFTVthekIE,
@@ -893,7 +888,6 @@ from .polskieradio import (
 from .popcorntv import PopcornTVIE
 from .porn91 import Porn91IE
 from .porncom import PornComIE
-from .pornflip import PornFlipIE
 from .pornhd import PornHdIE
 from .pornhub import (
     PornHubIE,
@@ -1033,7 +1027,10 @@ from .skynewsarabia import (
     SkyNewsArabiaIE,
     SkyNewsArabiaArticleIE,
 )
-from .skysports import SkySportsIE
+from .sky import (
+    SkyNewsIE,
+    SkySportsIE,
+)
 from .slideshare import SlideshareIE
 from .slideslive import SlidesLiveIE
 from .slutload import SlutloadIE

View File

@@ -22,8 +22,6 @@ from ..utils import (

class FourTubeBaseIE(InfoExtractor):
-    _TKN_HOST = 'tkn.kodicdn.com'
-
    def _extract_formats(self, url, video_id, media_id, sources):
        token_url = 'https://%s/%s/desktop/%s' % (
            self._TKN_HOST, media_id, '+'.join(sources))
@@ -120,6 +118,7 @@ class FourTubeIE(FourTubeBaseIE):
    IE_NAME = '4tube'
    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
    _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
+    _TKN_HOST = 'token.4tube.com'
    _TESTS = [{
        'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
        'md5': '6516c8ac63b03de06bc8eac14362db4f',
@@ -149,6 +148,7 @@ class FourTubeIE(FourTubeBaseIE):
class FuxIE(FourTubeBaseIE):
    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
    _URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
+    _TKN_HOST = 'token.fux.com'
    _TESTS = [{
        'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
        'info_dict': {
@@ -280,6 +280,7 @@ class PornTubeIE(FourTubeBaseIE):
class PornerBrosIE(FourTubeBaseIE):
    _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
    _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
+    _TKN_HOST = 'token.pornerbros.com'
    _TESTS = [{
        'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
        'md5': '6516c8ac63b03de06bc8eac14362db4f',
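The token host now lives on each subclass rather than on the shared base, so every site resolves its token URL against its own domain while reusing the same download logic. A minimal sketch of the pattern, with illustrative class names that are not from the codebase:

    class TokenBase(object):
        _TKN_HOST = None  # each subclass supplies its own token host

        def token_url(self, media_id, sources):
            # mirrors the URL shape used by _extract_formats above
            return 'https://%s/%s/desktop/%s' % (
                self._TKN_HOST, media_id, '+'.join(sources))

    class FourTubeLike(TokenBase):
        _TKN_HOST = 'token.4tube.com'

    print(FourTubeLike().token_url('209733', ['240', '480']))
    # -> https://token.4tube.com/209733/desktop/240+480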

View File

@@ -371,12 +371,13 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
                self.url_result(dailymotion_url, DailymotionIE.ie_key())
                for dailymotion_url in dailymotion_urls])

-        video_id, catalogue = self._search_regex(
-            (r'id-video=([^@]+@[^"]+)',
+        video_id = self._search_regex(
+            (r'player\.load[^;]+src:\s*["\']([^"\']+)',
+             r'id-video=([^@]+@[^"]+)',
              r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
-            webpage, 'video id').split('@')
+            webpage, 'video id')

-        return self._make_url_result(video_id, catalogue)
+        return self._make_url_result(video_id)


class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):

View File

@@ -94,8 +94,8 @@ class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
            chapter_number = None
            index = lesson.get('index')
            element_index = lesson.get('elementIndex')
-            if (isinstance(index, int) and isinstance(element_index, int) and
-                    index < element_index):
+            if (isinstance(index, int) and isinstance(element_index, int)
+                    and index < element_index):
                chapter_number = element_index - index
            chapter = (chapters[chapter_number - 1]
                       if chapter_number - 1 < len(chapters) else None)

View File

@@ -89,7 +89,10 @@ from .piksel import PikselIE
from .videa import VideaIE
from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE
-from .openload import OpenloadIE
+from .openload import (
+    OpenloadIE,
+    VerystreamIE,
+)
from .videopress import VideoPressIE
from .rutube import RutubeIE
from .limelight import LimelightBaseIE
@@ -2546,11 +2549,11 @@ class GenericIE(InfoExtractor):
            return self.url_result(mobj.group('url'))

        # Look for Ooyala videos
-        mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
-                re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-                re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
-                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+        mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
+                or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+                or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+                or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
+                or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
        if mobj is not None:
            embed_token = self._search_regex(
                r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
@@ -2580,19 +2583,6 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Mpora')

-        # Look for embedded NovaMov-based player
-        mobj = re.search(
-            r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
-                    (?P<url>http://(?:(?:embed|www)\.)?
-                        (?:novamov\.com|
-                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
-                           videoweed\.(?:es|com)|
-                           movshare\.(?:net|sx|ag)|
-                           divxstage\.(?:eu|net|ch|co|at|ag))
-                        /embed\.php.+?)\1''', webpage)
-        if mobj is not None:
-            return self.url_result(mobj.group('url'))
-
        # Look for embedded Facebook player
        facebook_urls = FacebookIE._extract_urls(webpage)
        if facebook_urls:
@@ -3017,6 +3007,12 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())

+        # Look for Verystream embeds
+        verystream_urls = VerystreamIE._extract_urls(webpage)
+        if verystream_urls:
+            return self.playlist_from_matches(
+                verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key())
+
        # Look for VideoPress embeds
        videopress_urls = VideoPressIE._extract_urls(webpage)
        if videopress_urls:
@@ -3212,8 +3208,8 @@ class GenericIE(InfoExtractor):
            else:
                formats.append({
                    'url': src,
-                    'ext': (mimetype2ext(src_type) or
-                            ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+                    'ext': (mimetype2ext(src_type)
+                            or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
                })
        if formats:
            self._sort_formats(formats)

View File

@@ -11,7 +11,7 @@ from ..utils import (

class GfycatIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^-/?#]+)'
    _TESTS = [{
        'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
        'info_dict': {
@@ -47,6 +47,9 @@ class GfycatIE(InfoExtractor):
    }, {
        'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
        'only_matching': True
+    }, {
+        'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
+        'only_matching': True
    }]

    def _real_extract(self, url):
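The tightened character class stops the id at the first '-', which is what lets the slug-suffixed URL in the added test match. A quick check of the updated pattern:

    import re

    VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^-/?#]+)'
    m = re.match(VALID_URL, 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball')
    print(m.group('id'))  # acceptablehappygoluckyharborporpoise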

View File

@@ -155,8 +155,8 @@ class HeiseIE(InfoExtractor):
            'id': video_id,
            'title': title,
            'description': description,
-            'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or
-                          self._og_search_thumbnail(webpage)),
+            'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image')
+                          or self._og_search_thumbnail(webpage)),
            'timestamp': parse_iso8601(
                self._html_search_meta('date', webpage)),
            'formats': formats,

View File

@@ -58,8 +58,8 @@ class HitboxIE(InfoExtractor):
        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
-            video_meta.get('media_description') or
-            video_meta.get('media_description_md'))
+            video_meta.get('media_description')
+            or video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))

View File

@@ -47,8 +47,8 @@ class HitRecordIE(InfoExtractor):
            tags = [
                t['text']
                for t in tags_list
-                if isinstance(t, dict) and t.get('text') and
-                isinstance(t['text'], compat_str)]
+                if isinstance(t, dict) and t.get('text')
+                and isinstance(t['text'], compat_str)]

        return {
            'id': video_id,

View File

@@ -77,13 +77,13 @@ class HKETVIE(InfoExtractor):
        title = (
            self._html_search_meta(
-                ('ed_title', 'search.ed_title'), webpage, default=None) or
-            self._search_regex(
+                ('ed_title', 'search.ed_title'), webpage, default=None)
+            or self._search_regex(
                r'data-favorite_title_(?:eng|chi)=(["\'])(?P<id>(?:(?!\1).)+)\1',
-                webpage, 'title', default=None, group='url') or
-            self._html_search_regex(
-                r'<h1>([^<]+)</h1>', webpage, 'title', default=None) or
-            self._og_search_title(webpage)
+                webpage, 'title', default=None, group='url')
+            or self._html_search_regex(
+                r'<h1>([^<]+)</h1>', webpage, 'title', default=None)
+            or self._og_search_title(webpage)
        )

        file_id = self._search_regex(

View File

@@ -4,40 +4,59 @@ from __future__ import unicode_literals
import hashlib
import hmac
import time
+import uuid

from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
+    str_or_none,
    try_get,
+    url_or_none,
)


class HotStarBaseIE(InfoExtractor):
    _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'

-    def _call_api(self, path, video_id, query_name='contentId'):
+    def _call_api_impl(self, path, video_id, query):
        st = int(time.time())
        exp = st + 6000
        auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
        auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
        response = self._download_json(
-            'https://api.hotstar.com/' + path,
-            video_id, headers={
+            'https://api.hotstar.com/' + path, video_id, headers={
                'hotstarauth': auth,
                'x-country-code': 'IN',
                'x-platform-code': 'JIO',
-            }, query={
-                query_name: video_id,
-                'tas': 10000,
-            })
+            }, query=query)
        if response['statusCode'] != 'OK':
            raise ExtractorError(
                response['body']['message'], expected=True)
        return response['body']['results']

+    def _call_api(self, path, video_id, query_name='contentId'):
+        return self._call_api_impl(path, video_id, {
+            query_name: video_id,
+            'tas': 10000,
+        })
+
+    def _call_api_v2(self, path, video_id):
+        return self._call_api_impl(
+            '%s/in/contents/%s' % (path, video_id), video_id, {
+                'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash',
+                'client': 'mweb',
+                'clientVersion': '6.18.0',
+                'deviceId': compat_str(uuid.uuid4()),
+                'osName': 'Windows',
+                'osVersion': '10',
+            })
+

class HotStarIE(HotStarBaseIE):
    IE_NAME = 'hotstar'
@@ -68,6 +87,10 @@ class HotStarIE(HotStarBaseIE):
    }, {
        'url': 'http://www.hotstar.com/1000000515',
        'only_matching': True,
+    }, {
+        # only available via api v2
+        'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
+        'only_matching': True,
    }]
    _GEO_BYPASS = False
@@ -95,26 +118,40 @@ class HotStarIE(HotStarBaseIE):
            raise ExtractorError('This video is DRM protected.', expected=True)

        formats = []
-        format_data = self._call_api('h/v1/play', video_id)['item']
-        format_url = format_data['playbackUrl']
-        ext = determine_ext(format_url)
-        if ext == 'm3u8':
+        geo_restricted = False
+        playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
+        for playback_set in playback_sets:
+            if not isinstance(playback_set, dict):
+                continue
+            format_url = url_or_none(playback_set.get('playbackUrl'))
+            if not format_url:
+                continue
+            tags = str_or_none(playback_set.get('tagsCombination')) or ''
+            if tags and 'encryption:plain' not in tags:
+                continue
+            ext = determine_ext(format_url)
            try:
-                formats.extend(self._extract_m3u8_formats(
-                    format_url, video_id, 'mp4', m3u8_id='hls'))
+                if 'package:hls' in tags or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4', m3u8_id='hls'))
+                elif 'package:dash' in tags or ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        format_url, video_id, mpd_id='dash'))
+                elif ext == 'f4m':
+                    # produce broken files
+                    pass
+                else:
+                    formats.append({
+                        'url': format_url,
+                        'width': int_or_none(playback_set.get('width')),
+                        'height': int_or_none(playback_set.get('height')),
+                    })
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                    self.raise_geo_restricted(countries=['IN'])
-                raise
-        elif ext == 'f4m':
-            # produce broken files
-            pass
-        else:
-            formats.append({
-                'url': format_url,
-                'width': int_or_none(format_data.get('width')),
-                'height': int_or_none(format_data.get('height')),
-            })
+                    geo_restricted = True
+                continue
+        if not formats and geo_restricted:
+            self.raise_geo_restricted(countries=['IN'])
        self._sort_formats(formats)

        return {
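For reference, the 'hotstarauth' header built by _call_api_impl above is an Akamai-style token: an st/exp/acl string followed by an SHA-256 HMAC over that same string. A standalone sketch (the key below is a placeholder, not the real _AKAMAI_ENCRYPTION_KEY):

    import hashlib
    import hmac
    import time

    KEY = b'\x00' * 16  # placeholder key, for illustration only
    st = int(time.time())
    exp = st + 6000
    auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
    auth += '~hmac=' + hmac.new(KEY, auth.encode(), hashlib.sha256).hexdigest()
    print(auth)  # st=...~exp=...~acl=/*~hmac=<64 hex chars>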

View File

@@ -60,8 +60,8 @@ class HRTiBaseIE(InfoExtractor):
            language=self._APP_LANGUAGE,
            application_id=self._APP_PUBLICATION_ID)

-        self._login_url = (modules['user']['resources']['login']['uri'] +
-                           '/format/json').format(session_id=self._session_id)
+        self._login_url = (modules['user']['resources']['login']['uri']
+                           + '/format/json').format(session_id=self._session_id)

        self._logout_url = modules['user']['resources']['logout']['uri']

View File

@@ -122,9 +122,9 @@ class InfoQIE(BokeCCBaseIE):
            formats = self._extract_bokecc_formats(webpage, video_id)
        else:
            formats = (
-                self._extract_rtmp_video(webpage) +
-                self._extract_http_video(webpage) +
-                self._extract_http_audio(webpage, video_id))
+                self._extract_rtmp_video(webpage)
+                + self._extract_http_video(webpage)
+                + self._extract_http_audio(webpage, video_id))

        self._sort_formats(formats)

View File

@@ -383,9 +383,9 @@ class IqiyiIE(InfoExtractor):
            self._sleep(5, video_id)

        self._sort_formats(formats)
-        title = (get_element_by_id('widget-videotitle', webpage) or
-                 clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or
-                 self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
+        title = (get_element_by_id('widget-videotitle', webpage)
+                 or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
+                 or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))

        return {
            'id': video_id,

View File

@@ -77,10 +77,10 @@ class ITVIE(InfoExtractor):
            return etree.SubElement(element, _add_ns(name))

        production_id = (
-            params.get('data-video-autoplay-id') or
-            '%s#001' % (
-                params.get('data-video-episode-id') or
-                video_id.replace('a', '/')))
+            params.get('data-video-autoplay-id')
+            or '%s#001' % (
+                params.get('data-video-episode-id')
+                or video_id.replace('a', '/')))

        req_env = etree.Element(_add_ns('soapenv:Envelope'))
        _add_sub_element(req_env, 'soapenv:Header')

View File

@@ -118,8 +118,8 @@ class KalturaIE(InfoExtractor):
                    (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
                    (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
                    (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
-                """, webpage) or
-            re.search(
+                """, webpage)
+            or re.search(
                r'''(?xs)
                    (?P<q1>["'])
                    (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@@ -132,8 +132,8 @@ class KalturaIE(InfoExtractor):
                        \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
                    )
                    (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
-                ''', webpage) or
-            re.search(
+                ''', webpage)
+            or re.search(
                r'''(?xs)
                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
                    (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)

View File

@@ -47,8 +47,8 @@ class KarriereVideosIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)

-        title = (self._html_search_meta('title', webpage, default=None) or
-                 self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
+        title = (self._html_search_meta('title', webpage, default=None)
+                 or self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))

        video_id = self._search_regex(
            r'/config/video/(.+?)\.xml', webpage, 'video id')

View File

@@ -80,8 +80,8 @@ class MotherlessIE(InfoExtractor):
        video_url = (self._html_search_regex(
            (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
             r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
-            webpage, 'video URL', default=None, group='url') or
-            'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
+            webpage, 'video URL', default=None, group='url')
+            or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
        age_limit = self._rta_search(webpage)
        view_count = str_to_int(self._html_search_regex(
            r'<strong>Views</strong>\s+([^<]+)<',

View File

@@ -84,8 +84,8 @@ class NDTVIE(InfoExtractor):

        # '__title' does not contain extra words such as sub-site name, "Video" etc.
        title = compat_urllib_parse_unquote_plus(
-            self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or
-            self._og_search_title(webpage))
+            self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None)
+            or self._og_search_title(webpage))

        filename = self._search_regex(
            r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')

View File

@@ -180,8 +180,8 @@ class AppleDailyIE(NextMediaIE):
    _URL_PATTERN = r'\{url: \'(.+)\'\}'

    def _fetch_title(self, page):
-        return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None) or
-                self._html_search_meta('description', page, 'news title'))
+        return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None)
+                or self._html_search_meta('description', page, 'news title'))

    def _fetch_thumbnail(self, page):
        return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)

View File

@@ -369,14 +369,14 @@ class NiconicoIE(InfoExtractor):
        video_detail = watch_api_data.get('videoDetail', {})

        thumbnail = (
-            get_video_info(['thumbnail_url', 'thumbnailURL']) or
-            self._html_search_meta('image', webpage, 'thumbnail', default=None) or
-            video_detail.get('thumbnail'))
+            get_video_info(['thumbnail_url', 'thumbnailURL'])
+            or self._html_search_meta('image', webpage, 'thumbnail', default=None)
+            or video_detail.get('thumbnail'))

        description = get_video_info('description')

-        timestamp = (parse_iso8601(get_video_info('first_retrieve')) or
-                     unified_timestamp(get_video_info('postedDateTime')))
+        timestamp = (parse_iso8601(get_video_info('first_retrieve'))
+                     or unified_timestamp(get_video_info('postedDateTime')))
        if not timestamp:
            match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
            if match:
@@ -395,9 +395,9 @@ class NiconicoIE(InfoExtractor):
                view_count = int_or_none(match.replace(',', ''))
        view_count = view_count or video_detail.get('viewCount')

-        comment_count = (int_or_none(get_video_info('comment_num')) or
-                         video_detail.get('commentCount') or
-                         try_get(api_data, lambda x: x['thread']['commentCount']))
+        comment_count = (int_or_none(get_video_info('comment_num'))
+                         or video_detail.get('commentCount')
+                         or try_get(api_data, lambda x: x['thread']['commentCount']))
        if not comment_count:
            match = self._html_search_regex(
                r'>Comments: <strong[^>]*>([^<]+)</strong>',
@@ -406,11 +406,11 @@ class NiconicoIE(InfoExtractor):
            comment_count = int_or_none(match.replace(',', ''))

        duration = (parse_duration(
-            get_video_info('length') or
-            self._html_search_meta(
-                'video:duration', webpage, 'video duration', default=None)) or
-            video_detail.get('length') or
-            get_video_info('duration'))
+            get_video_info('length')
+            or self._html_search_meta(
+                'video:duration', webpage, 'video duration', default=None))
+            or video_detail.get('length')
+            or get_video_info('duration'))

        webpage_url = get_video_info('watch_url') or url

View File

@@ -1,212 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
NO_DEFAULT,
sanitized_Request,
urlencode_postdata,
)
class NovaMovIE(InfoExtractor):
IE_NAME = 'novamov'
IE_DESC = 'NovaMov'
_VALID_URL_TEMPLATE = r'''(?x)
http://
(?:
(?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/|
(?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv=
)
(?P<id>[a-z\d]{13})
'''
_VALID_URL = _VALID_URL_TEMPLATE % {'host': r'novamov\.com'}
_HOST = 'www.novamov.com'
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
_FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);'
_TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
_DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
_URL_TEMPLATE = 'http://%s/video/%s'
_TEST = None
def _check_existence(self, webpage, video_id):
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
def _real_extract(self, url):
video_id = self._match_id(url)
url = self._URL_TEMPLATE % (self._HOST, video_id)
webpage = self._download_webpage(
url, video_id, 'Downloading video page')
self._check_existence(webpage, video_id)
def extract_filekey(default=NO_DEFAULT):
filekey = self._search_regex(
self._FILEKEY_REGEX, webpage, 'filekey', default=default)
if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'):
return self._search_regex(
r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default)
else:
return filekey
filekey = extract_filekey(default=None)
if not filekey:
fields = self._hidden_inputs(webpage)
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
'post url', default=url, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(url, post_url)
request = sanitized_Request(
post_url, urlencode_postdata(fields))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('Referer', post_url)
webpage = self._download_webpage(
request, video_id, 'Downloading continue to the video page')
self._check_existence(webpage, video_id)
filekey = extract_filekey()
title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title')
description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
api_response = self._download_webpage(
'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
'Downloading video api response')
response = compat_urlparse.parse_qs(api_response)
if 'error_msg' in response:
raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)
video_url = response['url'][0]
return {
'id': video_id,
'url': video_url,
'title': title,
'description': description
}
class WholeCloudIE(NovaMovIE):
IE_NAME = 'wholecloud'
IE_DESC = 'WholeCloud'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
_HOST = 'www.wholecloud.net'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
_DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
_TEST = {
'url': 'http://www.wholecloud.net/video/559e28be54d96',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': '559e28be54d96',
'ext': 'flv',
'title': 'dissapeared image',
'description': 'optical illusion dissapeared image magic illusion',
}
}
class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
_HOST = 'www.nowvideo.to'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<h4>([^<]+)</h4>'
_DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
_TEST = {
'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b',
'md5': '12c82cad4f2084881d8bc60ee29df092',
'info_dict': {
'id': 'f1d6fce9a968b',
'ext': 'flv',
'title': 'youtubedl test video BaWjenozKc',
'description': 'Description',
},
}
class VideoWeedIE(NovaMovIE):
IE_NAME = 'videoweed'
IE_DESC = 'VideoWeed'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}
_HOST = 'www.videoweed.es'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
_URL_TEMPLATE = 'http://%s/file/%s'
_TEST = {
'url': 'http://www.videoweed.es/file/b42178afbea14',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': 'b42178afbea14',
'ext': 'flv',
'title': 'optical illusion dissapeared image magic illusion',
'description': ''
},
}
class CloudTimeIE(NovaMovIE):
IE_NAME = 'cloudtime'
IE_DESC = 'CloudTime'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'}
_HOST = 'www.cloudtime.to'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'
_TEST = None
class AuroraVidIE(NovaMovIE):
IE_NAME = 'auroravid'
IE_DESC = 'AuroraVid'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'auroravid\.to'}
_HOST = 'www.auroravid.to'
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!<'
_TESTS = [{
'url': 'http://www.auroravid.to/video/4rurhn9x446jj',
'md5': '7205f346a52bbeba427603ba10d4b935',
'info_dict': {
'id': '4rurhn9x446jj',
'ext': 'flv',
'title': 'search engine optimization',
'description': 'search engine optimization is used to rank the web page in the google search engine'
},
'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
}, {
'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj',
'only_matching': True,
}]

View File

@@ -45,8 +45,8 @@ class NRKBaseIE(InfoExtractor):
        entries = []

        conviva = data.get('convivaStatistics') or {}
-        live = (data.get('mediaElementType') == 'Live' or
-                data.get('isLive') is True or conviva.get('isLive'))
+        live = (data.get('mediaElementType') == 'Live'
+                or data.get('isLive') is True or conviva.get('isLive'))

        def make_title(t):
            return self._live_title(t) if live else t

View File

@@ -31,8 +31,8 @@ class OoyalaBaseIE(InfoExtractor):
        title = metadata['title']

        auth_data = self._download_json(
-            self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
-            compat_urllib_parse_urlencode({
+            self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code)
+            + compat_urllib_parse_urlencode({
                'domain': domain,
                'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
                'embedToken': embed_token,

View File

@@ -43,9 +43,9 @@ def cookie_to_dict(cookie):
    if cookie.discard is not None:
        cookie_dict['discard'] = cookie.discard
    try:
-        if (cookie.has_nonstandard_attr('httpOnly') or
-                cookie.has_nonstandard_attr('httponly') or
-                cookie.has_nonstandard_attr('HttpOnly')):
+        if (cookie.has_nonstandard_attr('httpOnly')
+                or cookie.has_nonstandard_attr('httponly')
+                or cookie.has_nonstandard_attr('HttpOnly')):
            cookie_dict['httponly'] = True
    except TypeError:
        pass
@@ -244,7 +244,7 @@ class PhantomJSwrapper(object):

class OpenloadIE(InfoExtractor):
-    _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space|services)|oladblock\.(?:services|xyz|me)|openloed\.co)'
+    _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|live|space|services)|oladblock\.(?:services|xyz|me)|openloed\.co)'
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<host>
@@ -254,7 +254,10 @@ class OpenloadIE(InfoExtractor):
                        (?:f|embed)/
                        (?P<id>[a-zA-Z0-9-_]+)
                    ''' % _DOMAINS
+    _EMBED_WORD = 'embed'
+    _STREAM_WORD = 'f'
+    _REDIR_WORD = 'stream'
+    _URL_IDS = ('streamurl', 'streamuri', 'streamurj')
    _TESTS = [{
        'url': 'https://openload.co/f/kUEfGclsU9o',
        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
@@ -354,6 +357,9 @@ class OpenloadIE(InfoExtractor):
    }, {
        'url': 'https://oload.services/embed/bs1NWj1dCag/',
        'only_matching': True,
+    }, {
+        'url': 'https://oload.press/embed/drTBl1aOTvk/',
+        'only_matching': True,
    }, {
        'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
        'only_matching': True,
@@ -1948,11 +1954,16 @@ class OpenloadIE(InfoExtractor):
        '69.0.3497.28',
    )

-    @staticmethod
-    def _extract_urls(webpage):
+    @classmethod
+    def _extract_urls(cls, webpage):
        return re.findall(
-            r'<iframe[^>]+src=["\']((?:https?://)?%s/embed/[a-zA-Z0-9-_]+)'
-            % OpenloadIE._DOMAINS, webpage)
+            r'<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
+            % (cls._DOMAINS, cls._EMBED_WORD), webpage)
+
+    def _extract_decrypted_page(self, page_url, webpage, video_id, headers):
+        phantom = PhantomJSwrapper(self, required_version='2.0')
+        webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
+        return webpage

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -1964,9 +1975,9 @@ class OpenloadIE(InfoExtractor):
            'User-Agent': self._USER_AGENT_TPL % random.choice(self._CHROME_VERSIONS),
        }

-        for path in ('embed', 'f'):
+        for path in (self._EMBED_WORD, self._STREAM_WORD):
            page_url = url_pattern % path
-            last = path == 'f'
+            last = path == self._STREAM_WORD
            webpage = self._download_webpage(
                page_url, video_id, 'Downloading %s webpage' % path,
                headers=headers, fatal=last)
@@ -1978,21 +1989,20 @@ class OpenloadIE(InfoExtractor):
                raise ExtractorError('File not found', expected=True, video_id=video_id)
            break

-        phantom = PhantomJSwrapper(self, required_version='2.0')
-        webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
-
-        decoded_id = (get_element_by_id('streamurl', webpage) or
-                      get_element_by_id('streamuri', webpage) or
-                      get_element_by_id('streamurj', webpage) or
-                      self._search_regex(
-                          (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
-                           r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
-                           r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
-                           r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
-                           r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
-                          'stream URL'))
-
-        video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
+        webpage = self._extract_decrypted_page(page_url, webpage, video_id, headers)
+        for element_id in self._URL_IDS:
+            decoded_id = get_element_by_id(element_id, webpage)
+            if decoded_id:
+                break
+        if not decoded_id:
+            decoded_id = self._search_regex(
+                (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
+                 r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
+                 r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
+                 r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
+                 r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
+                'stream URL')
+        video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id)

        title = self._og_search_title(webpage, default=None) or self._search_regex(
            r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@@ -2012,3 +2022,38 @@ class OpenloadIE(InfoExtractor):
            'subtitles': subtitles,
            'http_headers': headers,
        }
+
+
+class VerystreamIE(OpenloadIE):
+    IE_NAME = 'verystream'
+
+    _DOMAINS = r'(?:verystream\.com)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?P<host>
+                            (?:www\.)?
+                            %s
+                        )/
+                        (?:stream|e)/
+                        (?P<id>[a-zA-Z0-9-_]+)
+                    ''' % _DOMAINS
+    _EMBED_WORD = 'e'
+    _STREAM_WORD = 'stream'
+    _REDIR_WORD = 'gettoken'
+    _URL_IDS = ('videolink', )
+    _TESTS = [{
+        'url': 'https://verystream.com/stream/c1GWQ9ngBBx/',
+        'md5': 'd3e8c5628ccb9970b65fd65269886795',
+        'info_dict': {
+            'id': 'c1GWQ9ngBBx',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny.mp4',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'https://verystream.com/e/c1GWQ9ngBBx/',
+        'only_matching': True,
+    }]
+
+    def _extract_decrypted_page(self, page_url, webpage, video_id, headers):
+        return webpage  # for Verystream, the webpage is already decrypted
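VerystreamIE reuses the whole OpenloadIE flow and only swaps the class-level knobs (_EMBED_WORD, _STREAM_WORD, _REDIR_WORD, _URL_IDS). A reduced sketch of how the redirect word alone changes the final URL; the values mirror the two classes above, hosts and ids are made up:

    class Redirector(object):
        _REDIR_WORD = 'stream'  # OpenloadIE default

        def video_url(self, host, decoded_id):
            return 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id)

    class VerystreamLike(Redirector):
        _REDIR_WORD = 'gettoken'

    print(Redirector().video_url('openload.co', 'abc123'))
    # -> https://openload.co/stream/abc123?mime=true
    print(VerystreamLike().video_url('verystream.com', 'abc123'))
    # -> https://verystream.com/gettoken/abc123?mime=true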

View File

@@ -50,8 +50,8 @@ class PodomaticIE(InfoExtractor):
        video_id = mobj.group('id')
        channel = mobj.group('channel') or mobj.group('channel_2')

-        json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
-                     '?permalink=true&rtmp=0') %
+        json_url = (('%s://%s.podomatic.com/entry/embed_params/%s'
+                     + '?permalink=true&rtmp=0') %
                    (mobj.group('proto'), channel, video_id))
        data_json = self._download_webpage(
            json_url, video_id, 'Downloading video info')

View File

@@ -1,101 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
)
from ..utils import (
int_or_none,
try_get,
unified_timestamp,
)
class PornFlipIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
'md5': '98c46639849145ae1fd77af532a9278c',
'info_dict': {
'id': 'wz7DfNhMmep',
'ext': 'mp4',
'title': '2 Amateurs swallow make his dream cumshots true',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 112,
'timestamp': 1481655502,
'upload_date': '20161213',
'uploader_id': '106786',
'uploader': 'figifoto',
'view_count': int,
'age_limit': 18,
}
}, {
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/v/NG9q6Pb_iK8',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://www.pornflip.com/v/%s' % video_id, video_id)
flashvars = compat_parse_qs(self._search_regex(
r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
webpage, 'flashvars', group='flashvars'))
title = flashvars['video_vars[title]'][0]
def flashvar(kind):
return try_get(
flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str)
formats = []
for key, value in flashvars.items():
if not (value and isinstance(value, list)):
continue
format_url = value[0]
if key == 'video_vars[hds_manifest]':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
continue
height = self._search_regex(
r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
if not height:
continue
formats.append({
'url': format_url,
'format_id': 'http-%s' % height,
'height': int_or_none(height),
})
self._sort_formats(formats)
uploader = self._html_search_regex(
(r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
webpage, 'uploader', fatal=False, group='uploader')
return {
'id': video_id,
'formats': formats,
'title': title,
'thumbnail': flashvar('big_thumb'),
'duration': int_or_none(flashvar('duration')),
'timestamp': unified_timestamp(self._html_search_meta(
'uploadDate', webpage, 'timestamp')),
'uploader_id': flashvar('author_id'),
'uploader': uploader,
'view_count': int_or_none(flashvar('views')),
'age_limit': 18,
}

View File

@@ -170,7 +170,7 @@ class PornHubIE(PornHubBaseIE):
        def dl_webpage(platform):
            self._set_cookie(host, 'platform', platform)
            return self._download_webpage(
-                'http://www.%s/view_video.php?viewkey=%s' % (host, video_id),
+                'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
                video_id, 'Downloading %s webpage' % platform)

        webpage = dl_webpage('pc')

View File

@@ -91,8 +91,8 @@ class RuutuIE(InfoExtractor):
                    extract_formats(child)
                elif child.tag.endswith('File'):
                    video_url = child.text
-                    if (not video_url or video_url in processed_urls or
-                            any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
+                    if (not video_url or video_url in processed_urls
+                            or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
                        continue
                    processed_urls.append(video_url)
                    ext = determine_ext(video_url)

View File

@@ -1,15 +1,18 @@
# coding: utf-8
from __future__ import unicode_literals

+import json
import re

from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_str,
+    compat_urlparse,
+)
from ..utils import (
    ExtractorError,
-    sanitized_Request,
-    std_headers,
-    urlencode_postdata,
    update_url_query,
)
@@ -31,44 +34,52 @@ class SafariBaseIE(InfoExtractor):
        if username is None:
            return

-        headers = std_headers.copy()
-        if 'Referer' not in headers:
-            headers['Referer'] = self._LOGIN_URL
-
-        login_page = self._download_webpage(
-            self._LOGIN_URL, None, 'Downloading login form', headers=headers)
-
-        def is_logged(webpage):
-            return any(re.search(p, webpage) for p in (
-                r'href=["\']/accounts/logout/', r'>Sign Out<'))
-
-        if is_logged(login_page):
+        _, urlh = self._download_webpage_handle(
+            'https://learning.oreilly.com/accounts/login-check/', None,
+            'Downloading login page')
+
+        def is_logged(urlh):
+            return 'learning.oreilly.com/home/' in compat_str(urlh.geturl())
+
+        if is_logged(urlh):
            self.LOGGED_IN = True
            return

-        csrf = self._html_search_regex(
-            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
-            login_page, 'csrf token')
-
-        login_form = {
-            'csrfmiddlewaretoken': csrf,
-            'email': username,
-            'password1': password,
-            'login': 'Sign In',
-            'next': '',
-        }
-
-        request = sanitized_Request(
-            self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
-        login_page = self._download_webpage(
-            request, None, 'Logging in')
-
-        if not is_logged(login_page):
+        redirect_url = compat_str(urlh.geturl())
+        parsed_url = compat_urlparse.urlparse(redirect_url)
+        qs = compat_parse_qs(parsed_url.query)
+        next_uri = compat_urlparse.urljoin(
+            'https://api.oreilly.com', qs['next'][0])
+
+        auth, urlh = self._download_json_handle(
+            'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
+            data=json.dumps({
+                'email': username,
+                'password': password,
+                'redirect_uri': next_uri,
+            }).encode(), headers={
+                'Content-Type': 'application/json',
+                'Referer': redirect_url,
+            }, expected_status=400)
+
+        credentials = auth.get('credentials')
+        if (not auth.get('logged_in') and not auth.get('redirect_uri')
+                and credentials):
            raise ExtractorError(
-                'Login failed; make sure your credentials are correct and try again.',
-                expected=True)
+                'Unable to login: %s' % credentials, expected=True)

-        self.LOGGED_IN = True
+        # oreilly serves two same groot_sessionid cookies in Set-Cookie header
+        # and expects first one to be actually set
+        self._apply_first_set_cookie_header(urlh, 'groot_sessionid')
+
+        _, urlh = self._download_webpage_handle(
+            auth.get('redirect_uri') or next_uri, None, 'Completing login',)
+
+        if is_logged(urlh):
+            self.LOGGED_IN = True
+            return
+
+        raise ExtractorError('Unable to log in')


class SafariIE(SafariBaseIE):
@@ -76,7 +87,7 @@ class SafariIE(SafariBaseIE):
    IE_DESC = 'safaribooksonline.com online video'
    _VALID_URL = r'''(?x)
                        https?://
-                            (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
+                            (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
                            (?:
                                library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
                                videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
@@ -107,6 +118,9 @@ class SafariIE(SafariBaseIE):
    }, {
        'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
        'only_matching': True,
+    }, {
+        'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
+        'only_matching': True,
    }]

    _PARTNER_ID = '1926081'
@@ -163,7 +177,7 @@ class SafariIE(SafariBaseIE):

class SafariApiIE(SafariBaseIE):
    IE_NAME = 'safari:api'
-    _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
@@ -188,7 +202,7 @@ class SafariCourseIE(SafariBaseIE):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
-                            (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
+                            (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
                            (?:
                                library/view/[^/]+|
                                api/v1/book|
@@ -219,6 +233,9 @@ class SafariCourseIE(SafariBaseIE):
    }, {
        'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
        'only_matching': True,
+    }, {
+        'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+        'only_matching': True,
    }]

    @classmethod
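The rewritten login no longer scrapes a CSRF form; it probes login-check, follows the redirect, and treats landing on learning.oreilly.com/home/ as success. The check itself is trivial; a sketch using only the URLs visible in the diff above:

    def is_logged(final_url):
        return 'learning.oreilly.com/home/' in final_url

    print(is_logged('https://learning.oreilly.com/home/'))                    # True
    print(is_logged('https://www.oreilly.com/member/login/?next=/home/'))     # False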

View File

@@ -55,8 +55,8 @@ class SBSIE(InfoExtractor):
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)

        urls = player_params['releaseUrls']
-        theplatform_url = (urls.get('progressive') or urls.get('html') or
-                           urls.get('standard') or player_params['relatedItemsURL'])
+        theplatform_url = (urls.get('progressive') or urls.get('html')
+                           or urls.get('standard') or player_params['relatedItemsURL'])

        return {
            '_type': 'url_transparent',

View File

@@ -3,8 +3,11 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import (
+    determine_ext,
    ExtractorError,
    int_or_none,
+    KNOWN_EXTENSIONS,
+    parse_filesize,
    url_or_none,
    urlencode_postdata,
)
@@ -22,10 +25,8 @@ class SharedBaseIE(InfoExtractor):

        video_url = self._extract_video_url(webpage, video_id, url)

-        title = compat_b64decode(self._html_search_meta(
-            'full:title', webpage, 'title')).decode('utf-8')
-        filesize = int_or_none(self._html_search_meta(
-            'full:size', webpage, 'file size', fatal=False))
+        title = self._extract_title(webpage)
+        filesize = int_or_none(self._extract_filesize(webpage))

        return {
            'id': video_id,
@@ -35,6 +36,14 @@ class SharedBaseIE(InfoExtractor):
            'title': title,
        }

+    def _extract_title(self, webpage):
+        return compat_b64decode(self._html_search_meta(
+            'full:title', webpage, 'title')).decode('utf-8')
+
+    def _extract_filesize(self, webpage):
+        return self._html_search_meta(
+            'full:size', webpage, 'file size', fatal=False)
+

class SharedIE(SharedBaseIE):
    IE_DESC = 'shared.sx'
@@ -82,11 +91,27 @@ class VivoIE(SharedBaseIE):
            'id': 'd7ddda0e78',
            'ext': 'mp4',
            'title': 'Chicken',
-            'filesize': 528031,
+            'filesize': 515659,
        },
    }

-    def _extract_video_url(self, webpage, video_id, *args):
+    def _extract_title(self, webpage):
+        title = self._html_search_regex(
+            r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
+            'title', default=None, group='title')
+        if title:
+            ext = determine_ext(title)
+            if ext.lower() in KNOWN_EXTENSIONS:
+                title = title.rpartition('.' + ext)[0]
+            return title
+        return self._og_search_title(webpage)
+
+    def _extract_filesize(self, webpage):
+        return parse_filesize(self._search_regex(
+            r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
+            webpage, 'filesize', fatal=False))
+
+    def _extract_video_url(self, webpage, video_id, url):
        def decode_url(encoded_url):
            return compat_b64decode(encoded_url).decode('utf-8')
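VivoIE's new _extract_title drops a recognizable file extension from the scraped name ('Chicken.mp4' becomes 'Chicken'). A self-contained sketch of that trimming step, with a simplified stand-in for determine_ext and an abbreviated extension list:

    KNOWN_EXTENSIONS = ('mp4', 'flv', 'webm')  # abbreviated for the sketch

    def strip_known_ext(title):
        ext = title.rpartition('.')[2].lower()
        if ext in KNOWN_EXTENSIONS:
            return title.rpartition('.' + ext)[0]
        return title

    print(strip_known_ext('Chicken.mp4'))  # Chicken
    print(strip_known_ext('Chicken'))      # Chicken (unchanged)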

View File

@@ -10,34 +10,25 @@ from ..utils import (
)


-class SkySportsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
-        'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
-        'info_dict': {
-            'id': '10328419',
-            'ext': 'mp4',
-            'title': 'Bale: It\'s our time to shine',
-            'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
-        },
-        'add_ie': ['Ooyala'],
-    }
-
+class SkyBaseIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        video_data = extract_attributes(self._search_regex(
-            r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data'))
+            r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)',
+            webpage, 'video data'))

        video_url = 'ooyala:%s' % video_data['data-video-id']
        if video_data.get('data-token-required') == 'true':
-            token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
+            token_fetch_options = self._parse_json(video_data.get(
+                'data-token-fetch-options', '{}'), video_id, fatal=False) or {}
            token_fetch_url = token_fetch_options.get('url')
            if token_fetch_url:
-                embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
+                embed_token = self._download_webpage(urljoin(
+                    url, token_fetch_url), video_id, fatal=False)
                if embed_token:
-                    video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
+                    video_url = smuggle_url(
+                        video_url, {'embed_token': embed_token.strip('"')})

        return {
            '_type': 'url_transparent',
@@ -47,3 +38,33 @@ class SkySportsIE(InfoExtractor):
            'description': strip_or_none(self._og_search_description(webpage)),
            'ie_key': 'Ooyala',
        }
+
+
+class SkySportsIE(SkyBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
+        'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
+        'info_dict': {
+            'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ',
+            'ext': 'mp4',
+            'title': 'Bale: It\'s our time to shine',
+            'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
+        },
+        'add_ie': ['Ooyala'],
+    }
+
+
+class SkyNewsIE(SkyBaseIE):
+    _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962',
+        'md5': 'd6327e581473cea9976a3236ded370cd',
+        'info_dict': {
+            'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
+            'ext': 'mp4',
+            'title': 'Russian plane inspected after deadly fire',
+            'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.',
+        },
+        'add_ie': ['Ooyala'],
+    }

View File

@@ -106,7 +106,16 @@ class SRGSSRIE(InfoExtractor):

class SRGSSRPlayIE(InfoExtractor):
    IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
-    _VALID_URL = r'https?://(?:(?:www|play)\.)?(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/[^/]+/(?P<type>video|audio)/[^?]+\?id=(?P<id>[0-9a-f\-]{36}|\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:(?:www|play)\.)?
+                        (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
+                        (?:
+                            [^/]+/(?P<type>video|audio)/[^?]+|
+                            popup(?P<type_2>video|audio)player
+                        )
+                        \?id=(?P<id>[0-9a-f\-]{36}|\d+)
+                    '''

    _TESTS = [{
        'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
@@ -163,9 +172,15 @@ class SRGSSRPlayIE(InfoExtractor):
            # m3u8 download
            'skip_download': True,
        }
+    }, {
+        'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
-        bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
+        mobj = re.match(self._VALID_URL, url)
+        bu = mobj.group('bu')
+        media_type = mobj.group('type') or mobj.group('type_2')
+        media_id = mobj.group('id')
        # other info can be extracted from url + '&layout=json'
        return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')
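The popup player URLs carry the media type in a second named group, hence the mobj.group('type') or mobj.group('type_2') fallback above. A quick verification of the new pattern against the added test URL:

    import re

    VALID_URL = r'''(?x)
        https?://
        (?:(?:www|play)\.)?
        (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
        (?:
            [^/]+/(?P<type>video|audio)/[^?]+|
            popup(?P<type_2>video|audio)player
        )
        \?id=(?P<id>[0-9a-f\-]{36}|\d+)
    '''
    mobj = re.match(VALID_URL, 'https://www.srf.ch/play/tv/popupvideoplayer'
                    '?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01')
    print(mobj.group('type') or mobj.group('type_2'))  # video
    print(mobj.group('id'))  # c4dba0ca-e75b-43b2-a34f-f708a4932e01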

View File

@@ -45,7 +45,7 @@ class StreamcloudIE(InfoExtractor):
                value="([^"]*)"
            ''', orig_webpage)

-        self._sleep(12, video_id)
+        self._sleep(6, video_id)

        webpage = self._download_webpage(
            url, video_id, data=urlencode_postdata(fields), headers={

View File

@@ -185,7 +185,7 @@ class SVTPlayIE(SVTPlayBaseIE):

    def _extract_by_video_id(self, video_id, webpage=None):
        data = self._download_json(
-            'https://api.svt.se/video/%s' % video_id,
+            'https://api.svt.se/videoplayer-api/video/%s' % video_id,
            video_id, headers=self.geo_verification_headers())
        info_dict = self._extract_video(data, video_id)
        if not info_dict.get('title'):

View File

@@ -7,7 +7,7 @@ from ..compat import compat_urlparse

 class Tele5IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P<id>[^?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
         'info_dict': {
@@ -21,10 +21,22 @@ class Tele5IE(InfoExtractor):
             'skip_download': True,
         },
     }, {
-        'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
+        'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
         'only_matching': True,
     }, {
-        'url': 'https://www.tele5.de/tv/dark-matter/videos',
+        'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.tele5.de/anders-ist-sevda/',
         'only_matching': True,
     }]

@@ -36,8 +48,9 @@ class Tele5IE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         video_id = self._html_search_regex(
-            r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
-            webpage, 'video id')
+            (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
+             r'\s+id\s*=\s*["\']player_(\d{6,})',
+             r'\bdata-id\s*=\s*["\'](\d{6,})'), webpage, 'video id')

         return self.url_result(
             'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id,
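The id lookup now hands _html_search_regex a tuple of patterns that are tried in order. A minimal standalone equivalent of that fallback behaviour (the HTML snippet is invented for illustration):

    import re

    def search_first(patterns, haystack):
        # Try each pattern in order and return the first capturing match;
        # _html_search_regex applies the same first-match-wins rule to a
        # tuple of patterns.
        for pattern in patterns:
            mobj = re.search(pattern, haystack)
            if mobj:
                return mobj.group(1)
        return None

    html = '<div id="player_1609440" class="player"></div>'  # invented snippet
    print(search_first(
        (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
         r'\s+id\s*=\s*["\']player_(\d{6,})',
         r'\bdata-id\s*=\s*["\'](\d{6,})'), html))  # -> 1609440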

@@ -207,7 +207,7 @@ class TVNowNewBaseIE(InfoExtractor):
         return result


-"""
+r"""
 TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
 when api.tvnow.de is shut down. This version can't bypass premium checks though.
 class TVNowIE(TVNowNewBaseIE):

@@ -14,7 +14,18 @@ from ..utils import (

 class TwentyFourVideoIE(InfoExtractor):
     IE_NAME = '24video'
-    _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                    (?P<host>
+                        (?:(?:www|porno)\.)?24video\.
+                        (?:net|me|xxx|sexy?|tube|adult|site)
+                    )/
+                    (?:
+                        video/(?:(?:view|xml)/)?|
+                        player/new24_play\.swf\?id=
+                    )
+                    (?P<id>\d+)
+                '''

     _TESTS = [{
         'url': 'http://www.24video.net/video/view/1044982',
@@ -42,6 +53,12 @@ class TwentyFourVideoIE(InfoExtractor):
     }, {
         'url': 'http://www.24video.tube/video/view/2363750',
         'only_matching': True,
+    }, {
+        'url': 'https://www.24video.site/video/view/2640421',
+        'only_matching': True,
+    }, {
+        'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
+        'only_matching': True,
     }]

     def _real_extract(self, url):

@@ -275,8 +275,8 @@ class VevoIE(VevoBaseIE):

         genres = video_info.get('genres')
         genre = (
-            genres[0] if genres and isinstance(genres, list) and
-            isinstance(genres[0], compat_str) else None)
+            genres[0] if genres and isinstance(genres, list)
+            and isinstance(genres[0], compat_str) else None)

         is_explicit = video_info.get('isExplicit')
         if is_explicit is True:
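This hunk, like the matching ones in vk, yandexvideo, youku, zattoo, the postprocessors and utils further down, changes no behaviour: binary operators simply move from the end of a line to the start of the continuation line, presumably to settle the codebase on one side of flake8's W503/W504 line-break checks. The shape of the change in miniature (hypothetical helper, for illustration only):

    def first_genre(video_info):
        # Operator-leading continuation style, as adopted across this commit.
        genres = video_info.get('genres')
        return (genres[0] if genres and isinstance(genres, list)
                and isinstance(genres[0], str) else None)

    print(first_genre({'genres': ['Pop']}))  # 'Pop'
    print(first_genre({}))                   # None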

@@ -3,7 +3,6 @@ from __future__ import unicode_literals

 import collections
 import re
-import sys

 from .common import InfoExtractor
 from ..compat import compat_urlparse
@@ -45,24 +44,9 @@ class VKBaseIE(InfoExtractor):
             'pass': password.encode('cp1251'),
         })

-        # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
-        # and expects the first one to be set rather than second (see
-        # https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201).
-        # As of RFC6265 the newer one cookie should be set into cookie store
-        # what actually happens.
-        # We will workaround this VK issue by resetting the remixlhk cookie to
-        # the first one manually.
-        for header, cookies in url_handle.headers.items():
-            if header.lower() != 'set-cookie':
-                continue
-            if sys.version_info[0] >= 3:
-                cookies = cookies.encode('iso-8859-1')
-            cookies = cookies.decode('utf-8')
-            remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
-            if remixlhk:
-                value, domain = remixlhk.groups()
-                self._set_cookie(domain, 'remixlhk', value)
-                break
+        # vk serves two same remixlhk cookies in Set-Cookie header and expects
+        # first one to be actually set
+        self._apply_first_set_cookie_header(url_handle, 'remixlhk')

         login_page = self._download_webpage(
             'https://login.vk.com/?act=login', None,
@@ -443,8 +427,8 @@ class VKIE(VKBaseIE):
                 format_url = url_or_none(format_url)
                 if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                     continue
-                if (format_id.startswith(('url', 'cache')) or
-                        format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
+                if (format_id.startswith(('url', 'cache'))
+                        or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
                     height = int_or_none(self._search_regex(
                         r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
                     formats.append({
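The new _apply_first_set_cookie_header helper lives in common.py and is not part of this diff; judging from the block it replaces, it presumably generalizes the removed remixlhk workaround to any cookie name. A rough standalone sketch under that assumption (names and signature are illustrative, not the actual common.py code):

    import re
    import sys

    def apply_first_set_cookie_header(url_handle, cookie_name, set_cookie):
        # Scan the raw Set-Cookie header and re-set the FIRST occurrence of
        # cookie_name; per RFC 6265 a cookie jar would otherwise keep the
        # last duplicate, which is the one vk does not expect.
        for header, cookies in url_handle.headers.items():
            if header.lower() != 'set-cookie':
                continue
            if sys.version_info[0] >= 3:
                cookies = cookies.encode('iso-8859-1')
            cookies = cookies.decode('utf-8')
            mobj = re.search(
                r'%s=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)' % re.escape(cookie_name),
                cookies)
            if mobj:
                value, domain = mobj.groups()
                set_cookie(domain, cookie_name, value)
                break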

@@ -5,150 +5,83 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    extract_attributes,
     float_or_none,
+    get_element_by_class,
+    strip_or_none,
+    unified_timestamp,
 )


 class VRTIE(InfoExtractor):
-    IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
-    _VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
-    _TESTS = [
-        # deredactie.be
-        {
-            'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL',
-            'md5': '4cebde1eb60a53782d4f3992cbd46ec8',
-            'info_dict': {
-                'id': '2129880',
-                'ext': 'flv',
-                'title': 'Het journaal L - 25/10/14',
-                'description': None,
-                'timestamp': 1414271750.949,
-                'upload_date': '20141025',
-                'duration': 929,
-            },
-            'skip': 'HTTP Error 404: Not Found',
-        },
-        # sporza.be
-        {
-            'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time',
-            'md5': '11f53088da9bf8e7cfc42456697953ff',
-            'info_dict': {
-                'id': '2124639',
-                'ext': 'flv',
-                'title': 'Bekijk Extra Time van 20 oktober',
-                'description': 'md5:83ac5415a4f1816c6a93f8138aef2426',
-                'timestamp': 1413835980.560,
-                'upload_date': '20141020',
-                'duration': 3238,
-            },
-            'skip': 'HTTP Error 404: Not Found',
-        },
-        # cobra.be
-        {
-            'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari',
-            'md5': '78a2b060a5083c4f055449a72477409d',
-            'info_dict': {
-                'id': '2126050',
-                'ext': 'flv',
-                'title': 'Bret Easton Ellis in Café Corsari',
-                'description': 'md5:f699986e823f32fd6036c1855a724ee9',
-                'timestamp': 1413967500.494,
-                'upload_date': '20141022',
-                'duration': 661,
-            },
-            'skip': 'HTTP Error 404: Not Found',
-        },
-        {
-            # YouTube video
-            'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
-            'md5': 'b8b93da1df1cea6c8556255a796b7d61',
-            'info_dict': {
-                'id': 'Wji-BZ0oCwg',
-                'ext': 'mp4',
-                'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer',
-                'description': 'md5:8e468944dce15567a786a67f74262583',
-                'uploader': 'Star Wars',
-                'uploader_id': 'starwars',
-                'upload_date': '20160407',
-            },
-            'add_ie': ['Youtube'],
-        },
-        {
-            'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
-            'info_dict': {
-                'id': '2377055',
-                'ext': 'mp4',
-                'title': 'Cafe Derby',
-                'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.',
-                'upload_date': '20150626',
-                'timestamp': 1435305240.769,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            }
-        }
-    ]
+    IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
+        'md5': 'e1663accf5cf13f375f3cd0d10476669',
+        'info_dict': {
+            'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
+            'ext': 'mp4',
+            'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
+            'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.',
+            'timestamp': 1557924660,
+            'upload_date': '20190515',
+            'duration': 31.2,
+        },
+    }, {
+        'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
+        'md5': '910bba927566e9ab992278f647eb4b75',
+        'info_dict': {
+            'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
+            'ext': 'mp4',
+            'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters',
+            'timestamp': 1557923760,
+            'upload_date': '20190515',
+            'duration': 115.17,
+        },
+    }, {
+        'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/',
+        'only_matching': True,
+    }]
+    _CLIENT_MAP = {
+        'vrt.be/vrtnws': 'vrtnieuws',
+        'sporza.be': 'sporza',
+    }

     def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        video_id = self._search_regex(
-            r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False)
-
-        src = self._search_regex(
-            r'data-video-src="([^"]+)"', webpage, 'video src', default=None)
-
-        video_type = self._search_regex(
-            r'data-video-type="([^"]+)"', webpage, 'video type', default=None)
-
-        if video_type == 'YouTubeVideo':
-            return self.url_result(src, 'Youtube')
-
-        formats = []
-
-        mobj = re.search(
-            r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
-            webpage)
-        if mobj:
-            formats.extend(self._extract_m3u8_formats(
-                '%s/%s' % (mobj.group('server'), mobj.group('path')),
-                video_id, 'mp4', m3u8_id='hls', fatal=False))
-
-        if src:
-            formats = self._extract_wowza_formats(src, video_id)
-            if 'data-video-geoblocking="true"' not in webpage:
-                for f in formats:
-                    if f['url'].startswith('rtsp://'):
-                        http_format = f.copy()
-                        http_format.update({
-                            'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
-                            'format_id': f['format_id'].replace('rtsp', 'http'),
-                            'protocol': 'http',
-                        })
-                        formats.append(http_format)
-
-        if not formats and 'data-video-geoblocking="true"' in webpage:
-            self.raise_geo_restricted('This video is only available in Belgium')
-
-        self._sort_formats(formats)
-
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage, default=None)
-        thumbnail = self._og_search_thumbnail(webpage)
-        timestamp = float_or_none(self._search_regex(
-            r'data-video-sitestat-pubdate="(\d+)"', webpage, 'timestamp', fatal=False), 1000)
-        duration = float_or_none(self._search_regex(
-            r'data-video-duration="(\d+)"', webpage, 'duration', fatal=False), 1000)
+        site, display_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, display_id)
+        attrs = extract_attributes(self._search_regex(
+            r'(<[^>]+class="vrtvideo"[^>]*>)', webpage, 'vrt video'))
+
+        asset_id = attrs['data-videoid']
+        publication_id = attrs.get('data-publicationid')
+        if publication_id:
+            asset_id = publication_id + '$' + asset_id
+        client = attrs.get('data-client') or self._CLIENT_MAP[site]
+
+        title = strip_or_none(get_element_by_class(
+            'vrt-title', webpage) or self._html_search_meta(
+            ['og:title', 'twitter:title', 'name'], webpage))
+        description = self._html_search_meta(
+            ['og:description', 'twitter:description', 'description'], webpage)
+        if description == '':
+            description = None
+        timestamp = unified_timestamp(self._html_search_meta(
+            'article:published_time', webpage))

         return {
-            'id': video_id,
+            '_type': 'url_transparent',
+            'id': asset_id,
+            'display_id': display_id,
             'title': title,
             'description': description,
-            'thumbnail': thumbnail,
+            'thumbnail': attrs.get('data-posterimage'),
             'timestamp': timestamp,
-            'duration': duration,
-            'formats': formats,
+            'duration': float_or_none(attrs.get('data-duration'), 1000),
+            'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client, asset_id),
+            'ie_key': 'Canvas',
         }
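The rewritten extractor reads everything it needs from the attributes of a single <div class="vrtvideo"> tag and hands the actual media extraction to the Canvas extractor via a url_transparent result. A standalone sketch of the attribute step (the markup is invented; extract_attributes below is a simplified stand-in for the youtube_dl.utils helper):

    import re

    def extract_attributes(tag):
        # Simplified stand-in: pull name="value" pairs out of one start tag.
        return dict(re.findall(r'([a-z-]+)="([^"]*)"', tag))

    # Invented markup mirroring what the extractor looks for.
    html = ('<div class="vrtvideo" data-videoid="vid-2ca50305" '
            'data-publicationid="pbs-pub-7855fc7b" data-duration="31200">')

    attrs = extract_attributes(html)
    asset_id = attrs['data-videoid']
    if attrs.get('data-publicationid'):
        asset_id = attrs['data-publicationid'] + '$' + asset_id
    print(asset_id)  # pbs-pub-7855fc7b$vid-2ca50305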

@@ -198,14 +198,15 @@ class VRVIE(VRVBaseIE):
         self._sort_formats(formats)

         subtitles = {}
-        for subtitle in streams_json.get('subtitles', {}).values():
-            subtitle_url = subtitle.get('url')
-            if not subtitle_url:
-                continue
-            subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
-                'url': subtitle_url,
-                'ext': subtitle.get('format', 'ass'),
-            })
+        for k in ('captions', 'subtitles'):
+            for subtitle in streams_json.get(k, {}).values():
+                subtitle_url = subtitle.get('url')
+                if not subtitle_url:
+                    continue
+                subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
+                    'url': subtitle_url,
+                    'ext': subtitle.get('format', 'ass'),
+                })

         thumbnails = []
         for thumbnail in video_data.get('images', {}).get('thumbnails', []):
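The only change is one more loop level: both the 'captions' and the 'subtitles' objects of the stream response are now folded into a single locale-keyed mapping. With an invented streams_json excerpt:

    streams_json = {
        'captions': {'en-US': {'url': 'https://example.invalid/en.ass',
                               'locale': 'en-US', 'format': 'ass'}},
        'subtitles': {'es-LA': {'url': 'https://example.invalid/es.ass',
                                'locale': 'es-LA', 'format': 'ass'}},
    }

    subtitles = {}
    for k in ('captions', 'subtitles'):
        for subtitle in streams_json.get(k, {}).values():
            subtitle_url = subtitle.get('url')
            if not subtitle_url:
                continue
            subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
                'url': subtitle_url,
                'ext': subtitle.get('format', 'ass'),
            })
    print(sorted(subtitles))  # ['en-US', 'es-LA']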

@@ -511,6 +511,8 @@ class YahooGyaOPlayerIE(InfoExtractor):
             'https://gyao.yahoo.co.jp/dam/v1/videos/' + video_id,
             video_id, query={
                 'fields': 'longDescription,title,videoId',
+            }, headers={
+                'X-User-Agent': 'Unknown Pc GYAO!/2.0.0 Web',
             })
         return {
             '_type': 'url_transparent',
@@ -526,7 +528,7 @@ class YahooGyaOPlayerIE(InfoExtractor):

 class YahooGyaOIE(InfoExtractor):
     IE_NAME = 'yahoo:gyao'
-    _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/p|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+)'
+    _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title/[^/]+)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
     _TESTS = [{
         'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
         'info_dict': {
@@ -536,6 +538,9 @@ class YahooGyaOIE(InfoExtractor):
     }, {
         'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
         'only_matching': True,
+    }, {
+        'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
+        'only_matching': True,
     }]

     def _real_extract(self, url):

@@ -70,9 +70,9 @@ class YandexVideoIE(InfoExtractor):

         description = content.get('description')
         thumbnail = content.get('thumbnail')

-        timestamp = (int_or_none(content.get('release_date')) or
-                     int_or_none(content.get('release_date_ut')) or
-                     int_or_none(content.get('start_time')))
+        timestamp = (int_or_none(content.get('release_date'))
+                     or int_or_none(content.get('release_date_ut'))
+                     or int_or_none(content.get('start_time')))
         duration = int_or_none(content.get('duration'))
         series = content.get('program_title')
         age_limit = int_or_none(content.get('restriction_age'))

@@ -258,8 +258,8 @@ class YoukuShowIE(InfoExtractor):
             transform_source=lambda s: js_to_json(strip_jsonp(s))).get('html')
         if playlist_data is None:
             return [None, None]
-        drama_list = (get_element_by_class('p-drama-grid', playlist_data) or
-                      get_element_by_class('p-drama-half-row', playlist_data))
+        drama_list = (get_element_by_class('p-drama-grid', playlist_data)
+                      or get_element_by_class('p-drama-half-row', playlist_data))
         if drama_list is None:
             raise ExtractorError('No episodes found')
         video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)

@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
 from ..compat import (
     compat_chr,
+    compat_HTTPError,
     compat_kwargs,
     compat_parse_qs,
     compat_urllib_parse_unquote,
@@ -288,10 +289,25 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
             if not mobj:
                 break

-            more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
-                'Downloading page #%s' % page_num,
-                transform_source=uppercase_escape)
+            count = 0
+            retries = 3
+            while count <= retries:
+                try:
+                    # Downloading page may result in intermittent 5xx HTTP error
+                    # that is usually worked around with a retry
+                    more = self._download_json(
+                        'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                        'Downloading page #%s%s'
+                        % (page_num, ' (retry #%d)' % count if count else ''),
+                        transform_source=uppercase_escape)
+                    break
+                except ExtractorError as e:
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
+                        count += 1
+                        if count <= retries:
+                            continue
+                    raise
+
             content_html = more['content_html']
             if not content_html.strip():
                 # Some webpages show a "Load more" button but they don't
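The retry loop in a self-contained form (TransientHTTPError and the flaky fetcher below are stand-ins for youtube-dl's ExtractorError/compat_HTTPError plumbing):

    class TransientHTTPError(Exception):
        # Stand-in for an HTTP 5xx failure.
        def __init__(self, code):
            super(TransientHTTPError, self).__init__('HTTP %d' % code)
            self.code = code

    def fetch_with_retries(fetch, retries=3, retry_codes=(500, 503)):
        # Same shape as the hunk above: retry a bounded number of times on
        # transient server errors, re-raise anything else immediately.
        count = 0
        while count <= retries:
            try:
                return fetch()
            except TransientHTTPError as e:
                if e.code in retry_codes:
                    count += 1
                    if count <= retries:
                        continue
                raise

    attempts = {'n': 0}

    def flaky():
        # Fails twice with a 503, then succeeds.
        attempts['n'] += 1
        if attempts['n'] < 3:
            raise TransientHTTPError(503)
        return 'page content'

    print(fetch_with_retries(flaky))  # 'page content' after two retries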
@@ -1773,9 +1789,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError(
                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)

-        if video_info.get('license_info'):
-            raise ExtractorError('This video is DRM protected.', expected=True)
-
         video_details = try_get(
             player_response, lambda x: x['videoDetails'], dict) or {}

@@ -1911,7 +1924,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             formats = []
             for url_data_str in encoded_url_map.split(','):
                 url_data = compat_parse_qs(url_data_str)
-                if 'itag' not in url_data or 'url' not in url_data:
+                if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
                     continue
                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
                 # Unsupported FORMAT_STREAM_TYPE_OTF
@@ -1971,7 +1984,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         signature = self._decrypt_signature(
                             encrypted_sig, video_id, player_url, age_gate)
-                        url += '&signature=' + signature
+                        sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
+                        url += '&%s=%s' % (sp, signature)
                     if 'ratebypass' not in url:
                         url += '&ratebypass=yes'
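Newer player responses carry an 'sp' field naming the query parameter the decrypted signature must be sent back under (typically 'sig'); older responses still expect 'signature'. With hypothetical url_data values:

    # Hypothetical parsed stream-map entries; real ones come from compat_parse_qs.
    for url_data in (
        {'url': ['https://example.invalid/videoplayback?expire=0'], 'sp': ['sig']},
        {'url': ['https://example.invalid/videoplayback?expire=0']},
    ):
        signature = 'DECRYPTED'
        sp = (url_data.get('sp') or [None])[0] or 'signature'
        print(url_data['url'][0] + '&%s=%s' % (sp, signature))
    # ...?expire=0&sig=DECRYPTED
    # ...?expire=0&signature=DECRYPTED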
@@ -2035,8 +2049,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             url_or_none(try_get(
                 player_response,
                 lambda x: x['streamingData']['hlsManifestUrl'],
-                compat_str)) or
-            url_or_none(try_get(
+                compat_str))
+            or url_or_none(try_get(
                 video_info, lambda x: x['hlsvp'][0], compat_str)))
         if manifest_url:
             formats = []
@@ -2084,8 +2098,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         else:
             self._downloader.report_warning('unable to extract uploader nickname')

-        channel_id = self._html_search_meta(
-            'channelId', video_webpage, 'channel id')
+        channel_id = (
+            str_or_none(video_details.get('channelId'))
+            or self._html_search_meta(
+                'channelId', video_webpage, 'channel id', default=None)
+            or self._search_regex(
+                r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+                video_webpage, 'channel id', default=None, group='id'))
         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None

         # thumbnail image
@@ -2301,6 +2320,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     '"token" parameter not in video info for unknown reason',
                     video_id=video_id)

+        if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
         self._sort_formats(formats)

         self.mark_watched(video_id, video_info, player_response)
@@ -2542,9 +2564,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
             title_span = (
-                search_title('playlist-title') or
-                search_title('title long-title') or
-                search_title('title'))
+                search_title('playlist-title')
+                or search_title('title long-title')
+                or search_title('title'))
             title = clean_html(title_span)

             return self.playlist_result(url_results, playlist_id, title)

@@ -86,8 +86,8 @@ class ZattooPlatformBaseIE(InfoExtractor):
             return next(
                 chan['cid'] for chan in channel_list
                 if chan.get('cid') and (
-                    chan.get('display_alias') == channel_name or
-                    chan.get('cid') == channel_name))
+                    chan.get('display_alias') == channel_name
+                    or chan.get('cid') == channel_name))
         except StopIteration:
             raise ExtractorError('Could not extract channel id')

@@ -221,9 +221,9 @@ class FFmpegPostProcessor(PostProcessor):
         # avconv does not have repeat option
         if self.basename == 'ffmpeg':
             cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
-        cmd += (files_cmd +
-                [encodeArgument(o) for o in opts] +
-                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
+        cmd += (files_cmd
+                + [encodeArgument(o) for o in opts]
+                + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])

         if self._downloader.params.get('verbose', False):
             self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
@@ -326,8 +326,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         information['ext'] = extension

         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
-        if (new_path == path or
-                (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+        if (new_path == path
+                or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
             self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
             return [], information

@@ -64,8 +64,8 @@ class XAttrMetadataPP(PostProcessor):
             except XAttrMetadataError as e:
                 if e.reason == 'NO_SPACE':
                     self._downloader.report_warning(
-                        'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' +
-                        (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
+                        'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. '
+                        + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
                 elif e.reason == 'VALUE_TOO_LONG':
                     self._downloader.report_warning(
                         'Unable to write extended attributes due to too long values.')

@@ -125,8 +125,8 @@ KNOWN_EXTENSIONS = (

 # needed for sanitizing filenames in restricted mode
 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
-                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
-                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

 DATE_FORMATS = (
     '%d %B %Y',
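The substantive fix here is the transliteration of thorn: Þ/þ now map to TH/th instead of the incorrect P/p (ß stays 'ss', ÿ stays 'y'). A quick standalone check with an excerpt of the corrected table:

    # Excerpt of the corrected mapping, for illustration only.
    ACCENT_CHARS = {'Þ': 'TH', 'þ': 'th', 'Ý': 'Y', 'ý': 'y', 'ó': 'o'}

    def transliterate(s):
        # Restricted-filename style replacement: substitute mapped
        # characters, pass everything else through unchanged.
        return ''.join(ACCENT_CHARS.get(c, c) for c in s)

    print(transliterate('Þór'))  # 'THor' (the old table produced 'Por')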
@@ -861,8 +861,8 @@ class XAttrMetadataError(YoutubeDLError):
         self.msg = msg

         # Parsing code and msg
-        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
-                'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+        if (self.code in (errno.ENOSPC, errno.EDQUOT)
+                or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
             self.reason = 'NO_SPACE'
         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
             self.reason = 'VALUE_TOO_LONG'
@@ -1453,8 +1453,8 @@ def _windows_write_string(s, out):
     def not_a_console(handle):
         if handle == INVALID_HANDLE_VALUE or handle is None:
             return True
-        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
-                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

     if not_a_console(h):
         return False
@@ -1490,8 +1490,8 @@ def write_string(s, out=None, encoding=None):
         if _windows_write_string(s, out):
             return

-    if ('b' in getattr(out, 'mode', '') or
-            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
+    if ('b' in getattr(out, 'mode', '')
+            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
         byt = s.encode(encoding or preferredencoding(), 'ignore')
         out.write(byt)
     elif hasattr(out, 'buffer'):
@@ -1951,8 +1951,8 @@ def bool_or_none(v, default=None):
     return v if isinstance(v, bool) else default


-def strip_or_none(v):
-    return None if v is None else v.strip()
+def strip_or_none(v, default=None):
+    return v.strip() if isinstance(v, compat_str) else default


 def url_or_none(url):
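The reworked helper no longer blows up when handed a non-string (the old version raised AttributeError on anything that was neither None nor a string); any non-string now yields the default. Illustrative calls (compat_str is simply str on Python 3):

    compat_str = str

    def strip_or_none(v, default=None):
        return v.strip() if isinstance(v, compat_str) else default

    print(strip_or_none('  title \n'))    # 'title'
    print(strip_or_none(None))            # None
    print(strip_or_none(42))              # None (old version raised here)
    print(strip_or_none(42, default=''))  # ''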
@@ -2328,10 +2328,10 @@ def merge_dicts(*dicts):
         for k, v in a_dict.items():
             if v is None:
                 continue
-            if (k not in merged or
-                    (isinstance(v, compat_str) and v and
-                     isinstance(merged[k], compat_str) and
-                     not merged[k])):
+            if (k not in merged
+                    or (isinstance(v, compat_str) and v
+                        and isinstance(merged[k], compat_str)
+                        and not merged[k])):
                 merged[k] = v
     return merged
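Another pure reflow, but the condition is worth unpacking: earlier dicts win, None values never land at all, and an empty-string placeholder yields to a later non-empty string. A standalone copy of the same logic (str in place of compat_str):

    def merge_dicts(*dicts):
        merged = {}
        for a_dict in dicts:
            for k, v in a_dict.items():
                if v is None:
                    continue
                if (k not in merged
                        or (isinstance(v, str) and v
                            and isinstance(merged[k], str)
                            and not merged[k])):
                    merged[k] = v
        return merged

    print(merge_dicts({'title': ''}, {'title': 'fallback', 'id': '42'}))
    # {'title': 'fallback', 'id': '42'}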
@@ -2657,14 +2657,14 @@ def _match_one(filter_part, dct):
     if m:
         op = COMPARISON_OPERATORS[m.group('op')]
         actual_value = dct.get(m.group('key'))
-        if (m.group('quotedstrval') is not None or
-            m.group('strval') is not None or
-            # If the original field is a string and matching comparisonvalue is
-            # a number we should respect the origin of the original field
-            # and process comparison value as a string (see
-            # https://github.com/ytdl-org/youtube-dl/issues/11082).
-            actual_value is not None and m.group('intval') is not None and
-                isinstance(actual_value, compat_str)):
+        if (m.group('quotedstrval') is not None
+                or m.group('strval') is not None
+                # If the original field is a string and matching comparisonvalue is
+                # a number we should respect the origin of the original field
+                # and process comparison value as a string (see
+                # https://github.com/ytdl-org/youtube-dl/issues/11082).
+                or actual_value is not None and m.group('intval') is not None
+                and isinstance(actual_value, compat_str)):
             if m.group('op') not in ('=', '!='):
                 raise ValueError(
                     'Operator %s does not support string values!' % m.group('op'))
@@ -3973,9 +3973,9 @@ def write_xattr(path, key, value):
                 executable = 'xattr'
                 opts = ['-w', key, value]

-            cmd = ([encodeFilename(executable, True)] +
-                   [encodeArgument(o) for o in opts] +
-                   [encodeFilename(path, True)])
+            cmd = ([encodeFilename(executable, True)]
+                   + [encodeArgument(o) for o in opts]
+                   + [encodeFilename(path, True)])

             try:
                 p = subprocess.Popen(

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2019.04.30'
+__version__ = '2019.05.20'