1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-24 01:14:32 +01:00

Merge pull request #7 from ytdl-org/master

update
This commit is contained in:
tsia 2019-10-11 15:55:10 +02:00 committed by GitHub
commit b51ee09de1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
51 changed files with 1279 additions and 839 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.08.13 [debug] youtube-dl version 2019.09.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.08.13 [debug] youtube-dl version 2019.09.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.13. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2019.08.13** - [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@ -1,3 +1,68 @@
version 2019.09.28
Core
* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
Extractors
* [vk] Fix extraction (#22522)
* [heise] Fix kaltura embeds extraction (#22514)
* [ted] Check for resources validity and extract subtitled downloads (#22513)
+ [youtube] Add support for
owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
+ [nhk] Add support for clips
* [nhk] Fix video extraction (#22249, #22353)
* [byutv] Fix extraction (#22070)
+ [openload] Add support for oload.online (#22304)
+ [youtube] Add support for invidious.drycat.fr (#22451)
* [jwplatform] Do not match video URLs (#20596, #22148)
* [youtube:playlist] Unescape playlist uploader (#22483)
+ [bilibili] Add support for audio albums and songs (#21094)
+ [instagram] Add support for tv URLs
+ [mixcloud] Allow uppercase letters in format URLs (#19280)
* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
#12842, #13912, #15669, #16303)
* [hotstar] Use native HLS downloader by default
+ [hotstar] Extract more formats (#22323)
* [9now] Fix extraction (#22361)
* [zdf] Bypass geo restriction
+ [tv4] Extract series metadata
* [tv4] Fix extraction (#22443)
version 2019.09.12.1
Extractors
* [youtube] Remove quality and tbr for itag 43 (#22372)
version 2019.09.12
Extractors
* [youtube] Quick extraction tempfix (#22367, #22163)
version 2019.09.01
Core
+ [extractor/generic] Add support for squarespace embeds (#21294, #21802,
#21859)
+ [downloader/external] Respect mtime option for aria2c (#22242)
Extractors
+ [xhamster:user] Add support for user pages (#16330, #18454)
+ [xhamster] Add support for more domains
+ [verystream] Add support for woof.tube (#22217)
+ [dailymotion] Add support for lequipe.fr (#21328, #22152)
+ [openload] Add support for oload.vip (#22205)
+ [bbccouk] Extend URL regular expression (#19200)
+ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223)
* [safari] Fix authentication (#22161, #22184)
* [usanetwork] Fix extraction (#22105)
+ [einthusan] Add support for einthusan.ca (#22171)
* [youtube] Improve unavailable message extraction (#22117)
+ [piksel] Extract subtitles (#20506)
version 2019.08.13 version 2019.08.13
Core Core

View File

@ -98,6 +98,8 @@
- **Bigflix** - **Bigflix**
- **Bild**: Bild.de - **Bild**: Bild.de
- **BiliBili** - **BiliBili**
- **BilibiliAudio**
- **BilibiliAudioAlbum**
- **BioBioChileTV** - **BioBioChileTV**
- **BIQLE** - **BIQLE**
- **BitChute** - **BitChute**
@ -1100,6 +1102,7 @@
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
- **XHamster** - **XHamster**
- **XHamsterEmbed** - **XHamsterEmbed**
- **XHamsterUser**
- **xiami:album**: 虾米音乐 - 专辑 - **xiami:album**: 虾米音乐 - 专辑
- **xiami:artist**: 虾米音乐 - 歌手 - **xiami:artist**: 虾米音乐 - 歌手
- **xiami:collection**: 虾米音乐 - 精选集 - **xiami:collection**: 虾米音乐 - 精选集

View File

@ -852,8 +852,9 @@ class YoutubeDL(object):
extract_flat = self.params.get('extract_flat', False) extract_flat = self.params.get('extract_flat', False)
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
or extract_flat is True): or extract_flat is True):
if self.params.get('forcejson', False): self.__forced_printings(
self.to_stdout(json.dumps(ie_result)) ie_result, self.prepare_filename(ie_result),
incomplete=True)
return ie_result return ie_result
if result_type == 'video': if result_type == 'video':
@ -1693,6 +1694,36 @@ class YoutubeDL(object):
subs[lang] = f subs[lang] = f
return subs return subs
def __forced_printings(self, info_dict, filename, incomplete):
    """Write every field requested through a ``force*`` param to stdout.

    info_dict  -- the result dictionary whose fields are printed
    filename   -- output file name; printed for 'forcefilename' unless None
    incomplete -- when true, 'title', 'id' and 'format' are printed only
                  if present in info_dict (instead of being looked up
                  unconditionally) and media URLs are not printed at all
    """
    emit = self.to_stdout

    def requested(option):
        return self.params.get('force' + option, False)

    def has_value(field):
        return info_dict.get(field) is not None

    def print_mandatory(field):
        if not requested(field):
            return
        # A complete info_dict is expected to carry the field, so the
        # lookup below is only guarded for incomplete results
        if incomplete and not has_value(field):
            return
        emit(info_dict[field])

    def print_optional(field):
        if requested(field) and has_value(field):
            emit(info_dict[field])

    print_mandatory('title')
    print_mandatory('id')

    if requested('url') and not incomplete:
        url_sources = info_dict.get('requested_formats')
        if url_sources is None:
            # No explicitly requested formats: print the URL of the
            # info_dict itself
            url_sources = [info_dict]
        for source in url_sources:
            # For RTMP URLs, also include the playpath
            emit(source['url'] + source.get('play_path', ''))

    print_optional('thumbnail')
    print_optional('description')

    if requested('filename') and filename is not None:
        emit(filename)
    if requested('duration') and has_value('duration'):
        emit(formatSeconds(info_dict['duration']))

    print_mandatory('format')

    if requested('json'):
        emit(json.dumps(info_dict))
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single resolved IE result.""" """Process a single resolved IE result."""
@ -1703,9 +1734,8 @@ class YoutubeDL(object):
if self._num_downloads >= int(max_downloads): if self._num_downloads >= int(max_downloads):
raise MaxDownloadsReached() raise MaxDownloadsReached()
# TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title'] info_dict['fulltitle'] = info_dict['title']
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + '...'
if 'format' not in info_dict: if 'format' not in info_dict:
info_dict['format'] = info_dict['ext'] info_dict['format'] = info_dict['ext']
@ -1720,29 +1750,7 @@ class YoutubeDL(object):
info_dict['_filename'] = filename = self.prepare_filename(info_dict) info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# Forced printings # Forced printings
if self.params.get('forcetitle', False): self.__forced_printings(info_dict, filename, incomplete=False)
self.to_stdout(info_dict['fulltitle'])
if self.params.get('forceid', False):
self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
if info_dict.get('requested_formats') is not None:
for f in info_dict['requested_formats']:
self.to_stdout(f['url'] + f.get('play_path', ''))
else:
# For RTMP URLs, also include the playpath
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
self.to_stdout(info_dict['description'])
if self.params.get('forcefilename', False) and filename is not None:
self.to_stdout(filename)
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
self.to_stdout(formatSeconds(info_dict['duration']))
if self.params.get('forceformat', False):
self.to_stdout(info_dict['format'])
if self.params.get('forcejson', False):
self.to_stdout(json.dumps(info_dict))
# Do nothing else if in simulate mode # Do nothing else if in simulate mode
if self.params.get('simulate', False): if self.params.get('simulate', False):

View File

@ -194,6 +194,7 @@ class Aria2cFD(ExternalFD):
cmd += self._option('--interface', 'source_address') cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy') cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
cmd += ['--', info_dict['url']] cmd += ['--', info_dict['url']]
return cmd return cmd

View File

@ -15,6 +15,7 @@ from ..utils import (
float_or_none, float_or_none,
parse_iso8601, parse_iso8601,
smuggle_url, smuggle_url,
str_or_none,
strip_jsonp, strip_jsonp,
unified_timestamp, unified_timestamp,
unsmuggle_url, unsmuggle_url,
@ -306,3 +307,115 @@ class BiliBiliBangumiIE(InfoExtractor):
return self.playlist_result( return self.playlist_result(
entries, bangumi_id, entries, bangumi_id,
season_info.get('bangumi_title'), season_info.get('evaluate')) season_info.get('bangumi_title'), season_info.get('evaluate'))
class BilibiliAudioBaseIE(InfoExtractor):
    """Common base of the bilibili audio extractors (API access helper)."""

    def _call_api(self, path, sid, query=None):
        """Request `path` of the audio web API and return its 'data' member.

        `sid` is used as the download id and, when no (or an empty)
        `query` is supplied, as the only query parameter.
        """
        params = query if query else {'sid': sid}
        response = self._download_json(
            'https://www.bilibili.com/audio/music-service-c/web/' + path,
            sid, query=params)
        return response['data']
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extractor for single audio tracks (``bilibili.com/audio/au<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for one track from the 'url' and 'song/info' API calls."""
        au_id = self._match_id(url)

        # The stream location is resolved before the metadata request, so a
        # response without 'cdns' fails before 'song/info' is fetched
        stream = self._call_api('url', au_id)
        formats = [{
            'url': stream['cdns'][0],
            'filesize': int_or_none(stream.get('size')),
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        # The lyric URL, when present, is exposed as the single 'origin' subtitle
        lyric_url = song.get('lyric')
        subtitles = {'origin': [{'url': lyric_url}]} if lyric_url else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extractor for audio albums/menus (``bilibili.com/audio/am<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐每日11:00更新',
            'description': '每天11:00更新为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Return a playlist of url results, one per song of the album."""
        am_id = self._match_id(url)

        # Only the first page (up to 100 songs) is requested; the song list
        # sits under a second 'data' key inside the API payload
        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Songs without a usable id are dropped
        song_ids = (str_or_none(song.get('id')) for song in songs)
        entries = [
            self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids if sid]

        if not entries:
            return self.playlist_result(entries, am_id)

        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if not album_title:
            # No album title available: plain playlist without metadata
            return self.playlist_result(entries, am_id)

        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))

View File

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64 import base64
import json
import re import re
import struct import struct
@ -11,14 +10,12 @@ from .adobepass import AdobePassIE
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_parse_qs, compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urlparse, compat_urlparse,
compat_xml_parse_error, compat_xml_parse_error,
compat_HTTPError, compat_HTTPError,
) )
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
extract_attributes, extract_attributes,
find_xpath_attr, find_xpath_attr,
@ -27,18 +24,19 @@ from ..utils import (
js_to_json, js_to_json,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
smuggle_url,
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
update_url_query, update_url_query,
clean_html, clean_html,
mimetype2ext, mimetype2ext,
UnsupportedError,
) )
class BrightcoveLegacyIE(InfoExtractor): class BrightcoveLegacyIE(InfoExtractor):
IE_NAME = 'brightcove:legacy' IE_NAME = 'brightcove:legacy'
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)' _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
_FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
_TESTS = [ _TESTS = [
{ {
@ -55,7 +53,8 @@ class BrightcoveLegacyIE(InfoExtractor):
'timestamp': 1368213670, 'timestamp': 1368213670,
'upload_date': '20130510', 'upload_date': '20130510',
'uploader_id': '1589608506001', 'uploader_id': '1589608506001',
} },
'skip': 'The player has been deactivated by the content owner',
}, },
{ {
# From http://medianetwork.oracle.com/video/player/1785452137001 # From http://medianetwork.oracle.com/video/player/1785452137001
@ -70,6 +69,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'upload_date': '20120814', 'upload_date': '20120814',
'uploader_id': '1460825906', 'uploader_id': '1460825906',
}, },
'skip': 'video not playable',
}, },
{ {
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
@ -79,7 +79,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'This Bracelet Acts as a Personal Thermostat', 'title': 'This Bracelet Acts as a Personal Thermostat',
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0', 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
'uploader': 'Mashable', # 'uploader': 'Mashable',
'timestamp': 1382041798, 'timestamp': 1382041798,
'upload_date': '20131017', 'upload_date': '20131017',
'uploader_id': '1130468786001', 'uploader_id': '1130468786001',
@ -124,6 +124,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'id': '3550319591001', 'id': '3550319591001',
}, },
'playlist_mincount': 7, 'playlist_mincount': 7,
'skip': 'Unsupported URL',
}, },
{ {
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965) # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
@ -133,6 +134,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'title': 'Lesson 08', 'title': 'Lesson 08',
}, },
'playlist_mincount': 10, 'playlist_mincount': 10,
'skip': 'Unsupported URL',
}, },
{ {
# playerID inferred from bcpid # playerID inferred from bcpid
@ -141,12 +143,6 @@ class BrightcoveLegacyIE(InfoExtractor):
'only_matching': True, # Tested in GenericIE 'only_matching': True, # Tested in GenericIE
} }
] ]
FLV_VCODECS = {
1: 'SORENSON',
2: 'ON2',
3: 'H264',
4: 'VP8',
}
@classmethod @classmethod
def _build_brighcove_url(cls, object_str): def _build_brighcove_url(cls, object_str):
@ -238,7 +234,8 @@ class BrightcoveLegacyIE(InfoExtractor):
@classmethod @classmethod
def _make_brightcove_url(cls, params): def _make_brightcove_url(cls, params):
return update_url_query(cls._FEDERATED_URL, params) return update_url_query(
'http://c.brightcove.com/services/viewer/htmlFederated', params)
@classmethod @classmethod
def _extract_brightcove_url(cls, webpage): def _extract_brightcove_url(cls, webpage):
@ -297,38 +294,12 @@ class BrightcoveLegacyIE(InfoExtractor):
videoPlayer = query.get('@videoPlayer') videoPlayer = query.get('@videoPlayer')
if videoPlayer: if videoPlayer:
# We set the original url as the default 'Referer' header # We set the original url as the default 'Referer' header
referer = smuggled_data.get('Referer', url) referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
video_id = videoPlayer[0]
if 'playerID' not in query: if 'playerID' not in query:
mobj = re.search(r'/bcpid(\d+)', url) mobj = re.search(r'/bcpid(\d+)', url)
if mobj is not None: if mobj is not None:
query['playerID'] = [mobj.group(1)] query['playerID'] = [mobj.group(1)]
return self._get_video_info(
videoPlayer[0], query, referer=referer)
elif 'playerKey' in query:
player_key = query['playerKey']
return self._get_playlist_info(player_key[0])
else:
raise ExtractorError(
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
expected=True)
def _brightcove_new_url_result(self, publisher_id, video_id):
    """Return a url result pointing at the default new-style player for this video."""
    return self.url_result(
        'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id),
        BrightcoveNewIE.ie_key(), video_id)
def _get_video_info(self, video_id, query, referer=None):
headers = {}
linkBase = query.get('linkBaseURL')
if linkBase is not None:
referer = linkBase[0]
if referer is not None:
headers['Referer'] = referer
webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
error_msg = self._html_search_regex(
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
'error message', default=None)
if error_msg is not None:
publisher_id = query.get('publisherId') publisher_id = query.get('publisherId')
if publisher_id and publisher_id[0].isdigit(): if publisher_id and publisher_id[0].isdigit():
publisher_id = publisher_id[0] publisher_id = publisher_id[0]
@ -339,6 +310,9 @@ class BrightcoveLegacyIE(InfoExtractor):
else: else:
player_id = query.get('playerID') player_id = query.get('playerID')
if player_id and player_id[0].isdigit(): if player_id and player_id[0].isdigit():
headers = {}
if referer:
headers['Referer'] = referer
player_page = self._download_webpage( player_page = self._download_webpage(
'http://link.brightcove.com/services/player/bcpid' + player_id[0], 'http://link.brightcove.com/services/player/bcpid' + player_id[0],
video_id, headers=headers, fatal=False) video_id, headers=headers, fatal=False)
@ -350,135 +324,15 @@ class BrightcoveLegacyIE(InfoExtractor):
enc_pub_id = player_key.split(',')[1].replace('~', '=') enc_pub_id = player_key.split(',')[1].replace('~', '=')
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
if publisher_id: if publisher_id:
return self._brightcove_new_url_result(publisher_id, video_id) brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
raise ExtractorError( if referer:
'brightcove said: %s' % error_msg, expected=True) brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
self.report_extraction(video_id) # TODO: figure out if it's possible to extract playlistId from playerKey
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json') # elif 'playerKey' in query:
info = json.loads(info)['data'] # player_key = query['playerKey']
video_info = info['programmedContent']['videoPlayer']['mediaDTO'] # return self._get_playlist_info(player_key[0])
video_info['_youtubedl_adServerURL'] = info.get('adServerURL') raise UnsupportedError(url)
return self._extract_video_info(video_info)
def _get_playlist_info(self, player_key):
    """Download the experience JSON for `player_key` and return it as a playlist.

    Raises ExtractorError when the JSON carries neither a 'videoList' nor
    a 'playlistTabs' member.
    """
    raw_experience = self._download_webpage(
        'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key,
        player_key, 'Downloading playlist information')
    experience = json.loads(raw_experience)

    if 'videoList' in experience:
        container = experience['videoList']
        playlist_dto = container['mediaCollectionDTO']
    elif 'playlistTabs' in experience:
        container = experience['playlistTabs']
        # Only the first playlist of the tab line-up is used
        playlist_dto = container['lineupListDTO']['playlistDTOs'][0]
    else:
        raise ExtractorError('Empty playlist')

    entries = [
        self._extract_video_info(video_dto)
        for video_dto in playlist_dto['videoDTOs']]
    return self.playlist_result(
        entries, playlist_id='%s' % container['id'],
        playlist_title=playlist_dto['displayName'])
def _extract_video_info(self, video_info):
    """Turn one legacy Brightcove video DTO into an info dict.

    ``video_info`` is a dict that must contain ``id`` and ``displayName``;
    every other field is read with ``.get`` and may be missing.

    Formats come from the ``renditions`` and ``IOSRenditions`` lists when
    either is non-empty; otherwise ``FLVFullLengthURL`` (if present) is used
    as the single media URL.

    Returns the info dict, except when the ``include_ads`` downloader param
    is set and the DTO carries ``_youtubedl_adServerURL``: then either a
    playlist of ``[ad, video]`` (when the info dict has a ``url``) or the ad
    entry alone is returned.

    Raises ExtractorError when no URL or formats were found and there is no
    uploader id to hand over to ``_brightcove_new_url_result``.
    """
    video_id = compat_str(video_info['id'])
    publisher_id = video_info.get('publisherId')
    info = {
        'id': video_id,
        'title': video_info['displayName'].strip(),
        'description': video_info.get('shortDescription'),
        'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
        'uploader': video_info.get('publisherName'),
        'uploader_id': compat_str(publisher_id) if publisher_id else None,
        'duration': float_or_none(video_info.get('length'), 1000),
        'timestamp': int_or_none(video_info.get('creationDate'), 1000),
    }

    all_renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
    if all_renditions:
        formats = []
        for rendition in all_renditions:
            media_url = rendition['defaultURL']
            if not media_url:
                continue
            ext = None
            if rendition['remote']:
                parsed_url = compat_urllib_parse_urlparse(media_url)
                if parsed_url.path.endswith('.m3u8'):
                    # Remote HLS manifest: let the m3u8 helper produce the
                    # formats and move on to the next rendition.
                    formats.extend(self._extract_m3u8_formats(
                        media_url, video_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
                    continue
                if 'akamaihd.net' in parsed_url.netloc:
                    # Renditions of this type are served through
                    # akamaihd.net, but they do not use f4m manifests.
                    media_url = media_url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
                    ext = 'flv'
            if ext is None:
                ext = determine_ext(media_url)

            tbr = int_or_none(rendition.get('encodingRate'), 1000)
            # Shared "-<tbr>" suffix for both the http and hls format ids.
            tbr_suffix = '-%s' % tbr if tbr else ''
            fmt = {
                'format_id': 'http%s' % tbr_suffix,
                'url': media_url,
                'ext': ext,
                'filesize': int_or_none(rendition.get('size')) or None,
                'tbr': tbr,
            }
            if rendition.get('audioOnly'):
                fmt['vcodec'] = 'none'
            else:
                fmt.update({
                    'height': int_or_none(rendition.get('frameHeight')),
                    'width': int_or_none(rendition.get('frameWidth')),
                    'vcodec': rendition.get('videoCodec'),
                })

            # m3u8 manifests with remote == false are media playlists.
            # _extract_m3u8_formats is not called here to save network traffic.
            if ext == 'm3u8':
                fmt.update({
                    'format_id': 'hls%s' % tbr_suffix,
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                })

            formats.append(fmt)
        self._sort_formats(formats)
        info['formats'] = formats
    elif video_info.get('FLVFullLengthURL') is not None:
        info.update({
            'url': video_info['FLVFullLengthURL'],
            'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
            'filesize': int_or_none(video_info.get('FLVFullSize')),
        })

    if self._downloader.params.get('include_ads', False):
        ad_server_url = video_info.get('_youtubedl_adServerURL')
        if ad_server_url:
            ad_info = {
                '_type': 'url',
                'url': ad_server_url,
            }
            if 'url' not in info:
                # No direct video URL in the info dict: only the ad is returned.
                return ad_info
            return {
                '_type': 'playlist',
                'title': info['title'],
                'entries': [ad_info, info],
            }

    if info.get('url') or info.get('formats'):
        return info

    # Nothing playable was found; defer to _brightcove_new_url_result when
    # the uploader id is known, otherwise give up.
    uploader_id = info.get('uploader_id')
    if not uploader_id:
        raise ExtractorError('Unable to extract video url for %s' % video_id)
    info.update(self._brightcove_new_url_result(uploader_id, video_id))
    return info
class BrightcoveNewIE(AdobePassIE): class BrightcoveNewIE(AdobePassIE):

View File

@ -3,7 +3,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_duration from ..utils import (
determine_ext,
merge_dicts,
parse_duration,
url_or_none,
)
class BYUtvIE(InfoExtractor): class BYUtvIE(InfoExtractor):
@ -51,7 +56,7 @@ class BYUtvIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id
info = self._download_json( video = self._download_json(
'https://api.byutv.org/api3/catalog/getvideosforcontent', 'https://api.byutv.org/api3/catalog/getvideosforcontent',
display_id, query={ display_id, query={
'contentid': video_id, 'contentid': video_id,
@ -62,7 +67,7 @@ class BYUtvIE(InfoExtractor):
'x-byutv-platformkey': 'xsaaw9c7y5', 'x-byutv-platformkey': 'xsaaw9c7y5',
}) })
ep = info.get('ooyalaVOD') ep = video.get('ooyalaVOD')
if ep: if ep:
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
@ -75,18 +80,38 @@ class BYUtvIE(InfoExtractor):
'thumbnail': ep.get('imageThumbnail'), 'thumbnail': ep.get('imageThumbnail'),
} }
ep = info['dvr'] info = {}
title = ep['title'] formats = []
formats = self._extract_m3u8_formats( for format_id, ep in video.items():
ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native', if not isinstance(ep, dict):
m3u8_id='hls') continue
self._sort_formats(formats) video_url = url_or_none(ep.get('videoUrl'))
return { if not video_url:
'id': video_id, continue
'display_id': display_id, ext = determine_ext(video_url)
'title': title, if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': video_url,
'format_id': format_id,
})
merge_dicts(info, {
'title': ep.get('title'),
'description': ep.get('description'), 'description': ep.get('description'),
'thumbnail': ep.get('imageThumbnail'), 'thumbnail': ep.get('imageThumbnail'),
'duration': parse_duration(ep.get('length')), 'duration': parse_duration(ep.get('length')),
})
self._sort_formats(formats)
return merge_dicts(info, {
'id': video_id,
'display_id': display_id,
'title': display_id,
'formats': formats, 'formats': formats,
} })

View File

@ -7,7 +7,7 @@ from ..utils import ExtractorError
class ChaturbateIE(InfoExtractor): class ChaturbateIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.chaturbate.com/siswet19/', 'url': 'https://www.chaturbate.com/siswet19/',
'info_dict': { 'info_dict': {
@ -21,6 +21,9 @@ class ChaturbateIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Room is offline', 'skip': 'Room is offline',
}, {
'url': 'https://chaturbate.com/fullvideo/?b=caylin',
'only_matching': True,
}, { }, {
'url': 'https://en.chaturbate.com/siswet19/', 'url': 'https://en.chaturbate.com/siswet19/',
'only_matching': True, 'only_matching': True,
@ -32,7 +35,8 @@ class ChaturbateIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(
url, video_id, headers=self.geo_verification_headers()) 'https://chaturbate.com/%s/' % video_id, video_id,
headers=self.geo_verification_headers())
m3u8_urls = [] m3u8_urls = []

View File

@ -1424,12 +1424,10 @@ class InfoExtractor(object):
try: try:
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers) self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
return True return True
except ExtractorError as e: except ExtractorError:
if isinstance(e.cause, compat_urllib_error.URLError):
self.to_screen( self.to_screen(
'%s: %s URL is invalid, skipping' % (video_id, item)) '%s: %s URL is invalid, skipping' % (video_id, item))
return False return False
raise
def http_scheme(self): def http_scheme(self):
""" Either "http:" or "https:", depending on the user's preferences """ """ Either "http:" or "https:", depending on the user's preferences """

View File

@ -48,7 +48,14 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor):
_VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)' _VALID_URL = r'''(?ix)
https?://
(?:
(?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
(?:www\.)?lequipe\.fr/video
)
/(?P<id>[^/?_]+)
'''
IE_NAME = 'dailymotion' IE_NAME = 'dailymotion'
_FORMATS = [ _FORMATS = [
@ -133,6 +140,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}, { }, {
'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun', 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.lequipe.fr/video/x791mem',
'only_matching': True,
}, {
'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
'only_matching': True,
}] }]
@staticmethod @staticmethod

View File

@ -104,6 +104,8 @@ from .bild import BildIE
from .bilibili import ( from .bilibili import (
BiliBiliIE, BiliBiliIE,
BiliBiliBangumiIE, BiliBiliBangumiIE,
BilibiliAudioIE,
BilibiliAudioAlbumIE,
) )
from .biobiochiletv import BioBioChileTVIE from .biobiochiletv import BioBioChileTVIE
from .bitchute import ( from .bitchute import (
@ -895,7 +897,6 @@ from .puhutv import (
PuhuTVSerieIE, PuhuTVSerieIE,
) )
from .presstv import PressTVIE from .presstv import PressTVIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE from .puls4 import Puls4IE
from .pyvideo import PyvideoIE from .pyvideo import PyvideoIE
@ -1131,6 +1132,7 @@ from .telegraaf import TelegraafIE
from .telemb import TeleMBIE from .telemb import TeleMBIE
from .telequebec import ( from .telequebec import (
TeleQuebecIE, TeleQuebecIE,
TeleQuebecSquatIE,
TeleQuebecEmissionIE, TeleQuebecEmissionIE,
TeleQuebecLiveIE, TeleQuebecLiveIE,
) )
@ -1284,7 +1286,6 @@ from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veehd import VeeHDIE from .veehd import VeeHDIE
from .veoh import VeohIE from .veoh import VeohIE
from .vessel import VesselIE
from .vesti import VestiIE from .vesti import VestiIE
from .vevo import ( from .vevo import (
VevoIE, VevoIE,
@ -1415,7 +1416,6 @@ from .weibo import (
WeiboMobileIE WeiboMobileIE
) )
from .weiqitv import WeiqiTVIE from .weiqitv import WeiqiTVIE
from .wimp import WimpIE
from .wistia import WistiaIE from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE from .worldstarhiphop import WorldStarHipHopIE
from .wsj import ( from .wsj import (
@ -1429,6 +1429,7 @@ from .xfileshare import XFileShareIE
from .xhamster import ( from .xhamster import (
XHamsterIE, XHamsterIE,
XHamsterEmbedIE, XHamsterEmbedIE,
XHamsterUserIE,
) )
from .xiami import ( from .xiami import (
XiamiSongIE, XiamiSongIE,

View File

@ -77,7 +77,6 @@ from .instagram import InstagramIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from .vessel import VesselIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE from .facebook import FacebookIE
@ -2075,6 +2074,22 @@ class GenericIE(InfoExtractor):
}, },
'playlist_count': 6, 'playlist_count': 6,
}, },
{
# Squarespace video embed, 2019-08-28
'url': 'http://ootboxford.com',
'info_dict': {
'id': 'Tc7b_JGdZfw',
'title': 'Out of the Blue, at Childish Things 10',
'ext': 'mp4',
'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
'uploader_id': 'helendouglashouse',
'uploader': 'Helen & Douglas House',
'upload_date': '20140328',
},
'params': {
'skip_download': True,
},
},
{ {
# Zype embed # Zype embed
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
@ -2395,6 +2410,12 @@ class GenericIE(InfoExtractor):
# Unescaping the whole page allows to handle those cases in a generic way # Unescaping the whole page allows to handle those cases in a generic way
webpage = compat_urllib_parse_unquote(webpage) webpage = compat_urllib_parse_unquote(webpage)
# Unescape squarespace embeds to be detected by generic extractor,
# see https://github.com/ytdl-org/youtube-dl/issues/21294
webpage = re.sub(
r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
lambda x: unescapeHTML(x.group(0)), webpage)
# it's tempting to parse this further, but you would # it's tempting to parse this further, but you would
# have to take into account all the variations like # have to take into account all the variations like
# Video Title - Site Name # Video Title - Site Name
@ -2469,11 +2490,6 @@ class GenericIE(InfoExtractor):
if tp_urls: if tp_urls:
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform') return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
# Look for Vessel embeds
vessel_urls = VesselIE._extract_urls(webpage)
if vessel_urls:
return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
# Look for embedded rtl.nl player # Look for embedded rtl.nl player
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"', r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',

View File

@ -11,7 +11,7 @@ from ..utils import (
class GfycatIE(InfoExtractor): class GfycatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#]+)' _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\.]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': { 'info_dict': {
@ -53,6 +53,12 @@ class GfycatIE(InfoExtractor):
}, { }, {
'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball', 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
'only_matching': True 'only_matching': True
}, {
'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
'only_matching': True
}, {
'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
'only_matching': True
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -96,6 +96,8 @@ class GloboIE(InfoExtractor):
video = self._download_json( video = self._download_json(
'http://api.globovideos.com/videos/%s/playlist' % video_id, 'http://api.globovideos.com/videos/%s/playlist' % video_id,
video_id)['videos'][0] video_id)['videos'][0]
if video.get('encrypted') is True:
raise ExtractorError('This video is DRM protected.', expected=True)
title = video['title'] title = video['title']
@ -109,8 +111,8 @@ class GloboIE(InfoExtractor):
security = self._download_json( security = self._download_json(
'http://security.video.globo.com/videos/%s/hash' % video_id, 'http://security.video.globo.com/videos/%s/hash' % video_id,
video_id, 'Downloading security hash for %s' % resource_id, query={ video_id, 'Downloading security hash for %s' % resource_id, query={
'player': 'flash', 'player': 'desktop',
'version': '17.0.0.132', 'version': '5.19.1',
'resource_id': resource_id, 'resource_id': resource_id,
}) })
@ -122,19 +124,18 @@ class GloboIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, message), expected=True) '%s returned error: %s' % (self.IE_NAME, message), expected=True)
continue continue
hash_code = security_hash[:2] assert security_hash[:2] in ('04', '14')
received_time = security_hash[2:12] received_time = security_hash[3:13]
received_random = security_hash[12:22] received_md5 = security_hash[24:]
received_md5 = security_hash[22:]
sign_time = compat_str(int(received_time) + 86400) sign_time = compat_str(int(received_time) + 86400)
padding = '%010d' % random.randint(1, 10000000000) padding = '%010d' % random.randint(1, 10000000000)
md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode() md5_data = (received_md5 + sign_time + padding + '0xAC10FD').encode()
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5 signed_hash = security_hash[:23] + sign_time + padding + signed_md5
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',

View File

@ -105,8 +105,7 @@ class HeiseIE(InfoExtractor):
webpage, default=None) or self._html_search_meta( webpage, default=None) or self._html_search_meta(
'description', webpage) 'description', webpage)
kaltura_url = KalturaIE._extract_url(webpage) def _make_kaltura_result(kaltura_url):
if kaltura_url:
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': smuggle_url(kaltura_url, {'source_url': url}), 'url': smuggle_url(kaltura_url, {'source_url': url}),
@ -115,6 +114,16 @@ class HeiseIE(InfoExtractor):
'description': description, 'description': description,
} }
kaltura_url = KalturaIE._extract_url(webpage)
if kaltura_url:
return _make_kaltura_result(kaltura_url)
kaltura_id = self._search_regex(
r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
default=None, group='id')
if kaltura_id:
return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
yt_urls = YoutubeIE._extract_urls(webpage) yt_urls = YoutubeIE._extract_urls(webpage)
if yt_urls: if yt_urls:
return self.playlist_from_matches( return self.playlist_from_matches(

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import hashlib import hashlib
import hmac import hmac
import re
import time import time
import uuid import uuid
@ -126,6 +127,8 @@ class HotStarIE(HotStarBaseIE):
format_url = url_or_none(playback_set.get('playbackUrl')) format_url = url_or_none(playback_set.get('playbackUrl'))
if not format_url: if not format_url:
continue continue
format_url = re.sub(
r'(?<=//staragvod)(\d)', r'web\1', format_url)
tags = str_or_none(playback_set.get('tagsCombination')) or '' tags = str_or_none(playback_set.get('tagsCombination')) or ''
if tags and 'encryption:plain' not in tags: if tags and 'encryption:plain' not in tags:
continue continue
@ -133,7 +136,8 @@ class HotStarIE(HotStarBaseIE):
try: try:
if 'package:hls' in tags or ext == 'm3u8': if 'package:hls' in tags or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id='hls')) format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls'))
elif 'package:dash' in tags or ext == 'mpd': elif 'package:dash' in tags or ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash')) format_url, video_id, mpd_id='dash'))

View File

@ -22,7 +22,7 @@ from ..utils import (
class InstagramIE(InfoExtractor): class InstagramIE(InfoExtractor):
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))' _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
_TESTS = [{ _TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516', 'md5': '0d2da106a9d2631273e192b372806516',
@ -92,6 +92,9 @@ class InstagramIE(InfoExtractor):
}, { }, {
'url': 'http://instagram.com/p/9o6LshA7zy/embed/', 'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.instagram.com/tv/aye83DjauH/',
'only_matching': True,
}] }]
@staticmethod @staticmethod

View File

@ -7,7 +7,7 @@ from .common import InfoExtractor
class JWPlatformIE(InfoExtractor): class JWPlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{ _TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325',

View File

@ -151,14 +151,15 @@ class KalturaIE(InfoExtractor):
if mobj: if mobj:
embed_info = mobj.groupdict() embed_info = mobj.groupdict()
for k, v in embed_info.items(): for k, v in embed_info.items():
if v:
embed_info[k] = v.strip() embed_info[k] = v.strip()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id']) escaped_pid = re.escape(embed_info['partner_id'])
service_url = re.search( service_mobj = re.search(
r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
webpage) webpage)
if service_url: if service_mobj:
url = smuggle_url(url, {'service_url': service_url.group(1)}) url = smuggle_url(url, {'service_url': service_mobj.group('id')})
return url return url
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):

View File

@ -164,7 +164,7 @@ class MixcloudIE(InfoExtractor):
def decrypt_url(f_url): def decrypt_url(f_url):
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'): for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
decrypted_url = self._decrypt_xor_cipher(k, f_url) decrypted_url = self._decrypt_xor_cipher(k, f_url)
if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
return decrypted_url return decrypted_url
for url_key in ('url', 'hlsUrl', 'dashUrl'): for url_key in ('url', 'hlsUrl', 'dashUrl'):

View File

@ -10,6 +10,18 @@ class NhkVodIE(InfoExtractor):
# Content available only for a limited period of time. Visit # Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{ _TESTS = [{
# clip
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
'md5': '256a1be14f48d960a7e61e2532d95ec3',
'info_dict': {
'id': 'a95j5iza',
'ext': 'mp4',
'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
'timestamp': 1565965194,
'upload_date': '20190816',
},
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
'only_matching': True, 'only_matching': True,
}, { }, {
@ -19,7 +31,7 @@ class NhkVodIE(InfoExtractor):
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/', 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
'only_matching': True, 'only_matching': True,
}] }]
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json' _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'
def _real_extract(self, url): def _real_extract(self, url):
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups() lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
@ -28,7 +40,10 @@ class NhkVodIE(InfoExtractor):
is_video = m_type == 'video' is_video = m_type == 'video'
episode = self._download_json( episode = self._download_json(
self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''), self._API_URL_TEMPLATE % (
'v' if is_video else 'r',
'clip' if episode_id[:4] == '9999' else 'esd',
episode_id, lang, '/all' if is_video else ''),
episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0] episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
title = episode.get('sub_title_clean') or episode['sub_title'] title = episode.get('sub_title_clean') or episode['sub_title']
@ -60,8 +75,8 @@ class NhkVodIE(InfoExtractor):
if is_video: if is_video:
info.update({ info.update({
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': 'Ooyala', 'ie_key': 'Piksel',
'url': 'ooyala:' + episode['vod_id'], 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
}) })
else: else:
audio = episode['audio'] audio = episode['audio']

View File

@ -85,7 +85,8 @@ class NickBrIE(MTVServicesInfoExtractor):
https?:// https?://
(?: (?:
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br| (?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
(?:www\.)?nickjr\.[a-z]{2} (?:www\.)?nickjr\.[a-z]{2}|
(?:www\.)?nickelodeonjunior\.fr
) )
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+) /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
''' '''
@ -101,6 +102,9 @@ class NickBrIE(MTVServicesInfoExtractor):
}, { }, {
'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/', 'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -45,7 +45,11 @@ class NineNowIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
page_data = self._parse_json(self._search_regex( page_data = self._parse_json(self._search_regex(
r'window\.__data\s*=\s*({.*?});', webpage, r'window\.__data\s*=\s*({.*?});', webpage,
'page data'), display_id) 'page data', default='{}'), display_id, fatal=False)
if not page_data:
page_data = self._parse_json(self._parse_json(self._search_regex(
r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
webpage, 'page data'), display_id), display_id)
for kind in ('episode', 'clip'): for kind in ('episode', 'clip'):
current_key = page_data.get(kind, {}).get( current_key = page_data.get(kind, {}).get(

View File

@ -25,9 +25,14 @@ class NonkTubeIE(NuevoBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
info = self._extract_nuevo( webpage = self._download_webpage(url, video_id)
'https://www.nonktube.com/media/nuevo/econfig.php?key=%s'
% video_id, video_id)
info['age_limit'] = 18 title = self._og_search_title(webpage)
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
info.update({
'id': video_id,
'title': title,
'age_limit': 18,
})
return info return info

View File

@ -406,7 +406,7 @@ class NRKTVSerieBaseIE(InfoExtractor):
def _extract_series(self, webpage, display_id, fatal=True): def _extract_series(self, webpage, display_id, fatal=True):
config = self._parse_json( config = self._parse_json(
self._search_regex( self._search_regex(
(r'INITIAL_DATA_*\s*=\s*({.+?})\s*;', (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'), r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
webpage, 'config', default='{}' if not fatal else NO_DEFAULT), webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False) display_id, fatal=False)

View File

@ -243,11 +243,12 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor): class OpenloadIE(InfoExtractor):
_DOMAINS = r'''(?x) _DOMAINS = r'''
(?: (?:
openload\.(?:co|io|link|pw)| openload\.(?:co|io|link|pw)|
oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website|vip)| oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)|
oladblock\.(?:services|xyz|me)|openloed\.co) oladblock\.(?:services|xyz|me)|openloed\.co
)
''' '''
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
@ -361,6 +362,12 @@ class OpenloadIE(InfoExtractor):
}, { }, {
'url': 'https://oload.services/embed/bs1NWj1dCag/', 'url': 'https://oload.services/embed/bs1NWj1dCag/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://oload.online/f/W8o2UfN1vNY/',
'only_matching': True,
}, {
'url': 'https://oload.monster/f/W8o2UfN1vNY/',
'only_matching': True,
}, { }, {
'url': 'https://oload.press/embed/drTBl1aOTvk/', 'url': 'https://oload.press/embed/drTBl1aOTvk/',
'only_matching': True, 'only_matching': True,
@ -396,7 +403,7 @@ class OpenloadIE(InfoExtractor):
@classmethod @classmethod
def _extract_urls(cls, webpage): def _extract_urls(cls, webpage):
return re.findall( return re.findall(
r'<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' r'(?x)<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
% (cls._DOMAINS, cls._EMBED_WORD), webpage) % (cls._DOMAINS, cls._EMBED_WORD), webpage)
def _extract_decrypted_page(self, page_url, webpage, video_id): def _extract_decrypted_page(self, page_url, webpage, video_id):
@ -462,7 +469,7 @@ class OpenloadIE(InfoExtractor):
class VerystreamIE(OpenloadIE): class VerystreamIE(OpenloadIE):
IE_NAME = 'verystream' IE_NAME = 'verystream'
_DOMAINS = r'(?:verystream\.com)' _DOMAINS = r'(?:verystream\.com|woof\.tube)'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?P<host> (?P<host>

View File

@ -86,12 +86,13 @@ class ORFTVthekIE(InfoExtractor):
if value: if value:
format_id_list.append(value) format_id_list.append(value)
format_id = '-'.join(format_id_list) format_id = '-'.join(format_id_list)
if determine_ext(fd['src']) == 'm3u8': ext = determine_ext(src)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
fd['src'], video_id, 'mp4', m3u8_id=format_id)) src, video_id, 'mp4', m3u8_id=format_id, fatal=False))
elif determine_ext(fd['src']) == 'f4m': elif ext == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
fd['src'], video_id, f4m_id=format_id)) src, video_id, f4m_id=format_id, fatal=False))
else: else:
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,

View File

@ -18,81 +18,385 @@ from ..utils import (
class PeerTubeIE(InfoExtractor): class PeerTubeIE(InfoExtractor):
_INSTANCES_RE = r'''(?: _INSTANCES_RE = r'''(?:
# Taken from https://instances.joinpeertube.org/instances # Taken from https://instances.joinpeertube.org/instances
peertube\.rainbowswingers\.net|
tube\.stanisic\.nl|
peer\.suiri\.us|
medias\.libox\.fr|
videomensoif\.ynh\.fr|
peertube\.travelpandas\.eu|
peertube\.rachetjay\.fr|
peertube\.montecsys\.fr|
tube\.eskuero\.me|
peer\.tube|
peertube\.umeahackerspace\.se|
tube\.nx-pod\.de|
video\.monsieurbidouille\.fr|
tube\.openalgeria\.org| tube\.openalgeria\.org|
peertube\.pointsecu\.fr| vid\.lelux\.fi|
video\.anormallostpod\.ovh|
tube\.crapaud-fou\.org|
peertube\.stemy\.me|
lostpod\.space|
exode\.me|
peertube\.snargol\.com|
vis\.ion\.ovh|
videosdulib\.re|
v\.mbius\.io|
videos\.judrey\.eu|
peertube\.osureplayviewer\.xyz|
peertube\.mathieufamily\.ovh|
www\.videos-libr\.es|
fightforinfo\.com|
peertube\.fediverse\.ru|
peertube\.oiseauroch\.fr|
video\.nesven\.eu|
v\.bearvideo\.win|
video\.qoto\.org|
justporn\.cc|
video\.vny\.fr|
peervideo\.club|
tube\.taker\.fr|
peertube\.chantierlibre\.org|
tube\.ipfixe\.info|
tube\.kicou\.info|
tube\.dodsorf\.as|
videobit\.cc|
video\.yukari\.moe|
videos\.elbinario\.net|
hkvideo\.live|
pt\.tux\.tf|
www\.hkvideo\.live|
FIGHTFORINFO\.com|
pt\.765racing\.com|
peertube\.gnumeria\.eu\.org|
nordenmedia\.com|
peertube\.co\.uk|
tube\.darfweb\.eu|
tube\.kalah-france\.org|
0ch\.in|
vod\.mochi\.academy|
film\.node9\.org|
peertube\.hatthieves\.es|
video\.fitchfamily\.org|
peertube\.ddns\.net|
video\.ifuncle\.kr|
video\.fdlibre\.eu|
tube\.22decembre\.eu|
peertube\.harmoniescreatives\.com|
tube\.fabrigli\.fr|
video\.thedwyers\.co|
video\.bruitbruit\.com|
peertube\.foxfam\.club|
peer\.philoxweb\.be|
videos\.bugs\.social|
peertube\.malbert\.xyz|
peertube\.bilange\.ca|
libretube\.net|
diytelevision\.com|
peertube\.fedilab\.app|
libre\.video|
video\.mstddntfdn\.online|
us\.tv|
peertube\.sl-network\.fr|
peertube\.dynlinux\.io|
peertube\.david\.durieux\.family|
peertube\.linuxrocks\.online|
peerwatch\.xyz|
v\.kretschmann\.social|
tube\.otter\.sh|
yt\.is\.nota\.live|
tube\.dragonpsi\.xyz|
peertube\.boneheadmedia\.com|
videos\.funkwhale\.audio|
watch\.44con\.com|
peertube\.gcaillaut\.fr|
peertube\.icu|
pony\.tube|
spacepub\.space|
tube\.stbr\.io|
v\.mom-gay\.faith|
tube\.port0\.xyz|
peertube\.simounet\.net|
play\.jergefelt\.se|
peertube\.zeteo\.me|
tube\.danq\.me|
peertube\.kerenon\.com|
tube\.fab-l3\.org|
tube\.calculate\.social|
peertube\.mckillop\.org|
tube\.netzspielplatz\.de|
vod\.ksite\.de|
peertube\.laas\.fr|
tube\.govital\.net|
peertube\.stephenson\.cc|
bistule\.nohost\.me|
peertube\.kajalinifi\.de|
video\.ploud\.jp|
video\.omniatv\.com|
peertube\.ffs2play\.fr|
peertube\.leboulaire\.ovh|
peertube\.tronic-studio\.com|
peertube\.public\.cat|
peertube\.metalbanana\.net|
video\.1000i100\.fr|
peertube\.alter-nativ-voll\.de|
tube\.pasa\.tf|
tube\.worldofhauru\.xyz|
pt\.kamp\.site|
peertube\.teleassist\.fr|
videos\.mleduc\.xyz|
conf\.tube|
media\.privacyinternational\.org|
pt\.forty-two\.nl|
video\.halle-leaks\.de|
video\.grosskopfgames\.de|
peertube\.schaeferit\.de|
peertube\.jackbot\.fr|
tube\.extinctionrebellion\.fr|
peertube\.f-si\.org|
video\.subak\.ovh|
videos\.koweb\.fr|
peertube\.zergy\.net|
peertube\.roflcopter\.fr|
peertube\.floss-marketing-school\.com|
vloggers\.social|
peertube\.iriseden\.eu|
videos\.ubuntu-paris\.org|
peertube\.mastodon\.host|
armstube\.com|
peertube\.s2s\.video|
peertube\.lol|
tube\.open-plug\.eu|
open\.tube|
peertube\.ch|
peertube\.normandie-libre\.fr|
peertube\.slat\.org|
video\.lacaveatonton\.ovh|
peertube\.uno|
peertube\.servebeer\.com|
peertube\.fedi\.quebec|
tube\.h3z\.jp|
tube\.plus200\.com|
peertube\.eric\.ovh|
tube\.metadocs\.cc|
tube\.unmondemeilleur\.eu|
gouttedeau\.space|
video\.antirep\.net|
nrop\.cant\.at|
tube\.ksl-bmx\.de|
tube\.plaf\.fr|
tube\.tchncs\.de|
video\.devinberg\.com|
hitchtube\.fr|
peertube\.kosebamse\.com|
yunopeertube\.myddns\.me|
peertube\.varney\.fr|
peertube\.anon-kenkai\.com|
tube\.maiti\.info|
tubee\.fr|
videos\.dinofly\.com|
toobnix\.org|
videotape\.me|
voca\.tube|
video\.heromuster\.com|
video\.lemediatv\.fr|
video\.up\.edu\.ph|
balafon\.video|
video\.ivel\.fr|
thickrips\.cloud|
pt\.laurentkruger\.fr|
video\.monarch-pass\.net|
peertube\.artica\.center|
video\.alternanet\.fr|
indymotion\.fr|
fanvid\.stopthatimp\.net|
video\.farci\.org|
v\.lesterpig\.com|
video\.okaris\.de|
tube\.pawelko\.net|
peertube\.mablr\.org|
tube\.fede\.re|
pytu\.be|
evertron\.tv|
devtube\.dev-wiki\.de|
raptube\.antipub\.org|
video\.selea\.se|
peertube\.mygaia\.org|
video\.oh14\.de|
peertube\.livingutopia\.org|
peertube\.the-penguin\.de|
tube\.thechangebook\.org|
tube\.anjara\.eu|
pt\.pube\.tk|
video\.samedi\.pm|
mplayer\.demouliere\.eu|
widemus\.de|
peertube\.me|
peertube\.zapashcanon\.fr|
video\.latavernedejohnjohn\.fr|
peertube\.pcservice46\.fr|
peertube\.mazzonetto\.eu|
video\.irem\.univ-paris-diderot\.fr|
video\.livecchi\.cloud|
alttube\.fr|
video\.coop\.tools|
video\.cabane-libre\.org|
peertube\.openstreetmap\.fr|
videos\.alolise\.org|
irrsinn\.video|
video\.antopie\.org|
scitech\.video|
tube2\.nemsia\.org|
video\.amic37\.fr|
peertube\.freeforge\.eu|
video\.arbitrarion\.com|
video\.datsemultimedia\.com|
stoptrackingus\.tv|
peertube\.ricostrongxxx\.com|
docker\.videos\.lecygnenoir\.info|
peertube\.togart\.de|
tube\.postblue\.info|
videos\.domainepublic\.net|
peertube\.cyber-tribal\.com|
video\.gresille\.org|
peertube\.dsmouse\.net|
cinema\.yunohost\.support|
tube\.theocevaer\.fr|
repro\.video|
tube\.4aem\.com|
quaziinc\.com|
peertube\.metawurst\.space|
videos\.wakapo\.com|
video\.ploud\.fr|
video\.freeradical\.zone|
tube\.valinor\.fr|
refuznik\.video|
pt\.kircheneuenburg\.de|
peertube\.asrun\.eu|
peertube\.lagob\.fr|
videos\.side-ways\.net|
91video\.online|
video\.valme\.io|
video\.taboulisme\.com|
videos-libr\.es|
tv\.mooh\.fr|
nuage\.acostey\.fr|
video\.monsieur-a\.fr|
peertube\.librelois\.fr|
videos\.pair2jeux\.tube|
videos\.pueseso\.club|
peer\.mathdacloud\.ovh|
media\.assassinate-you\.net|
vidcommons\.org|
ptube\.rousset\.nom\.fr|
tube\.cyano\.at|
videos\.squat\.net|
video\.iphodase\.fr|
peertube\.makotoworkshop\.org|
peertube\.serveur\.slv-valbonne\.fr|
vault\.mle\.party|
hostyour\.tv|
videos\.hack2g2\.fr|
libre\.tube|
pire\.artisanlogiciel\.net|
videos\.numerique-en-commun\.fr|
video\.netsyms\.com|
video\.die-partei\.social|
video\.writeas\.org|
peertube\.swarm\.solvingmaz\.es|
tube\.pericoloso\.ovh|
watching\.cypherpunk\.observer|
videos\.adhocmusic\.com|
tube\.rfc1149\.net|
peertube\.librelabucm\.org|
videos\.numericoop\.fr|
peertube\.koehn\.com|
peertube\.anarchmusicall\.net|
tube\.kampftoast\.de|
vid\.y-y\.li|
peertube\.xtenz\.xyz|
diode\.zone|
tube\.egf\.mn|
peertube\.nomagic\.uk|
visionon\.tv|
videos\.koumoul\.com|
video\.rastapuls\.com|
video\.mantlepro\.com|
video\.deadsuperhero\.com|
peertube\.musicstudio\.pro|
peertube\.we-keys\.fr|
artitube\.artifaille\.fr|
peertube\.ethernia\.net|
tube\.midov\.pl|
peertube\.fr|
watch\.snoot\.tube|
peertube\.donnadieu\.fr|
argos\.aquilenet\.fr|
tube\.nemsia\.org|
tube\.bruniau\.net|
videos\.darckoune\.moe|
tube\.traydent\.info|
dev\.videos\.lecygnenoir\.info|
peertube\.nayya\.org|
peertube\.live|
peertube\.mofgao\.space|
video\.lequerrec\.eu|
peertube\.amicale\.net|
aperi\.tube|
tube\.ac-lyon\.fr|
video\.lw1\.at|
www\.yiny\.org|
videos\.pofilo\.fr|
tube\.lou\.lt|
choob\.h\.etbus\.ch|
tube\.hoga\.fr|
peertube\.heberge\.fr|
video\.obermui\.de|
videos\.cloudfrancois\.fr|
betamax\.video|
video\.typica\.us|
tube\.piweb\.be|
video\.blender\.org|
peertube\.cat|
tube\.kdy\.ch|
pe\.ertu\.be|
peertube\.social|
videos\.lescommuns\.org|
tv\.datamol\.org|
videonaute\.fr|
dialup\.express|
peertube\.nogafa\.org| peertube\.nogafa\.org|
peertube\.pl|
megatube\.lilomoino\.fr| megatube\.lilomoino\.fr|
peertube\.tamanoir\.foucry\.net| peertube\.tamanoir\.foucry\.net|
peertube\.inapurna\.org|
peertube\.netzspielplatz\.de|
video\.deadsuperhero\.com|
peertube\.devosi\.org| peertube\.devosi\.org|
peertube\.1312\.media| peertube\.1312\.media|
tube\.worldofhauru\.xyz|
tube\.bootlicker\.party| tube\.bootlicker\.party|
skeptikon\.fr| skeptikon\.fr|
peertube\.geekshell\.fr|
tube\.opportunis\.me|
peertube\.peshane\.net|
video\.blueline\.mg| video\.blueline\.mg|
tube\.homecomputing\.fr| tube\.homecomputing\.fr|
videos\.cloudfrancois\.fr|
peertube\.viviers-fibre\.net|
tube\.ouahpiti\.info| tube\.ouahpiti\.info|
video\.tedomum\.net| video\.tedomum\.net|
video\.g3l\.org| video\.g3l\.org|
fontube\.fr| fontube\.fr|
peertube\.gaialabs\.ch| peertube\.gaialabs\.ch|
peertube\.extremely\.online|
peertube\.public-infrastructure\.eu|
tube\.kher\.nl| tube\.kher\.nl|
peertube\.qtg\.fr| peertube\.qtg\.fr|
tube\.22decembre\.eu|
facegirl\.me|
video\.migennes\.net| video\.migennes\.net|
janny\.moe|
tube\.p2p\.legal| tube\.p2p\.legal|
video\.atlanti\.se|
troll\.tv| troll\.tv|
peertube\.geekael\.fr|
vid\.leotindall\.com|
video\.anormallostpod\.ovh|
p-tube\.h3z\.jp|
tube\.darfweb\.eu|
videos\.iut-orsay\.fr| videos\.iut-orsay\.fr|
peertube\.solidev\.net| peertube\.solidev\.net|
videos\.symphonie-of-code\.fr|
testtube\.ortg\.de|
videos\.cemea\.org| videos\.cemea\.org|
peertube\.gwendalavir\.eu|
video\.passageenseine\.fr| video\.passageenseine\.fr|
videos\.festivalparminous\.org| videos\.festivalparminous\.org|
peertube\.touhoppai\.moe| peertube\.touhoppai\.moe|
peertube\.duckdns\.org|
sikke\.fi| sikke\.fi|
peertube\.mastodon\.host|
firedragonvideos\.com|
vidz\.dou\.bet|
peertube\.koehn\.com|
peer\.hostux\.social| peer\.hostux\.social|
share\.tube| share\.tube|
peertube\.walkingmountains\.fr| peertube\.walkingmountains\.fr|
medias\.libox\.fr|
peertube\.moe|
peertube\.xyz|
jp\.peertube\.network|
videos\.benpro\.fr| videos\.benpro\.fr|
tube\.otter\.sh|
peertube\.angristan\.xyz|
peertube\.parleur\.net| peertube\.parleur\.net|
peer\.ecutsa\.fr|
peertube\.heraut\.eu| peertube\.heraut\.eu|
peertube\.tifox\.fr|
peertube\.maly\.io|
vod\.mochi\.academy|
exode\.me|
coste\.video|
tube\.aquilenet\.fr| tube\.aquilenet\.fr|
peertube\.gegeweb\.eu| peertube\.gegeweb\.eu|
framatube\.org| framatube\.org|
@ -100,18 +404,11 @@ class PeerTubeIE(InfoExtractor):
tube\.conferences-gesticulees\.net| tube\.conferences-gesticulees\.net|
peertube\.datagueule\.tv| peertube\.datagueule\.tv|
video\.lqdn\.fr| video\.lqdn\.fr|
meilleurtube\.delire\.party|
tube\.mochi\.academy| tube\.mochi\.academy|
peertube\.dav\.li|
media\.zat\.im| media\.zat\.im|
pytu\.be|
peertube\.valvin\.fr|
peertube\.nsa\.ovh|
video\.colibris-outilslibres\.org| video\.colibris-outilslibres\.org|
video\.hispagatos\.org|
tube\.svnet\.fr| tube\.svnet\.fr|
peertube\.video| peertube\.video|
videos\.lecygnenoir\.info|
peertube3\.cpy\.re| peertube3\.cpy\.re|
peertube2\.cpy\.re| peertube2\.cpy\.re|
videos\.tcit\.fr| videos\.tcit\.fr|
@ -126,7 +423,7 @@ class PeerTubeIE(InfoExtractor):
(?P<id>%s) (?P<id>%s)
''' % (_INSTANCES_RE, _UUID_RE) ''' % (_INSTANCES_RE, _UUID_RE)
_TESTS = [{ _TESTS = [{
'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', 'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
'md5': '80f24ff364cc9d333529506a263e7feb', 'md5': '80f24ff364cc9d333529506a263e7feb',
'info_dict': { 'info_dict': {
'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c', 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',

View File

@ -15,7 +15,7 @@ from ..utils import (
class PikselIE(InfoExtractor): class PikselIE(InfoExtractor):
_VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)' _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://player.piksel.com/v/ums2867l', 'url': 'http://player.piksel.com/v/ums2867l',
@ -40,6 +40,11 @@ class PikselIE(InfoExtractor):
'timestamp': 1486171129, 'timestamp': 1486171129,
'upload_date': '20170204' 'upload_date': '20170204'
} }
},
{
# https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
'only_matching': True,
} }
] ]
@ -52,8 +57,11 @@ class PikselIE(InfoExtractor):
return mobj.group('url') return mobj.group('url')
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-de-program-uuid=[\'"]([a-z0-9]+)',
webpage, 'program uuid', default=display_id)
app_token = self._search_regex([ app_token = self._search_regex([
r'clientAPI\s*:\s*"([^"]+)"', r'clientAPI\s*:\s*"([^"]+)"',
r'data-de-api-key\s*=\s*"([^"]+)"' r'data-de-api-key\s*=\s*"([^"]+)"'

View File

@ -18,43 +18,10 @@ from ..utils import (
) )
class PlatziIE(InfoExtractor): class PlatziBaseIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
platzi\.com/clases| # es version
courses\.platzi\.com/classes # en version
)/[^/]+/(?P<id>\d+)-[^/?\#&]+
'''
_LOGIN_URL = 'https://platzi.com/login/' _LOGIN_URL = 'https://platzi.com/login/'
_NETRC_MACHINE = 'platzi' _NETRC_MACHINE = 'platzi'
_TESTS = [{
'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
'md5': '8f56448241005b561c10f11a595b37e3',
'info_dict': {
'id': '12074',
'ext': 'mp4',
'title': 'Creando nuestra primera página',
'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
'duration': 420,
},
'skip': 'Requires platzi account credentials',
}, {
'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
'info_dict': {
'id': '13430',
'ext': 'mp4',
'title': 'Background',
'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
'duration': 360,
},
'skip': 'Requires platzi account credentials',
'params': {
'skip_download': True,
},
}]
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@ -97,6 +64,42 @@ class PlatziIE(InfoExtractor):
'Unable to login: %s' % error, expected=True) 'Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in') raise ExtractorError('Unable to log in')
class PlatziIE(PlatziBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
platzi\.com/clases| # es version
courses\.platzi\.com/classes # en version
)/[^/]+/(?P<id>\d+)-[^/?\#&]+
'''
_TESTS = [{
'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
'md5': '8f56448241005b561c10f11a595b37e3',
'info_dict': {
'id': '12074',
'ext': 'mp4',
'title': 'Creando nuestra primera página',
'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
'duration': 420,
},
'skip': 'Requires platzi account credentials',
}, {
'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
'info_dict': {
'id': '13430',
'ext': 'mp4',
'title': 'Background',
'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
'duration': 360,
},
'skip': 'Requires platzi account credentials',
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
lecture_id = self._match_id(url) lecture_id = self._match_id(url)
@ -104,7 +107,11 @@ class PlatziIE(InfoExtractor):
data = self._parse_json( data = self._parse_json(
self._search_regex( self._search_regex(
r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'), # client_data may contain "};" so that we have to try more
# strict regex first
(r'client_data\s*=\s*({.+?})\s*;\s*\n',
r'client_data\s*=\s*({.+?})\s*;'),
webpage, 'client data'),
lecture_id) lecture_id)
material = data['initialState']['material'] material = data['initialState']['material']
@ -146,7 +153,7 @@ class PlatziIE(InfoExtractor):
} }
class PlatziCourseIE(InfoExtractor): class PlatziCourseIE(PlatziBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:

View File

@ -403,6 +403,15 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
@staticmethod
def _has_more(webpage):
return re.search(
r'''(?x)
<li[^>]+\bclass=["\']page_next|
<link[^>]+\brel=["\']next|
<button[^>]+\bid=["\']moreDataBtn
''', webpage) is not None
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
host = mobj.group('host') host = mobj.group('host')
@ -411,13 +420,11 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
page = int_or_none(self._search_regex( page = int_or_none(self._search_regex(
r'\bpage=(\d+)', url, 'page', default=None)) r'\bpage=(\d+)', url, 'page', default=None))
page_url = self._make_page_url(url)
entries = [] entries = []
for page_num in (page, ) if page is not None else itertools.count(1): for page_num in (page, ) if page is not None else itertools.count(1):
try: try:
webpage = self._download_webpage( webpage = self._download_webpage(
page_url, item_id, 'Downloading page %d' % page_num, url, item_id, 'Downloading page %d' % page_num,
query={'page': page_num}) query={'page': page_num})
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
@ -547,18 +554,6 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
else super(PornHubPagedVideoListIE, cls).suitable(url)) else super(PornHubPagedVideoListIE, cls).suitable(url))
def _make_page_url(self, url):
return url
@staticmethod
def _has_more(webpage):
return re.search(
r'''(?x)
<li[^>]+\bclass=["\']page_next|
<link[^>]+\brel=["\']next|
<button[^>]+\bid=["\']moreDataBtn
''', webpage) is not None
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
@ -572,11 +567,3 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
'only_matching': True, 'only_matching': True,
}] }]
def _make_page_url(self, url):
mobj = re.match(self._VALID_URL, url)
return '%s/ajax' % mobj.group('url')
@staticmethod
def _has_more(webpage):
return True

View File

@ -1,70 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
urlencode_postdata,
)
class PromptFileIE(InfoExtractor):
    """Information extractor for promptfile.com ``/l/<id>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
    _TEST = {
        'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416',
        'md5': '5a7e285a26e0d66d9a263fae91bc92ce',
        'info_dict': {
            'id': '86D1CE8462-576CAAE416',
            'ext': 'mp4',
            'title': 'oceans.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Extract a single-format info dict for the given page URL.

        The landing page is downloaded first; its hidden form fields are
        then posted back to the same URL (with the "chash" value prepended
        to the matching field) to obtain the page that holds the file link.

        Raises ExtractorError when the page reports that the video does not
        exist or when the form field carrying the chash cannot be found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        chash = self._search_regex(
            r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash')
        fields = self._hidden_inputs(webpage)
        keys = list(fields.keys())
        if len(keys) == 1:
            chash_key = keys[0]
        else:
            # A default is passed to next() so that a page without a
            # matching field produces an extractor error instead of leaking
            # a bare StopIteration to the caller.
            chash_key = next(
                (key for key in keys if key.startswith('cha')), None)
            if chash_key is None:
                raise ExtractorError('Unable to find chash form field')
        fields[chash_key] = chash + fields[chash_key]

        webpage = self._download_webpage(
            url, video_id, 'Downloading video page',
            data=urlencode_postdata(fields),
            headers={'Content-type': 'application/x-www-form-urlencoded'})

        video_url = self._search_regex(
            (r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*Download File',
             r'<a[^>]+href=(["\'])(?P<url>https?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'),
            webpage, 'video url', group='url')
        title = self._html_search_regex(
            r'<span.+title="([^"]+)">', webpage, 'title')
        thumbnail = self._html_search_regex(
            r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
            webpage, 'thumbnail', fatal=False, flags=re.DOTALL)

        # The title is passed to determine_ext() to derive the container
        # extension (the test title is 'oceans.mp4').
        formats = [{
            'format_id': 'sd',
            'url': video_url,
            'ext': determine_ext(title),
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }

View File

@ -6,6 +6,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
merge_dicts,
str_to_int, str_to_int,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
@ -45,7 +46,10 @@ class RedTubeIE(InfoExtractor):
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
raise ExtractorError('Video %s has been removed' % video_id, expected=True) raise ExtractorError('Video %s has been removed' % video_id, expected=True)
title = self._html_search_regex( info = self._search_json_ld(webpage, video_id, default={})
if not info.get('title'):
info['title'] = self._html_search_regex(
(r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
webpage, 'title', group='title', webpage, 'title', group='title',
@ -88,28 +92,28 @@ class RedTubeIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex( upload_date = unified_strdate(self._search_regex(
r'<span[^>]+>ADDED ([^<]+)<', r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
webpage, 'upload date', fatal=False)) webpage, 'upload date', default=None))
duration = int_or_none(self._og_search_property( duration = int_or_none(self._og_search_property(
'video:duration', webpage, default=None) or self._search_regex( 'video:duration', webpage, default=None) or self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
(r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'), r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
webpage, 'view count', fatal=False)) r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
webpage, 'view count', default=None))
# No self-labeling, but they describe themselves as # No self-labeling, but they describe themselves as
# "Home of Videos Porno" # "Home of Videos Porno"
age_limit = 18 age_limit = 18
return { return merge_dicts(info, {
'id': video_id, 'id': video_id,
'ext': 'mp4', 'ext': 'mp4',
'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'upload_date': upload_date, 'upload_date': upload_date,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'age_limit': age_limit, 'age_limit': age_limit,
'formats': formats, 'formats': formats,
} })

View File

@ -48,6 +48,16 @@ class TeachableBaseIE(InfoExtractor):
'https://%s/sign_in' % site, None, 'https://%s/sign_in' % site, None,
'Downloading %s login page' % site) 'Downloading %s login page' % site)
def is_logged(webpage):
return any(re.search(p, webpage) for p in (
r'class=["\']user-signout',
r'<a[^>]+\bhref=["\']/sign_out',
r'Log\s+[Oo]ut\s*<'))
if is_logged(login_page):
self._logged_in = True
return
login_url = compat_str(urlh.geturl()) login_url = compat_str(urlh.geturl())
login_form = self._hidden_inputs(login_page) login_form = self._hidden_inputs(login_page)
@ -78,10 +88,7 @@ class TeachableBaseIE(InfoExtractor):
'Go to https://%s/ and accept.' % (site, site), expected=True) 'Go to https://%s/ and accept.' % (site, site), expected=True)
# Successful login # Successful login
if any(re.search(p, response) for p in ( if is_logged(response):
r'class=["\']user-signout',
r'<a[^>]+\bhref=["\']/sign_out',
r'>\s*Log out\s*<')):
self._logged_in = True self._logged_in = True
return return

View File

@ -182,20 +182,29 @@ class TEDIE(InfoExtractor):
title = talk_info['title'].strip() title = talk_info['title'].strip()
native_downloads = try_get( downloads = talk_info.get('downloads') or {}
talk_info, native_downloads = downloads.get('nativeDownloads') or talk_info.get('nativeDownloads') or {}
(lambda x: x['downloads']['nativeDownloads'],
lambda x: x['nativeDownloads']),
dict) or {}
formats = [{ formats = [{
'url': format_url, 'url': format_url,
'format_id': format_id, 'format_id': format_id,
'format': format_id,
} for (format_id, format_url) in native_downloads.items() if format_url is not None] } for (format_id, format_url) in native_downloads.items() if format_url is not None]
subtitled_downloads = downloads.get('subtitledDownloads') or {}
for lang, subtitled_download in subtitled_downloads.items():
for q in self._NATIVE_FORMATS:
q_url = subtitled_download.get(q)
if not q_url:
continue
formats.append({
'url': q_url,
'format_id': '%s-%s' % (q, lang),
'language': lang,
})
if formats: if formats:
for f in formats: for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id']) finfo = self._NATIVE_FORMATS.get(f['format_id'].split('-')[0])
if finfo: if finfo:
f.update(finfo) f.update(finfo)
@ -215,6 +224,18 @@ class TEDIE(InfoExtractor):
http_url = None http_url = None
for format_id, resources in resources_.items(): for format_id, resources in resources_.items():
if format_id == 'hls':
if not isinstance(resources, dict):
continue
stream_url = url_or_none(resources.get('stream'))
if not stream_url:
continue
formats.extend(self._extract_m3u8_formats(
stream_url, video_name, 'mp4', m3u8_id=format_id,
fatal=False))
else:
if not isinstance(resources, list):
continue
if format_id == 'h264': if format_id == 'h264':
for resource in resources: for resource in resources:
h264_url = resource.get('file') h264_url = resource.get('file')
@ -242,15 +263,6 @@ class TEDIE(InfoExtractor):
'height': int_or_none(resource.get('height')), 'height': int_or_none(resource.get('height')),
'tbr': int_or_none(resource.get('bitrate')), 'tbr': int_or_none(resource.get('bitrate')),
}) })
elif format_id == 'hls':
if not isinstance(resources, dict):
continue
stream_url = url_or_none(resources.get('stream'))
if not stream_url:
continue
formats.extend(self._extract_m3u8_formats(
stream_url, video_name, 'mp4', m3u8_id=format_id,
fatal=False))
m3u8_formats = list(filter( m3u8_formats = list(filter(
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none', lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',

View File

@ -7,6 +7,7 @@ from ..utils import (
int_or_none, int_or_none,
smuggle_url, smuggle_url,
try_get, try_get,
unified_timestamp,
) )
@ -22,7 +23,13 @@ class TeleQuebecBaseIE(InfoExtractor):
class TeleQuebecIE(TeleQuebecBaseIE): class TeleQuebecIE(TeleQuebecBaseIE):
_VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)' _VALID_URL = r'''(?x)
https?://
(?:
zonevideo\.telequebec\.tv/media|
coucou\.telequebec\.tv/videos
)/(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
# available till 01.01.2023 # available till 01.01.2023
'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane', 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
@ -41,6 +48,9 @@ class TeleQuebecIE(TeleQuebecBaseIE):
# no description # no description
'url': 'http://zonevideo.telequebec.tv/media/30261', 'url': 'http://zonevideo.telequebec.tv/media/30261',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -61,6 +71,52 @@ class TeleQuebecIE(TeleQuebecBaseIE):
return info return info
class TeleQuebecSquatIE(InfoExtractor):
    """Extractor for squat.telequebec.tv video pages.

    The page id is resolved through the squat API to a source media id,
    and extraction is then delegated to TeleQuebecIE via a transparent URL
    result carrying the metadata read from the API response.
    """

    _VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://squat.telequebec.tv/videos/9314',
        'info_dict': {
            'id': 'd59ae78112d542e793d83cc9d3a5b530',
            'ext': 'mp4',
            'title': 'Poupeflekta',
            'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75',
            'duration': 1351,
            'timestamp': 1569057600,
            'upload_date': '20190921',
            'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir',
            'season': 'Saison 3',
            'season_number': 3,
            'episode_number': 57,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the zonevideo media page."""
        squat_id = self._match_id(url)
        api_url = 'https://squat.api.telequebec.tv/v1/videos/%s' % squat_id
        metadata = self._download_json(api_url, squat_id)

        # 'sourceId' is mandatory: it is the id used by the zonevideo URL.
        media_id = metadata['sourceId']

        result = {
            '_type': 'url_transparent',
            'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id,
            'ie_key': TeleQuebecIE.ie_key(),
            'id': media_id,
        }
        result['title'] = metadata.get('titre')
        result['description'] = metadata.get('description')
        result['timestamp'] = unified_timestamp(metadata.get('datePublication'))
        result['series'] = metadata.get('container')
        result['season'] = metadata.get('saison')
        result['season_number'] = int_or_none(metadata.get('noSaison'))
        result['episode_number'] = int_or_none(metadata.get('episode'))
        return result
class TeleQuebecEmissionIE(TeleQuebecBaseIE): class TeleQuebecEmissionIE(TeleQuebecBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://

View File

@ -72,8 +72,13 @@ class TV4IE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
info = self._download_json( info = self._download_json(
'http://www.tv4play.se/player/assets/%s.json' % video_id, 'https://playback-api.b17g.net/asset/%s' % video_id,
video_id, 'Downloading video info JSON') video_id, 'Downloading video info JSON', query={
'service': 'tv4',
'device': 'browser',
'protocol': 'hls,dash',
'drm': 'widevine',
})['metadata']
title = info['title'] title = info['title']
@ -111,5 +116,9 @@ class TV4IE(InfoExtractor):
'timestamp': parse_iso8601(info.get('broadcast_date_time')), 'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')), 'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('image'), 'thumbnail': info.get('image'),
'is_live': info.get('is_live') is True, 'is_live': info.get('isLive') is True,
'series': info.get('seriesTitle'),
'season_number': int_or_none(info.get('seasonNumber')),
'episode': info.get('episodeTitle'),
'episode_number': int_or_none(info.get('episodeNumber')),
} }

View File

@ -1,157 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_iso8601,
sanitized_Request,
)
class VesselIE(InfoExtractor):
    """Information extractor for vessel.com video and embed pages.

    The page's bootstrap data yields an item id, which is then looked up
    through the JSON API (``_API_URL_TEMPLATE``) to obtain the video
    sources and image assets.  Credentials, when configured, are posted to
    ``_LOGIN_URL`` during initialization.
    """

    _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)'
    _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
    _LOGIN_URL = 'https://www.vessel.com/api/account/login'
    _NETRC_MACHINE = 'vessel'
    _TESTS = [{
        'url': 'https://www.vessel.com/videos/HDN7G5UMs',
        'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
        'info_dict': {
            'id': 'HDN7G5UMs',
            'ext': 'mp4',
            'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150317',
            'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
            'timestamp': int,
        },
    }, {
        'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
        'only_matching': True,
    }, {
        'url': 'https://www.vessel.com/videos/F01_dsLj1',
        'only_matching': True,
    }, {
        'url': 'https://www.vessel.com/videos/RRX-sir-J',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URLs of all vessel.com embed iframes in *webpage*."""
        return [url for _, url in re.findall(
            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1',
            webpage)]

    @staticmethod
    def make_json_request(url, data):
        """Build a request for *url* whose body is *data* serialized as JSON.

        The payload is UTF-8 encoded and the ``Content-Type`` header is set
        to ``application/json; charset=utf-8``.
        """
        payload = json.dumps(data).encode('utf-8')
        req = sanitized_Request(url, payload)
        req.add_header('Content-Type', 'application/json; charset=utf-8')
        return req

    @staticmethod
    def find_assets(data, asset_type, asset_id=None):
        """Yield entries of ``data['assets']`` whose ``type`` is *asset_type*.

        When *asset_id* is not None, only assets whose ``id`` equals it are
        yielded.  A missing ``assets`` key yields nothing.
        """
        for asset in data.get('assets', []):
            if asset.get('type') != asset_type:
                continue
            if asset_id is not None and asset.get('id') != asset_id:
                continue
            yield asset

    def _check_access_rights(self, data):
        """Raise an expected ExtractorError if *data* reports denied access.

        Access is treated as granted unless ``data['__view']['allow_access']``
        is present and falsy.
        """
        access_info = data.get('__view', {})
        if not access_info.get('allow_access', True):
            err_code = access_info.get('error_code') or ''
            if err_code == 'ITEM_PAID_ONLY':
                raise ExtractorError(
                    'This video requires subscription.', expected=True)
            else:
                raise ExtractorError(
                    'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)

    def _login(self):
        """Post the configured credentials to the login endpoint, if any."""
        username, password = self._get_login_info()
        if username is None:
            # No credentials configured: continue anonymously
            return
        self.report_login()
        data = {
            'client_id': 'web',
            'type': 'password',
            'user_key': username,
            'password': password,
        }
        login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
        self._download_webpage(login_request, None, False, 'Wrong login info')

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Extract formats, thumbnails and metadata for the video at *url*.

        Raises ExtractorError when access is denied or when the API response
        contains no matching video asset.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        data = self._parse_json(self._search_regex(
            r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
        # The id used by the API comes from the bootstrap data, not the URL
        asset_id = data['model']['data']['id']

        req = VesselIE.make_json_request(
            self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
        data = self._download_json(req, video_id)
        video_asset_id = data.get('main_video_asset')

        self._check_access_rights(data)

        try:
            video_asset = next(
                VesselIE.find_assets(data, 'video', asset_id=video_asset_id))
        except StopIteration:
            raise ExtractorError('No video assets found')

        formats = []
        for f in video_asset.get('sources', []):
            location = f.get('location')
            if not location:
                continue
            name = f.get('name')
            if name == 'hls-index':
                formats.extend(self._extract_m3u8_formats(
                    location, video_id, ext='mp4',
                    entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False))
            elif name == 'dash-index':
                formats.extend(self._extract_mpd_formats(
                    location, video_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'format_id': name,
                    'tbr': f.get('bitrate'),
                    'height': f.get('height'),
                    'width': f.get('width'),
                    'url': location,
                })
        self._sort_formats(formats)

        thumbnails = []
        for im_asset in VesselIE.find_assets(data, 'image'):
            thumbnail_url = im_asset.get('location')
            # Mirror the handling of video sources above: an image asset
            # without a location is skipped instead of aborting the whole
            # extraction with a KeyError.
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': im_asset.get('width', 0),
                'height': im_asset.get('height', 0),
            })

        return {
            'id': video_id,
            'title': data['title'],
            'formats': formats,
            'thumbnails': thumbnails,
            'description': data.get('short_description'),
            'duration': data.get('duration'),
            'comment_count': data.get('comment_count'),
            'like_count': data.get('like_count'),
            'view_count': data.get('view_count'),
            'timestamp': parse_iso8601(data.get('released_at')),
        }

View File

@ -13,11 +13,12 @@ from ..utils import (
js_to_json, js_to_json,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
try_get,
) )
class ViewLiftBaseIE(InfoExtractor): class ViewLiftBaseIE(InfoExtractor):
_DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv' _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
class ViewLiftEmbedIE(ViewLiftBaseIE): class ViewLiftEmbedIE(ViewLiftBaseIE):
@ -113,7 +114,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
class ViewLiftIE(ViewLiftBaseIE): class ViewLiftIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?:/(?:films/title|show|(?:news/)?videos?))?/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{ _TESTS = [{
'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
'md5': '19844f897b35af219773fd63bdec2942', 'md5': '19844f897b35af219773fd63bdec2942',
@ -128,7 +129,7 @@ class ViewLiftIE(ViewLiftBaseIE):
'categories': 'mincount:3', 'categories': 'mincount:3',
'age_limit': 14, 'age_limit': 14,
'upload_date': '20150421', 'upload_date': '20150421',
'timestamp': 1429656819, 'timestamp': 1429656820,
} }
}, { }, {
'url': 'http://www.snagfilms.com/show/the_world_cut_project/india', 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
@ -141,10 +142,26 @@ class ViewLiftIE(ViewLiftBaseIE):
'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f', 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'duration': 979, 'duration': 979,
'categories': 'mincount:2',
'timestamp': 1399478279, 'timestamp': 1399478279,
'upload_date': '20140507', 'upload_date': '20140507',
} }
}, {
'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love',
'info_dict': {
'id': '00000148-7b53-de26-a9fb-fbf306f70020',
'display_id': 'augie_alone/s_2_ep_12_love',
'ext': 'mp4',
'title': 'Augie, Alone:S. 2 Ep. 12 - Love',
'description': 'md5:db2a5c72d994f16a780c1eb353a8f403',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 107,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://main.snagfilms.com/films/title/the_freebie',
'only_matching': True,
}, { }, {
# Film is not playable in your area. # Film is not playable in your area.
'url': 'http://www.snagfilms.com/films/title/inside_mecca', 'url': 'http://www.snagfilms.com/films/title/inside_mecca',
@ -162,6 +179,10 @@ class ViewLiftIE(ViewLiftBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
    """Decline URLs claimed by ViewLiftEmbedIE; otherwise use the default check."""
    if ViewLiftEmbedIE.suitable(url):
        return False
    return super(ViewLiftIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
domain, display_id = re.match(self._VALID_URL, url).groups() domain, display_id = re.match(self._VALID_URL, url).groups()
@ -181,7 +202,21 @@ class ViewLiftIE(ViewLiftBaseIE):
gist = content_data['gist'] gist = content_data['gist']
film_id = gist['id'] film_id = gist['id']
title = gist['title'] title = gist['title']
video_assets = content_data['streamingInfo']['videoAssets'] video_assets = try_get(
content_data, lambda x: x['streamingInfo']['videoAssets'], dict)
if not video_assets:
token = self._download_json(
'https://prod-api.viewlift.com/identity/anonymous-token',
film_id, 'Downloading authorization token',
query={'site': 'snagfilms'})['authorizationToken']
video_assets = self._download_json(
'https://prod-api.viewlift.com/entitlement/video/status',
film_id, headers={
'Authorization': token,
'Referer': url,
}, query={
'id': film_id
})['video']['streamingInfo']['videoAssets']
formats = [] formats = []
mpeg_video_assets = video_assets.get('mpeg') or [] mpeg_video_assets = video_assets.get('mpeg') or []
@ -241,8 +276,9 @@ class ViewLiftIE(ViewLiftBaseIE):
if category.get('title')] if category.get('title')]
break break
else: else:
title = self._search_regex( title = self._html_search_regex(
r'itemprop="title">([^<]+)<', webpage, 'title') (r'itemprop="title">([^<]+)<',
r'(?s)itemprop="title">(.+?)<div'), webpage, 'title')
description = self._html_search_regex( description = self._html_search_regex(
r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>', r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
webpage, 'description', default=None) or self._og_search_description(webpage) webpage, 'description', default=None) or self._og_search_description(webpage)

View File

@ -403,8 +403,17 @@ class VKIE(VKBaseIE):
data = self._parse_json( data = self._parse_json(
self._search_regex( self._search_regex(
r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page, r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
'player params'), 'player params', default='{}'),
video_id)['params'][0] video_id)
if data:
data = data['params'][0]
# <!--{...}
if not data:
data = self._parse_json(
self._search_regex(
r'<!--\s*({.+})', info_page, 'payload'),
video_id)['payload'][-1][-1]['player']['params'][0]
title = unescapeHTML(data['md_title']) title = unescapeHTML(data['md_title'])

View File

@ -1,54 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
class WimpIE(InfoExtractor):
    """Information extractor for wimp.com pages.

    Pages that embed a YouTube video are delegated to the YouTube
    extractor; all others are read from the page's JW Player data.
    """

    _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.wimp.com/maru-is-exhausted/',
        'md5': 'ee21217ffd66d058e8b16be340b74883',
        'info_dict': {
            'id': 'maru-is-exhausted',
            'ext': 'mp4',
            'title': 'Maru is exhausted.',
            'description': 'md5:57e099e857c0a4ea312542b684a869b8',
        }
    }, {
        'url': 'http://www.wimp.com/clowncar/',
        'md5': '5c31ad862a90dc5b1f023956faec13fe',
        'info_dict': {
            'id': 'cG4CEr2aiSg',
            'ext': 'webm',
            'title': 'Basset hound clown car...incredible!',
            'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom',
            'upload_date': '20140303',
            'uploader': 'Gretchen Hoey',
            'uploader_id': 'gretchenandjeff1',
        },
        'add_ie': ['Youtube'],
    }]

    def _real_extract(self, url):
        """Return a YouTube url_result or the JW Player info for *url*."""
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)

        # Either pattern captures an 11-character YouTube video id
        youtube_patterns = (
            r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
            r'data-id=["\']([0-9A-Za-z_-]{11})',
        )
        embedded_id = self._search_regex(
            youtube_patterns, page, 'video URL', default=None)

        if not embedded_id:
            # No YouTube embed found: fall back to the JW Player data and
            # overwrite id/title/description with the page's own values
            result = self._extract_jwplayer_data(
                page, page_id, require_title=False)
            overrides = {
                'id': page_id,
                'title': self._og_search_title(page),
                'description': self._og_search_description(page),
            }
            result.update(overrides)
            return result

        return self.url_result(embedded_id, YoutubeIE.ie_key())

View File

@ -1,5 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -8,6 +9,7 @@ from ..utils import (
clean_html, clean_html,
determine_ext, determine_ext,
dict_get, dict_get,
extract_attributes,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration, parse_duration,
@ -18,21 +20,21 @@ from ..utils import (
class XHamsterIE(InfoExtractor): class XHamsterIE(InfoExtractor):
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster[27]\.com)'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:.+?\.)?xhamster\.(?:com|one)/ (?:.+?\.)?%s/
(?: (?:
movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html| movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+) videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
) )
''' ''' % _DOMAINS
_TESTS = [{ _TESTS = [{
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'md5': '8281348b8d3c53d39fffb377d24eac4e', 'md5': '98b4687efb1ffd331c4197854dc09e8f',
'info_dict': { 'info_dict': {
'id': '1509445', 'id': '1509445',
'display_id': 'femaleagent_shy_beauty_takes_the_bait', 'display_id': 'femaleagent-shy-beauty-takes-the-bait',
'ext': 'mp4', 'ext': 'mp4',
'title': 'FemaleAgent Shy beauty takes the bait', 'title': 'FemaleAgent Shy beauty takes the bait',
'timestamp': 1350194821, 'timestamp': 1350194821,
@ -40,13 +42,12 @@ class XHamsterIE(InfoExtractor):
'uploader': 'Ruseful2011', 'uploader': 'Ruseful2011',
'duration': 893, 'duration': 893,
'age_limit': 18, 'age_limit': 18,
'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Beauti', 'Beauties', 'Beautiful', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy', 'Taking'],
}, },
}, { }, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', 'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
'info_dict': { 'info_dict': {
'id': '2221348', 'id': '2221348',
'display_id': 'britney_spears_sexy_booty', 'display_id': 'britney-spears-sexy-booty',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Britney Spears Sexy Booty', 'title': 'Britney Spears Sexy Booty',
'timestamp': 1379123460, 'timestamp': 1379123460,
@ -54,13 +55,12 @@ class XHamsterIE(InfoExtractor):
'uploader': 'jojo747400', 'uploader': 'jojo747400',
'duration': 200, 'duration': 200,
'age_limit': 18, 'age_limit': 18,
'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
# empty seo # empty seo, unavailable via new URL schema
'url': 'http://xhamster.com/movies/5667973/.html', 'url': 'http://xhamster.com/movies/5667973/.html',
'info_dict': { 'info_dict': {
'id': '5667973', 'id': '5667973',
@ -71,7 +71,6 @@ class XHamsterIE(InfoExtractor):
'uploader': 'parejafree', 'uploader': 'parejafree',
'duration': 72, 'duration': 72,
'age_limit': 18, 'age_limit': 18,
'categories': ['Amateur', 'Blowjobs'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -94,6 +93,18 @@ class XHamsterIE(InfoExtractor):
}, { }, {
'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445', 'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'only_matching': True,
}, {
'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'only_matching': True,
}, {
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'only_matching': True,
}, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -285,7 +296,7 @@ class XHamsterIE(InfoExtractor):
class XHamsterEmbedIE(InfoExtractor): class XHamsterEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)' _VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
_TEST = { _TEST = {
'url': 'http://xhamster.com/xembed.php?video=3328539', 'url': 'http://xhamster.com/xembed.php?video=3328539',
'info_dict': { 'info_dict': {
@ -322,3 +333,49 @@ class XHamsterEmbedIE(InfoExtractor):
video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl')) video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
return self.url_result(video_url, 'XHamster') return self.url_result(video_url, 'XHamster')
class XHamsterUserIE(InfoExtractor):
    """Playlist extractor for the video listing of an xHamster user profile."""

    _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
    _TESTS = [{
        # Paginated user profile
        'url': 'https://xhamster.com/users/netvideogirls/videos',
        'info_dict': {
            'id': 'netvideogirls',
        },
        'playlist_mincount': 267,
    }, {
        # Non-paginated user profile
        'url': 'https://xhamster.com/users/firatkaan/videos',
        'info_dict': {
            'id': 'firatkaan',
        },
        'playlist_mincount': 1,
    }]

    def _entries(self, user_id):
        """Yield a url_result for every video link on the user's listing pages.

        Pages are followed through the anchor carrying ``data-page="next"``
        until no such anchor, or no valid href on it, is found.
        """
        page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
        page_number = 0
        while page_url:
            page_number += 1
            webpage = self._download_webpage(
                page_url, user_id, 'Downloading page %s' % page_number)

            anchors = re.findall(
                r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
                webpage)
            for anchor in anchors:
                href = url_or_none(extract_attributes(anchor).get('href'))
                # Only keep links the video extractor itself can handle
                if href and XHamsterIE.suitable(href):
                    yield self.url_result(
                        href, ie=XHamsterIE.ie_key(),
                        video_id=XHamsterIE._match_id(href))

            next_link = re.search(r'<a[^>]+data-page=["\']next[^>]+>', webpage)
            if next_link is None:
                break
            # A missing or invalid href yields None, which ends the loop
            page_url = url_or_none(
                extract_attributes(next_link.group(0)).get('href'))

    def _real_extract(self, url):
        """Return a playlist result holding all videos of the user in *url*."""
        user_id = self._match_id(url)
        entries = self._entries(user_id)
        return self.playlist_result(entries, user_id)

View File

@ -17,7 +17,8 @@ class XVideosIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:www\.)?xvideos\.com/video| (?:[^/]+\.)?xvideos2?\.com/video|
(?:www\.)?xvideos\.es/video|
flashservice\.xvideos\.com/embedframe/| flashservice\.xvideos\.com/embedframe/|
static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
) )
@ -39,6 +40,42 @@ class XVideosIE(InfoExtractor):
}, { }, {
'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://xvideos.com/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'https://xvideos.com/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'https://xvideos.es/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'http://xvideos.es/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl',
'only_matching': True
}, {
'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl',
'only_matching': True
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -41,7 +41,6 @@ from ..utils import (
orderedSet, orderedSet,
parse_codecs, parse_codecs,
parse_duration, parse_duration,
qualities,
remove_quotes, remove_quotes,
remove_start, remove_start,
smuggle_url, smuggle_url,
@ -384,13 +383,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:(?:www|no)\.)?invidiou\.sh/| (?:(?:www|no)\.)?invidiou\.sh/|
(?:(?:www|fi|de)\.)?invidious\.snopyta\.org/| (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
(?:www\.)?invidious\.kabi\.tk/| (?:www\.)?invidious\.kabi\.tk/|
(?:www\.)?invidious\.enkirton\.net/|
(?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.13ad\.de/|
(?:www\.)?invidious\.mastodon\.host/| (?:www\.)?invidious\.mastodon\.host/|
(?:www\.)?invidious\.nixnet\.xyz/| (?:www\.)?invidious\.nixnet\.xyz/|
(?:www\.)?invidious\.drycat\.fr/|
(?:www\.)?tube\.poal\.co/| (?:www\.)?tube\.poal\.co/|
(?:www\.)?vid\.wxzm\.sx/| (?:www\.)?vid\.wxzm\.sx/|
(?:www\.)?yt\.elukerio\.org/| (?:www\.)?yt\.elukerio\.org/|
(?:www\.)?yt\.lelux\.fi/|
(?:www\.)?kgg2m7yk5aybusll\.onion/|
(?:www\.)?qklhadlycap4cnod\.onion/|
(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls (?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID: (?: # the various things that can precede the ID:
@ -1909,6 +1916,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return int_or_none(self._search_regex( return int_or_none(self._search_regex(
r'\bclen[=/](\d+)', media_url, 'filesize', default=None)) r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download() self.report_rtmp_download()
formats = [{ formats = [{
@ -1917,10 +1927,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': video_info['conn'][0], 'url': video_info['conn'][0],
'player_url': player_url, 'player_url': player_url,
}] }]
elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1): elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map: if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True) raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
formats = []
formats_spec = {} formats_spec = {}
fmt_list = video_info.get('fmt_list', [''])[0] fmt_list = video_info.get('fmt_list', [''])[0]
if fmt_list: if fmt_list:
@ -1934,9 +1945,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'width': int_or_none(width_height[0]), 'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]), 'height': int_or_none(width_height[1]),
} }
q = qualities(['small', 'medium', 'hd720'])
streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
if streaming_formats:
for fmt in streaming_formats: for fmt in streaming_formats:
itag = str_or_none(fmt.get('itag')) itag = str_or_none(fmt.get('itag'))
if not itag: if not itag:
@ -1949,23 +1957,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format_note': quality_label, 'format_note': quality_label,
'fps': int_or_none(fmt.get('fps')), 'fps': int_or_none(fmt.get('fps')),
'height': int_or_none(fmt.get('height')), 'height': int_or_none(fmt.get('height')),
'quality': q(quality),
# bitrate for itag 43 is always 2147483647 # bitrate for itag 43 is always 2147483647
'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None, 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
'width': int_or_none(fmt.get('width')), 'width': int_or_none(fmt.get('width')),
} }
formats = []
for url_data_str in encoded_url_map.split(','): for fmt in streaming_formats:
url_data = compat_parse_qs(url_data_str) if fmt.get('drm_families'):
if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
continue continue
url = url_or_none(fmt.get('url'))
if not url:
cipher = fmt.get('cipher')
if not cipher:
continue
url_data = compat_parse_qs(cipher)
url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
if not url:
continue
else:
cipher = None
url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0])) stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
# Unsupported FORMAT_STREAM_TYPE_OTF # Unsupported FORMAT_STREAM_TYPE_OTF
if stream_type == 3: if stream_type == 3:
continue continue
format_id = url_data['itag'][0]
url = url_data['url'][0]
format_id = fmt.get('itag') or url_data['itag'][0]
if not format_id:
continue
format_id = compat_str(format_id)
if cipher:
if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True): if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
jsplayer_url_json = self._search_regex( jsplayer_url_json = self._search_regex(
@ -2038,24 +2062,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0]) mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
if width is None:
width = int_or_none(fmt.get('width'))
if height is None:
height = int_or_none(fmt.get('height'))
filesize = int_or_none(url_data.get( filesize = int_or_none(url_data.get(
'clen', [None])[0]) or _extract_filesize(url) 'clen', [None])[0]) or _extract_filesize(url)
quality = url_data.get('quality', [None])[0] quality = url_data.get('quality', [None])[0] or fmt.get('quality')
quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
more_fields = { more_fields = {
'filesize': filesize, 'filesize': filesize,
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000), 'tbr': tbr,
'width': width, 'width': width,
'height': height, 'height': height,
'fps': int_or_none(url_data.get('fps', [None])[0]), 'fps': fps,
'format_note': url_data.get('quality_label', [None])[0] or quality, 'format_note': quality_label or quality,
'quality': q(quality),
} }
for key, value in more_fields.items(): for key, value in more_fields.items():
if value: if value:
dct[key] = value dct[key] = value
type_ = url_data.get('type', [None])[0] type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
if type_: if type_:
type_split = type_.split(';') type_split = type_.split(';')
kind_ext = type_split[0].split('/') kind_ext = type_split[0].split('/')
@ -2709,7 +2742,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
page, 'title', default=None) page, 'title', default=None)
_UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref=' _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
uploader = self._search_regex( uploader = self._html_search_regex(
r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE, r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
page, 'uploader', default=None) page, 'uploader', default=None)
mobj = re.search( mobj = re.search(

View File

@ -41,6 +41,7 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE): class ZDFIE(ZDFBaseIE):
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html' _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh') _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
_GEO_COUNTRIES = ['DE']
_TESTS = [{ _TESTS = [{
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html', 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2019.08.13' __version__ = '2019.09.28'