diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 4c75c8d5d..2fea0120e 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.08.13** +- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.08.13 + [debug] youtube-dl version 2019.09.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 8e8c43c47..6116acc79 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.08.13** +- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index df719a29c..79d1a7f3c 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.08.13** +- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 3616db1a7..9bda3d440 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.08.13** +- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.08.13 + [debug] youtube-dl version 
2019.09.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 0fa37aef1..581344917 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.08.13** +- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 9b9e2e149..80681a9ae 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,68 @@ +version 2019.09.28 + +Core +* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493) + +Extractors +* [vk] Fix extraction (#22522) +* [heise] Fix kaltura embeds extraction (#22514) +* [ted] Check for resources validity and extract subtitled downloads (#22513) ++ [youtube] Add support for + owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292) ++ [nhk] Add support for clips +* [nhk] Fix video extraction (#22249, #22353) +* [byutv] Fix extraction (#22070) ++ [openload] Add support for oload.online (#22304) ++ [youtube] Add support for invidious.drycat.fr (#22451) +* [jwplatfom] Do not match video URLs (#20596, #22148) +* [youtube:playlist] Unescape playlist uploader (#22483) ++ [bilibili] Add support audio albums and songs (#21094) ++ [instagram] Add support for tv URLs ++ [mixcloud] Allow uppercase letters in format URLs (#19280) +* [brightcove] Delegate all supported legacy URLs to new extractor (#11523, + #12842, #13912, #15669, #16303) +* [hotstar] Use native HLS downloader by default ++ [hotstar] Extract more formats (#22323) +* [9now] Fix extraction (#22361) +* [zdf] Bypass geo restriction ++ [tv4] Extract series metadata +* [tv4] Fix extraction (#22443) + + +version 2019.09.12.1 + +Extractors +* [youtube] Remove quality and tbr for itag 43 (#22372) + + +version 2019.09.12 + +Extractors +* [youtube] Quick extraction tempfix (#22367, #22163) + + +version 2019.09.01 + +Core ++ [extractor/generic] Add support for squarespace embeds (#21294, #21802, + #21859) ++ [downloader/external] Respect mtime option for aria2c (#22242) + +Extractors ++ [xhamster:user] Add support for user pages (#16330, #18454) ++ [xhamster] Add support for more domains ++ [verystream] Add support for woof.tube (#22217) ++ [dailymotion] Add support for lequipe.fr (#21328, #22152) ++ [openload] Add support for oload.vip (#22205) ++ [bbccouk] Extend URL regular expression (#19200) ++ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223) +* [safari] Fix authentication (#22161, #22184) +* [usanetwork] Fix extraction (#22105) ++ [einthusan] Add support for einthusan.ca (#22171) +* [youtube] Improve unavailable message extraction (#22117) ++ [piksel] Extract subtitles (#20506) + + version 2019.08.13 Core diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7cf60eefe..35275278b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -98,6 +98,8 @@ - **Bigflix** - **Bild**: Bild.de - **BiliBili** + - **BilibiliAudio** + - **BilibiliAudioAlbum** - **BioBioChileTV** - **BIQLE** - **BitChute** @@ -1100,6 +1102,7 @@ - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, 
TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me - **XHamster** - **XHamsterEmbed** + - **XHamsterUser** - **xiami:album**: 虾米音乐 - 专辑 - **xiami:artist**: 虾米音乐 - 歌手 - **xiami:collection**: 虾米音乐 - 精选集 diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6a44bc7ba..c3d1407f9 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -852,8 +852,9 @@ class YoutubeDL(object): extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or extract_flat is True): - if self.params.get('forcejson', False): - self.to_stdout(json.dumps(ie_result)) + self.__forced_printings( + ie_result, self.prepare_filename(ie_result), + incomplete=True) return ie_result if result_type == 'video': @@ -1693,6 +1694,36 @@ class YoutubeDL(object): subs[lang] = f return subs + def __forced_printings(self, info_dict, filename, incomplete): + def print_mandatory(field): + if (self.params.get('force%s' % field, False) + and (not incomplete or info_dict.get(field) is not None)): + self.to_stdout(info_dict[field]) + + def print_optional(field): + if (self.params.get('force%s' % field, False) + and info_dict.get(field) is not None): + self.to_stdout(info_dict[field]) + + print_mandatory('title') + print_mandatory('id') + if self.params.get('forceurl', False) and not incomplete: + if info_dict.get('requested_formats') is not None: + for f in info_dict['requested_formats']: + self.to_stdout(f['url'] + f.get('play_path', '')) + else: + # For RTMP URLs, also include the playpath + self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) + print_optional('thumbnail') + print_optional('description') + if self.params.get('forcefilename', False) and filename is not None: + self.to_stdout(filename) + if self.params.get('forceduration', False) and info_dict.get('duration') is not None: + self.to_stdout(formatSeconds(info_dict['duration'])) + print_mandatory('format') + if self.params.get('forcejson', False): + self.to_stdout(json.dumps(info_dict)) + def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -1703,9 +1734,8 @@ class YoutubeDL(object): if self._num_downloads >= int(max_downloads): raise MaxDownloadsReached() + # TODO: backward compatibility, to be removed info_dict['fulltitle'] = info_dict['title'] - if len(info_dict['title']) > 200: - info_dict['title'] = info_dict['title'][:197] + '...' 
if 'format' not in info_dict: info_dict['format'] = info_dict['ext'] @@ -1720,29 +1750,7 @@ class YoutubeDL(object): info_dict['_filename'] = filename = self.prepare_filename(info_dict) # Forced printings - if self.params.get('forcetitle', False): - self.to_stdout(info_dict['fulltitle']) - if self.params.get('forceid', False): - self.to_stdout(info_dict['id']) - if self.params.get('forceurl', False): - if info_dict.get('requested_formats') is not None: - for f in info_dict['requested_formats']: - self.to_stdout(f['url'] + f.get('play_path', '')) - else: - # For RTMP URLs, also include the playpath - self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) - if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: - self.to_stdout(info_dict['thumbnail']) - if self.params.get('forcedescription', False) and info_dict.get('description') is not None: - self.to_stdout(info_dict['description']) - if self.params.get('forcefilename', False) and filename is not None: - self.to_stdout(filename) - if self.params.get('forceduration', False) and info_dict.get('duration') is not None: - self.to_stdout(formatSeconds(info_dict['duration'])) - if self.params.get('forceformat', False): - self.to_stdout(info_dict['format']) - if self.params.get('forcejson', False): - self.to_stdout(json.dumps(info_dict)) + self.__forced_printings(info_dict, filename, incomplete=False) # Do nothing else if in simulate mode if self.params.get('simulate', False): diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index acdb27712..c31f8910a 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -194,6 +194,7 @@ class Aria2cFD(ExternalFD): cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') + cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') cmd += ['--', info_dict['url']] return cmd diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 3746671d3..80bd696e2 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -15,6 +15,7 @@ from ..utils import ( float_or_none, parse_iso8601, smuggle_url, + str_or_none, strip_jsonp, unified_timestamp, unsmuggle_url, @@ -306,3 +307,115 @@ class BiliBiliBangumiIE(InfoExtractor): return self.playlist_result( entries, bangumi_id, season_info.get('bangumi_title'), season_info.get('evaluate')) + + +class BilibiliAudioBaseIE(InfoExtractor): + def _call_api(self, path, sid, query=None): + if not query: + query = {'sid': sid} + return self._download_json( + 'https://www.bilibili.com/audio/music-service-c/web/' + path, + sid, query=query)['data'] + + +class BilibiliAudioIE(BilibiliAudioBaseIE): + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P\d+)' + _TEST = { + 'url': 'https://www.bilibili.com/audio/au1003142', + 'md5': 'fec4987014ec94ef9e666d4d158ad03b', + 'info_dict': { + 'id': '1003142', + 'ext': 'm4a', + 'title': '【tsukimi】YELLOW / 神山羊', + 'artist': 'tsukimi', + 'comment_count': int, + 'description': 'YELLOW的mp3版!', + 'duration': 183, + 'subtitles': { + 'origin': [{ + 'ext': 'lrc', + }], + }, + 'thumbnail': r're:^https?://.+\.jpg', + 'timestamp': 1564836614, + 'upload_date': '20190803', + 'uploader': 'tsukimi-つきみぐー', + 'view_count': int, + }, + } + + def _real_extract(self, url): + au_id = self._match_id(url) + + play_data = self._call_api('url', au_id) + 
formats = [{ + 'url': play_data['cdns'][0], + 'filesize': int_or_none(play_data.get('size')), + }] + + song = self._call_api('song/info', au_id) + title = song['title'] + statistic = song.get('statistic') or {} + + subtitles = None + lyric = song.get('lyric') + if lyric: + subtitles = { + 'origin': [{ + 'url': lyric, + }] + } + + return { + 'id': au_id, + 'title': title, + 'formats': formats, + 'artist': song.get('author'), + 'comment_count': int_or_none(statistic.get('comment')), + 'description': song.get('intro'), + 'duration': int_or_none(song.get('duration')), + 'subtitles': subtitles, + 'thumbnail': song.get('cover'), + 'timestamp': int_or_none(song.get('passtime')), + 'uploader': song.get('uname'), + 'view_count': int_or_none(statistic.get('play')), + } + + +class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P\d+)' + _TEST = { + 'url': 'https://www.bilibili.com/audio/am10624', + 'info_dict': { + 'id': '10624', + 'title': '每日新曲推荐(每日11:00更新)', + 'description': '每天11:00更新,为你推送最新音乐', + }, + 'playlist_count': 19, + } + + def _real_extract(self, url): + am_id = self._match_id(url) + + songs = self._call_api( + 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data'] + + entries = [] + for song in songs: + sid = str_or_none(song.get('id')) + if not sid: + continue + entries.append(self.url_result( + 'https://www.bilibili.com/audio/au' + sid, + BilibiliAudioIE.ie_key(), sid)) + + if entries: + album_data = self._call_api('menu/info', am_id) or {} + album_title = album_data.get('title') + if album_title: + for entry in entries: + entry['album'] = album_title + return self.playlist_result( + entries, am_id, album_title, album_data.get('intro')) + + return self.playlist_result(entries, am_id) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 58ec5c979..8e2f7217a 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import base64 -import json import re import struct @@ -11,14 +10,12 @@ from .adobepass import AdobePassIE from ..compat import ( compat_etree_fromstring, compat_parse_qs, - compat_str, compat_urllib_parse_urlparse, compat_urlparse, compat_xml_parse_error, compat_HTTPError, ) from ..utils import ( - determine_ext, ExtractorError, extract_attributes, find_xpath_attr, @@ -27,18 +24,19 @@ from ..utils import ( js_to_json, int_or_none, parse_iso8601, + smuggle_url, unescapeHTML, unsmuggle_url, update_url_query, clean_html, mimetype2ext, + UnsupportedError, ) class BrightcoveLegacyIE(InfoExtractor): IE_NAME = 'brightcove:legacy' _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P.*)' - _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated' _TESTS = [ { @@ -55,7 +53,8 @@ class BrightcoveLegacyIE(InfoExtractor): 'timestamp': 1368213670, 'upload_date': '20130510', 'uploader_id': '1589608506001', - } + }, + 'skip': 'The player has been deactivated by the content owner', }, { # From http://medianetwork.oracle.com/video/player/1785452137001 @@ -70,6 +69,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'upload_date': '20120814', 'uploader_id': '1460825906', }, + 'skip': 'video not playable', }, { # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ @@ -79,7 +79,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'ext': 'mp4', 'title': 'This Bracelet Acts as a Personal Thermostat', 'description': 
'md5:547b78c64f4112766ccf4e151c20b6a0', - 'uploader': 'Mashable', + # 'uploader': 'Mashable', 'timestamp': 1382041798, 'upload_date': '20131017', 'uploader_id': '1130468786001', @@ -124,6 +124,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'id': '3550319591001', }, 'playlist_mincount': 7, + 'skip': 'Unsupported URL', }, { # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965) @@ -133,6 +134,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'title': 'Lesson 08', }, 'playlist_mincount': 10, + 'skip': 'Unsupported URL', }, { # playerID inferred from bcpid @@ -141,12 +143,6 @@ class BrightcoveLegacyIE(InfoExtractor): 'only_matching': True, # Tested in GenericIE } ] - FLV_VCODECS = { - 1: 'SORENSON', - 2: 'ON2', - 3: 'H264', - 4: 'VP8', - } @classmethod def _build_brighcove_url(cls, object_str): @@ -238,7 +234,8 @@ class BrightcoveLegacyIE(InfoExtractor): @classmethod def _make_brightcove_url(cls, params): - return update_url_query(cls._FEDERATED_URL, params) + return update_url_query( + 'http://c.brightcove.com/services/viewer/htmlFederated', params) @classmethod def _extract_brightcove_url(cls, webpage): @@ -297,38 +294,12 @@ class BrightcoveLegacyIE(InfoExtractor): videoPlayer = query.get('@videoPlayer') if videoPlayer: # We set the original url as the default 'Referer' header - referer = smuggled_data.get('Referer', url) + referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url) + video_id = videoPlayer[0] if 'playerID' not in query: mobj = re.search(r'/bcpid(\d+)', url) if mobj is not None: query['playerID'] = [mobj.group(1)] - return self._get_video_info( - videoPlayer[0], query, referer=referer) - elif 'playerKey' in query: - player_key = query['playerKey'] - return self._get_playlist_info(player_key[0]) - else: - raise ExtractorError( - 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?', - expected=True) - - def _brightcove_new_url_result(self, publisher_id, video_id): - brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) - return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) - - def _get_video_info(self, video_id, query, referer=None): - headers = {} - linkBase = query.get('linkBaseURL') - if linkBase is not None: - referer = linkBase[0] - if referer is not None: - headers['Referer'] = referer - webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query) - - error_msg = self._html_search_regex( - r"

<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>
)+", webpage, - 'error message', default=None) - if error_msg is not None: publisher_id = query.get('publisherId') if publisher_id and publisher_id[0].isdigit(): publisher_id = publisher_id[0] @@ -339,6 +310,9 @@ class BrightcoveLegacyIE(InfoExtractor): else: player_id = query.get('playerID') if player_id and player_id[0].isdigit(): + headers = {} + if referer: + headers['Referer'] = referer player_page = self._download_webpage( 'http://link.brightcove.com/services/player/bcpid' + player_id[0], video_id, headers=headers, fatal=False) @@ -349,136 +323,16 @@ class BrightcoveLegacyIE(InfoExtractor): if player_key: enc_pub_id = player_key.split(',')[1].replace('~', '=') publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] - if publisher_id: - return self._brightcove_new_url_result(publisher_id, video_id) - raise ExtractorError( - 'brightcove said: %s' % error_msg, expected=True) - - self.report_extraction(video_id) - info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json') - info = json.loads(info)['data'] - video_info = info['programmedContent']['videoPlayer']['mediaDTO'] - video_info['_youtubedl_adServerURL'] = info.get('adServerURL') - - return self._extract_video_info(video_info) - - def _get_playlist_info(self, player_key): - info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key - playlist_info = self._download_webpage( - info_url, player_key, 'Downloading playlist information') - - json_data = json.loads(playlist_info) - if 'videoList' in json_data: - playlist_info = json_data['videoList'] - playlist_dto = playlist_info['mediaCollectionDTO'] - elif 'playlistTabs' in json_data: - playlist_info = json_data['playlistTabs'] - playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0] - else: - raise ExtractorError('Empty playlist') - - videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']] - - return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], - playlist_title=playlist_dto['displayName']) - - def _extract_video_info(self, video_info): - video_id = compat_str(video_info['id']) - publisher_id = video_info.get('publisherId') - info = { - 'id': video_id, - 'title': video_info['displayName'].strip(), - 'description': video_info.get('shortDescription'), - 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), - 'uploader': video_info.get('publisherName'), - 'uploader_id': compat_str(publisher_id) if publisher_id else None, - 'duration': float_or_none(video_info.get('length'), 1000), - 'timestamp': int_or_none(video_info.get('creationDate'), 1000), - } - - renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', []) - if renditions: - formats = [] - for rend in renditions: - url = rend['defaultURL'] - if not url: - continue - ext = None - if rend['remote']: - url_comp = compat_urllib_parse_urlparse(url) - if url_comp.path.endswith('.m3u8'): - formats.extend( - self._extract_m3u8_formats( - url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - continue - elif 'akamaihd.net' in url_comp.netloc: - # This type of renditions are served through - # akamaihd.net, but they don't use f4m manifests - url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' - ext = 'flv' - if ext is None: - ext = determine_ext(url) - tbr = int_or_none(rend.get('encodingRate'), 1000) - a_format = { - 'format_id': 'http%s' % ('-%s' % tbr if tbr else ''), - 
'url': url, - 'ext': ext, - 'filesize': int_or_none(rend.get('size')) or None, - 'tbr': tbr, - } - if rend.get('audioOnly'): - a_format.update({ - 'vcodec': 'none', - }) - else: - a_format.update({ - 'height': int_or_none(rend.get('frameHeight')), - 'width': int_or_none(rend.get('frameWidth')), - 'vcodec': rend.get('videoCodec'), - }) - - # m3u8 manifests with remote == false are media playlists - # Not calling _extract_m3u8_formats here to save network traffic - if ext == 'm3u8': - a_format.update({ - 'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''), - 'ext': 'mp4', - 'protocol': 'm3u8_native', - }) - - formats.append(a_format) - self._sort_formats(formats) - info['formats'] = formats - elif video_info.get('FLVFullLengthURL') is not None: - info.update({ - 'url': video_info['FLVFullLengthURL'], - 'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')), - 'filesize': int_or_none(video_info.get('FLVFullSize')), - }) - - if self._downloader.params.get('include_ads', False): - adServerURL = video_info.get('_youtubedl_adServerURL') - if adServerURL: - ad_info = { - '_type': 'url', - 'url': adServerURL, - } - if 'url' in info: - return { - '_type': 'playlist', - 'title': info['title'], - 'entries': [ad_info, info], - } - else: - return ad_info - - if not info.get('url') and not info.get('formats'): - uploader_id = info.get('uploader_id') - if uploader_id: - info.update(self._brightcove_new_url_result(uploader_id, video_id)) - else: - raise ExtractorError('Unable to extract video url for %s' % video_id) - return info + if publisher_id: + brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + if referer: + brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer}) + return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) + # TODO: figure out if it's possible to extract playlistId from playerKey + # elif 'playerKey' in query: + # player_key = query['playerKey'] + # return self._get_playlist_info(player_key[0]) + raise UnsupportedError(url) class BrightcoveNewIE(AdobePassIE): diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 562c83af9..0b11bf11f 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -3,7 +3,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import parse_duration +from ..utils import ( + determine_ext, + merge_dicts, + parse_duration, + url_or_none, +) class BYUtvIE(InfoExtractor): @@ -51,7 +56,7 @@ class BYUtvIE(InfoExtractor): video_id = mobj.group('id') display_id = mobj.group('display_id') or video_id - info = self._download_json( + video = self._download_json( 'https://api.byutv.org/api3/catalog/getvideosforcontent', display_id, query={ 'contentid': video_id, @@ -62,7 +67,7 @@ class BYUtvIE(InfoExtractor): 'x-byutv-platformkey': 'xsaaw9c7y5', }) - ep = info.get('ooyalaVOD') + ep = video.get('ooyalaVOD') if ep: return { '_type': 'url_transparent', @@ -75,18 +80,38 @@ class BYUtvIE(InfoExtractor): 'thumbnail': ep.get('imageThumbnail'), } - ep = info['dvr'] - title = ep['title'] - formats = self._extract_m3u8_formats( - ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + info = {} + formats = [] + for format_id, ep in video.items(): + if not isinstance(ep, dict): + continue + video_url = url_or_none(ep.get('videoUrl')) + if not video_url: + continue + ext = determine_ext(video_url) + if ext == 'm3u8': + 
formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': video_url, + 'format_id': format_id, + }) + merge_dicts(info, { + 'title': ep.get('title'), + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + 'duration': parse_duration(ep.get('length')), + }) self._sort_formats(formats) - return { + + return merge_dicts(info, { 'id': video_id, 'display_id': display_id, - 'title': title, - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - 'duration': parse_duration(ep.get('length')), + 'title': display_id, 'formats': formats, - } + }) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index e2b828d8a..656e715ae 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class ChaturbateIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P[^/?#]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P[^/?&#]+)' _TESTS = [{ 'url': 'https://www.chaturbate.com/siswet19/', 'info_dict': { @@ -21,6 +21,9 @@ class ChaturbateIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Room is offline', + }, { + 'url': 'https://chaturbate.com/fullvideo/?b=caylin', + 'only_matching': True, }, { 'url': 'https://en.chaturbate.com/siswet19/', 'only_matching': True, @@ -32,7 +35,8 @@ class ChaturbateIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - url, video_id, headers=self.geo_verification_headers()) + 'https://chaturbate.com/%s/' % video_id, video_id, + headers=self.geo_verification_headers()) m3u8_urls = [] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 859786617..50d48c40d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1424,12 +1424,10 @@ class InfoExtractor(object): try: self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers) return True - except ExtractorError as e: - if isinstance(e.cause, compat_urllib_error.URLError): - self.to_screen( - '%s: %s URL is invalid, skipping' % (video_id, item)) - return False - raise + except ExtractorError: + self.to_screen( + '%s: %s URL is invalid, skipping' % (video_id, item)) + return False def http_scheme(self): """ Either "http:" or "https:", depending on the user's preferences """ diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 3d3d78041..745971900 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -48,7 +48,14 @@ class DailymotionBaseInfoExtractor(InfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor): - _VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P[^/?_]+)' + _VALID_URL = r'''(?ix) + https?:// + (?: + (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)| + (?:www\.)?lequipe\.fr/video + ) + /(?P[^/?_]+) + ''' IE_NAME = 'dailymotion' _FORMATS = [ @@ -133,6 +140,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor): }, { 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun', 'only_matching': True, + }, { + 'url': 'https://www.lequipe.fr/video/x791mem', + 'only_matching': 
True, + }, { + 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2', + 'only_matching': True, }] @staticmethod diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9a964feac..299bdf8b0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -104,6 +104,8 @@ from .bild import BildIE from .bilibili import ( BiliBiliIE, BiliBiliBangumiIE, + BilibiliAudioIE, + BilibiliAudioAlbumIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( @@ -895,7 +897,6 @@ from .puhutv import ( PuhuTVSerieIE, ) from .presstv import PressTVIE -from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE from .pyvideo import PyvideoIE @@ -1131,6 +1132,7 @@ from .telegraaf import TelegraafIE from .telemb import TeleMBIE from .telequebec import ( TeleQuebecIE, + TeleQuebecSquatIE, TeleQuebecEmissionIE, TeleQuebecLiveIE, ) @@ -1284,7 +1286,6 @@ from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veoh import VeohIE -from .vessel import VesselIE from .vesti import VestiIE from .vevo import ( VevoIE, @@ -1415,7 +1416,6 @@ from .weibo import ( WeiboMobileIE ) from .weiqitv import WeiqiTVIE -from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE from .wsj import ( @@ -1429,6 +1429,7 @@ from .xfileshare import XFileShareIE from .xhamster import ( XHamsterIE, XHamsterEmbedIE, + XHamsterUserIE, ) from .xiami import ( XiamiSongIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d34fc4b15..ec43c5ae4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -77,7 +77,6 @@ from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE -from .vessel import VesselIE from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE @@ -2075,6 +2074,22 @@ class GenericIE(InfoExtractor): }, 'playlist_count': 6, }, + { + # Squarespace video embed, 2019-08-28 + 'url': 'http://ootboxford.com', + 'info_dict': { + 'id': 'Tc7b_JGdZfw', + 'title': 'Out of the Blue, at Childish Things 10', + 'ext': 'mp4', + 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f', + 'uploader_id': 'helendouglashouse', + 'uploader': 'Helen & Douglas House', + 'upload_date': '20140328', + }, + 'params': { + 'skip_download': True, + }, + }, { # Zype embed 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', @@ -2395,6 +2410,12 @@ class GenericIE(InfoExtractor): # Unescaping the whole page allows to handle those cases in a generic way webpage = compat_urllib_parse_unquote(webpage) + # Unescape squarespace embeds to be detected by generic extractor, + # see https://github.com/ytdl-org/youtube-dl/issues/21294 + webpage = re.sub( + r']+class=[^>]*?\bsqs-video-wrapper\b[^>]*>', + lambda x: unescapeHTML(x.group(0)), webpage) + # it's tempting to parse this further, but you would # have to take into account all the variations like # Video Title - Site Name @@ -2469,11 +2490,6 @@ class GenericIE(InfoExtractor): if tp_urls: return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform') - # Look for Vessel embeds - vessel_urls = VesselIE._extract_urls(webpage) - if vessel_urls: - return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key()) - # Look for embedded rtl.nl player matches = re.findall( 
r']+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"', diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index bbe3cb283..18a30fe67 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -11,7 +11,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#]+)' + _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#\.]+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -53,6 +53,12 @@ class GfycatIE(InfoExtractor): }, { 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball', 'only_matching': True + }, { + 'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif', + 'only_matching': True + }, { + 'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4', + 'only_matching': True }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index fb8f7679b..b9c400a57 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -96,6 +96,8 @@ class GloboIE(InfoExtractor): video = self._download_json( 'http://api.globovideos.com/videos/%s/playlist' % video_id, video_id)['videos'][0] + if video.get('encrypted') is True: + raise ExtractorError('This video is DRM protected.', expected=True) title = video['title'] @@ -109,8 +111,8 @@ class GloboIE(InfoExtractor): security = self._download_json( 'http://security.video.globo.com/videos/%s/hash' % video_id, video_id, 'Downloading security hash for %s' % resource_id, query={ - 'player': 'flash', - 'version': '17.0.0.132', + 'player': 'desktop', + 'version': '5.19.1', 'resource_id': resource_id, }) @@ -122,19 +124,18 @@ class GloboIE(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, message), expected=True) continue - hash_code = security_hash[:2] - received_time = security_hash[2:12] - received_random = security_hash[12:22] - received_md5 = security_hash[22:] + assert security_hash[:2] in ('04', '14') + received_time = security_hash[3:13] + received_md5 = security_hash[24:] sign_time = compat_str(int(received_time) + 86400) padding = '%010d' % random.randint(1, 10000000000) - md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode() + md5_data = (received_md5 + sign_time + padding + '0xAC10FD').encode() signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') - signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5 + signed_hash = security_hash[:23] + sign_time + padding + signed_md5 - signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') + signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '') if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): formats.extend(self._extract_m3u8_formats( signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index d8a2f9d76..cbe564a3c 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -105,8 +105,7 @@ class HeiseIE(InfoExtractor): webpage, default=None) or self._html_search_meta( 'description', webpage) - kaltura_url = KalturaIE._extract_url(webpage) - if kaltura_url: + def 
_make_kaltura_result(kaltura_url): return { '_type': 'url_transparent', 'url': smuggle_url(kaltura_url, {'source_url': url}), @@ -115,6 +114,16 @@ class HeiseIE(InfoExtractor): 'description': description, } + kaltura_url = KalturaIE._extract_url(webpage) + if kaltura_url: + return _make_kaltura_result(kaltura_url) + + kaltura_id = self._search_regex( + r'entry-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura id', + default=None, group='id') + if kaltura_id: + return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id) + yt_urls = YoutubeIE._extract_urls(webpage) if yt_urls: return self.playlist_from_matches( diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 79d5bbb2e..f9f7c5a64 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import hashlib import hmac +import re import time import uuid @@ -126,6 +127,8 @@ class HotStarIE(HotStarBaseIE): format_url = url_or_none(playback_set.get('playbackUrl')) if not format_url: continue + format_url = re.sub( + r'(?<=//staragvod)(\d)', r'web\1', format_url) tags = str_or_none(playback_set.get('tagsCombination')) or '' if tags and 'encryption:plain' not in tags: continue @@ -133,7 +136,8 @@ class HotStarIE(HotStarBaseIE): try: if 'package:hls' in tags or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id='hls')) + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls')) elif 'package:dash' in tags or ext == 'mpd': formats.extend(self._extract_mpd_formats( format_url, video_id, mpd_id='dash')) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index ffd87b55f..b061850a1 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -22,7 +22,7 @@ from ..utils import ( class InstagramIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/p/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv)/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -92,6 +92,9 @@ class InstagramIE(InfoExtractor): }, { 'url': 'http://instagram.com/p/9o6LshA7zy/embed/', 'only_matching': True, + }, { + 'url': 'https://www.instagram.com/tv/aye83DjauH/', + 'only_matching': True, }] @staticmethod diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 647b905f1..2aabd98b5 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class JWPlatformIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' + _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' _TESTS = [{ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325', diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 0a733424c..2d38b758b 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -151,14 +151,15 @@ class KalturaIE(InfoExtractor): if mobj: embed_info = mobj.groupdict() for k, v in embed_info.items(): - embed_info[k] = v.strip() + if v: + embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % 
embed_info escaped_pid = re.escape(embed_info['partner_id']) - service_url = re.search( - r']+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), + service_mobj = re.search( + r']+src=(["\'])(?P(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), webpage) - if service_url: - url = smuggle_url(url, {'service_url': service_url.group(1)}) + if service_mobj: + url = smuggle_url(url, {'service_url': service_mobj.group('id')}) return url def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index bcac13ec5..bf5353ef9 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -164,7 +164,7 @@ class MixcloudIE(InfoExtractor): def decrypt_url(f_url): for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'): decrypted_url = self._decrypt_xor_cipher(k, f_url) - if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): + if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): return decrypted_url for url_key in ('url', 'hlsUrl', 'dashUrl'): diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 241412f98..6a2c6cb7b 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -10,6 +10,18 @@ class NhkVodIE(InfoExtractor): # Content available only for a limited period of time. Visit # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. _TESTS = [{ + # clip + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', + 'md5': '256a1be14f48d960a7e61e2532d95ec3', + 'info_dict': { + 'id': 'a95j5iza', + 'ext': 'mp4', + 'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU", + 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5', + 'timestamp': 1565965194, + 'upload_date': '20190816', + }, + }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', 'only_matching': True, }, { @@ -19,7 +31,7 @@ class NhkVodIE(InfoExtractor): 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/', 'only_matching': True, }] - _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json' + _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json' def _real_extract(self, url): lang, m_type, episode_id = re.match(self._VALID_URL, url).groups() @@ -28,7 +40,10 @@ class NhkVodIE(InfoExtractor): is_video = m_type == 'video' episode = self._download_json( - self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''), + self._API_URL_TEMPLATE % ( + 'v' if is_video else 'r', + 'clip' if episode_id[:4] == '9999' else 'esd', + episode_id, lang, '/all' if is_video else ''), episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0] title = episode.get('sub_title_clean') or episode['sub_title'] @@ -60,8 +75,8 @@ class NhkVodIE(InfoExtractor): if is_video: info.update({ '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:' + episode['vod_id'], + 'ie_key': 'Piksel', + 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'], }) else: audio = episode['audio'] diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 5e34d776b..2e8b302ac 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -85,7 +85,8 @@ class NickBrIE(MTVServicesInfoExtractor): 
https?:// (?: (?P(?:www\.)?nickjr|mundonick\.uol)\.com\.br| - (?:www\.)?nickjr\.[a-z]{2} + (?:www\.)?nickjr\.[a-z]{2}| + (?:www\.)?nickelodeonjunior\.fr ) /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P[^/?\#.]+) ''' @@ -101,6 +102,9 @@ class NickBrIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/', 'only_matching': True, + }, { + 'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index f32f530f7..6157dc7c1 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -45,7 +45,11 @@ class NineNowIE(InfoExtractor): webpage = self._download_webpage(url, display_id) page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, - 'page data'), display_id) + 'page data', default='{}'), display_id, fatal=False) + if not page_data: + page_data = self._parse_json(self._parse_json(self._search_regex( + r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;', + webpage, 'page data'), display_id), display_id) for kind in ('episode', 'clip'): current_key = page_data.get(kind, {}).get( diff --git a/youtube_dl/extractor/nonktube.py b/youtube_dl/extractor/nonktube.py index 63e58aae2..ca1424e06 100644 --- a/youtube_dl/extractor/nonktube.py +++ b/youtube_dl/extractor/nonktube.py @@ -25,9 +25,14 @@ class NonkTubeIE(NuevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - info = self._extract_nuevo( - 'https://www.nonktube.com/media/nuevo/econfig.php?key=%s' - % video_id, video_id) + webpage = self._download_webpage(url, video_id) - info['age_limit'] = 18 + title = self._og_search_title(webpage) + info = self._parse_html5_media_entries(url, webpage, video_id)[0] + + info.update({ + 'id': video_id, + 'title': title, + 'age_limit': 18, + }) return info diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 5f43e692f..60933f069 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -406,7 +406,7 @@ class NRKTVSerieBaseIE(InfoExtractor): def _extract_series(self, webpage, display_id, fatal=True): config = self._parse_json( self._search_regex( - (r'INITIAL_DATA_*\s*=\s*({.+?})\s*;', + (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;', r'({.+?})\s*,\s*"[^"]+"\s*\)\s*'), webpage, 'config', default='{}' if not fatal else NO_DEFAULT), display_id, fatal=False) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index ab4980d4d..66e38cdb4 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,12 +243,13 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'''(?x) + _DOMAINS = r''' (?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website|vip)| - oladblock\.(?:services|xyz|me)|openloed\.co) - ''' + oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)| + oladblock\.(?:services|xyz|me)|openloed\.co + ) + ''' _VALID_URL = r'''(?x) https?:// (?P @@ -361,6 +362,12 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.services/embed/bs1NWj1dCag/', 'only_matching': True, + }, { + 'url': 'https://oload.online/f/W8o2UfN1vNY/', + 'only_matching': 
True, + }, { + 'url': 'https://oload.monster/f/W8o2UfN1vNY/', + 'only_matching': True, }, { 'url': 'https://oload.press/embed/drTBl1aOTvk/', 'only_matching': True, @@ -396,7 +403,7 @@ class OpenloadIE(InfoExtractor): @classmethod def _extract_urls(cls, webpage): return re.findall( - r']+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' + r'(?x)]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' % (cls._DOMAINS, cls._EMBED_WORD), webpage) def _extract_decrypted_page(self, page_url, webpage, video_id): @@ -462,7 +469,7 @@ class OpenloadIE(InfoExtractor): class VerystreamIE(OpenloadIE): IE_NAME = 'verystream' - _DOMAINS = r'(?:verystream\.com)' + _DOMAINS = r'(?:verystream\.com|woof\.tube)' _VALID_URL = r'''(?x) https?:// (?P diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 499be0029..3425f7602 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -86,12 +86,13 @@ class ORFTVthekIE(InfoExtractor): if value: format_id_list.append(value) format_id = '-'.join(format_id_list) - if determine_ext(fd['src']) == 'm3u8': + ext = determine_ext(src) + if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - fd['src'], video_id, 'mp4', m3u8_id=format_id)) - elif determine_ext(fd['src']) == 'f4m': + src, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': formats.extend(self._extract_f4m_formats( - fd['src'], video_id, f4m_id=format_id)) + src, video_id, f4m_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index b50543e32..d3a83ea2b 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -18,81 +18,385 @@ from ..utils import ( class PeerTubeIE(InfoExtractor): _INSTANCES_RE = r'''(?: # Taken from https://instances.joinpeertube.org/instances + peertube\.rainbowswingers\.net| + tube\.stanisic\.nl| + peer\.suiri\.us| + medias\.libox\.fr| + videomensoif\.ynh\.fr| + peertube\.travelpandas\.eu| + peertube\.rachetjay\.fr| + peertube\.montecsys\.fr| + tube\.eskuero\.me| + peer\.tube| + peertube\.umeahackerspace\.se| + tube\.nx-pod\.de| + video\.monsieurbidouille\.fr| tube\.openalgeria\.org| - peertube\.pointsecu\.fr| + vid\.lelux\.fi| + video\.anormallostpod\.ovh| + tube\.crapaud-fou\.org| + peertube\.stemy\.me| + lostpod\.space| + exode\.me| + peertube\.snargol\.com| + vis\.ion\.ovh| + videosdulib\.re| + v\.mbius\.io| + videos\.judrey\.eu| + peertube\.osureplayviewer\.xyz| + peertube\.mathieufamily\.ovh| + www\.videos-libr\.es| + fightforinfo\.com| + peertube\.fediverse\.ru| + peertube\.oiseauroch\.fr| + video\.nesven\.eu| + v\.bearvideo\.win| + video\.qoto\.org| + justporn\.cc| + video\.vny\.fr| + peervideo\.club| + tube\.taker\.fr| + peertube\.chantierlibre\.org| + tube\.ipfixe\.info| + tube\.kicou\.info| + tube\.dodsorf\.as| + videobit\.cc| + video\.yukari\.moe| + videos\.elbinario\.net| + hkvideo\.live| + pt\.tux\.tf| + www\.hkvideo\.live| + FIGHTFORINFO\.com| + pt\.765racing\.com| + peertube\.gnumeria\.eu\.org| + nordenmedia\.com| + peertube\.co\.uk| + tube\.darfweb\.eu| + tube\.kalah-france\.org| + 0ch\.in| + vod\.mochi\.academy| + film\.node9\.org| + peertube\.hatthieves\.es| + video\.fitchfamily\.org| + peertube\.ddns\.net| + video\.ifuncle\.kr| + video\.fdlibre\.eu| + tube\.22decembre\.eu| + peertube\.harmoniescreatives\.com| + tube\.fabrigli\.fr| + video\.thedwyers\.co| + video\.bruitbruit\.com| + peertube\.foxfam\.club| + peer\.philoxweb\.be| + videos\.bugs\.social| + peertube\.malbert\.xyz| + 
peertube\.bilange\.ca| + libretube\.net| + diytelevision\.com| + peertube\.fedilab\.app| + libre\.video| + video\.mstddntfdn\.online| + us\.tv| + peertube\.sl-network\.fr| + peertube\.dynlinux\.io| + peertube\.david\.durieux\.family| + peertube\.linuxrocks\.online| + peerwatch\.xyz| + v\.kretschmann\.social| + tube\.otter\.sh| + yt\.is\.nota\.live| + tube\.dragonpsi\.xyz| + peertube\.boneheadmedia\.com| + videos\.funkwhale\.audio| + watch\.44con\.com| + peertube\.gcaillaut\.fr| + peertube\.icu| + pony\.tube| + spacepub\.space| + tube\.stbr\.io| + v\.mom-gay\.faith| + tube\.port0\.xyz| + peertube\.simounet\.net| + play\.jergefelt\.se| + peertube\.zeteo\.me| + tube\.danq\.me| + peertube\.kerenon\.com| + tube\.fab-l3\.org| + tube\.calculate\.social| + peertube\.mckillop\.org| + tube\.netzspielplatz\.de| + vod\.ksite\.de| + peertube\.laas\.fr| + tube\.govital\.net| + peertube\.stephenson\.cc| + bistule\.nohost\.me| + peertube\.kajalinifi\.de| + video\.ploud\.jp| + video\.omniatv\.com| + peertube\.ffs2play\.fr| + peertube\.leboulaire\.ovh| + peertube\.tronic-studio\.com| + peertube\.public\.cat| + peertube\.metalbanana\.net| + video\.1000i100\.fr| + peertube\.alter-nativ-voll\.de| + tube\.pasa\.tf| + tube\.worldofhauru\.xyz| + pt\.kamp\.site| + peertube\.teleassist\.fr| + videos\.mleduc\.xyz| + conf\.tube| + media\.privacyinternational\.org| + pt\.forty-two\.nl| + video\.halle-leaks\.de| + video\.grosskopfgames\.de| + peertube\.schaeferit\.de| + peertube\.jackbot\.fr| + tube\.extinctionrebellion\.fr| + peertube\.f-si\.org| + video\.subak\.ovh| + videos\.koweb\.fr| + peertube\.zergy\.net| + peertube\.roflcopter\.fr| + peertube\.floss-marketing-school\.com| + vloggers\.social| + peertube\.iriseden\.eu| + videos\.ubuntu-paris\.org| + peertube\.mastodon\.host| + armstube\.com| + peertube\.s2s\.video| + peertube\.lol| + tube\.open-plug\.eu| + open\.tube| + peertube\.ch| + peertube\.normandie-libre\.fr| + peertube\.slat\.org| + video\.lacaveatonton\.ovh| + peertube\.uno| + peertube\.servebeer\.com| + peertube\.fedi\.quebec| + tube\.h3z\.jp| + tube\.plus200\.com| + peertube\.eric\.ovh| + tube\.metadocs\.cc| + tube\.unmondemeilleur\.eu| + gouttedeau\.space| + video\.antirep\.net| + nrop\.cant\.at| + tube\.ksl-bmx\.de| + tube\.plaf\.fr| + tube\.tchncs\.de| + video\.devinberg\.com| + hitchtube\.fr| + peertube\.kosebamse\.com| + yunopeertube\.myddns\.me| + peertube\.varney\.fr| + peertube\.anon-kenkai\.com| + tube\.maiti\.info| + tubee\.fr| + videos\.dinofly\.com| + toobnix\.org| + videotape\.me| + voca\.tube| + video\.heromuster\.com| + video\.lemediatv\.fr| + video\.up\.edu\.ph| + balafon\.video| + video\.ivel\.fr| + thickrips\.cloud| + pt\.laurentkruger\.fr| + video\.monarch-pass\.net| + peertube\.artica\.center| + video\.alternanet\.fr| + indymotion\.fr| + fanvid\.stopthatimp\.net| + video\.farci\.org| + v\.lesterpig\.com| + video\.okaris\.de| + tube\.pawelko\.net| + peertube\.mablr\.org| + tube\.fede\.re| + pytu\.be| + evertron\.tv| + devtube\.dev-wiki\.de| + raptube\.antipub\.org| + video\.selea\.se| + peertube\.mygaia\.org| + video\.oh14\.de| + peertube\.livingutopia\.org| + peertube\.the-penguin\.de| + tube\.thechangebook\.org| + tube\.anjara\.eu| + pt\.pube\.tk| + video\.samedi\.pm| + mplayer\.demouliere\.eu| + widemus\.de| + peertube\.me| + peertube\.zapashcanon\.fr| + video\.latavernedejohnjohn\.fr| + peertube\.pcservice46\.fr| + peertube\.mazzonetto\.eu| + video\.irem\.univ-paris-diderot\.fr| + video\.livecchi\.cloud| + alttube\.fr| + video\.coop\.tools| + video\.cabane-libre\.org| + 
peertube\.openstreetmap\.fr| + videos\.alolise\.org| + irrsinn\.video| + video\.antopie\.org| + scitech\.video| + tube2\.nemsia\.org| + video\.amic37\.fr| + peertube\.freeforge\.eu| + video\.arbitrarion\.com| + video\.datsemultimedia\.com| + stoptrackingus\.tv| + peertube\.ricostrongxxx\.com| + docker\.videos\.lecygnenoir\.info| + peertube\.togart\.de| + tube\.postblue\.info| + videos\.domainepublic\.net| + peertube\.cyber-tribal\.com| + video\.gresille\.org| + peertube\.dsmouse\.net| + cinema\.yunohost\.support| + tube\.theocevaer\.fr| + repro\.video| + tube\.4aem\.com| + quaziinc\.com| + peertube\.metawurst\.space| + videos\.wakapo\.com| + video\.ploud\.fr| + video\.freeradical\.zone| + tube\.valinor\.fr| + refuznik\.video| + pt\.kircheneuenburg\.de| + peertube\.asrun\.eu| + peertube\.lagob\.fr| + videos\.side-ways\.net| + 91video\.online| + video\.valme\.io| + video\.taboulisme\.com| + videos-libr\.es| + tv\.mooh\.fr| + nuage\.acostey\.fr| + video\.monsieur-a\.fr| + peertube\.librelois\.fr| + videos\.pair2jeux\.tube| + videos\.pueseso\.club| + peer\.mathdacloud\.ovh| + media\.assassinate-you\.net| + vidcommons\.org| + ptube\.rousset\.nom\.fr| + tube\.cyano\.at| + videos\.squat\.net| + video\.iphodase\.fr| + peertube\.makotoworkshop\.org| + peertube\.serveur\.slv-valbonne\.fr| + vault\.mle\.party| + hostyour\.tv| + videos\.hack2g2\.fr| + libre\.tube| + pire\.artisanlogiciel\.net| + videos\.numerique-en-commun\.fr| + video\.netsyms\.com| + video\.die-partei\.social| + video\.writeas\.org| + peertube\.swarm\.solvingmaz\.es| + tube\.pericoloso\.ovh| + watching\.cypherpunk\.observer| + videos\.adhocmusic\.com| + tube\.rfc1149\.net| + peertube\.librelabucm\.org| + videos\.numericoop\.fr| + peertube\.koehn\.com| + peertube\.anarchmusicall\.net| + tube\.kampftoast\.de| + vid\.y-y\.li| + peertube\.xtenz\.xyz| + diode\.zone| + tube\.egf\.mn| + peertube\.nomagic\.uk| + visionon\.tv| + videos\.koumoul\.com| + video\.rastapuls\.com| + video\.mantlepro\.com| + video\.deadsuperhero\.com| + peertube\.musicstudio\.pro| + peertube\.we-keys\.fr| + artitube\.artifaille\.fr| + peertube\.ethernia\.net| + tube\.midov\.pl| + peertube\.fr| + watch\.snoot\.tube| + peertube\.donnadieu\.fr| + argos\.aquilenet\.fr| + tube\.nemsia\.org| + tube\.bruniau\.net| + videos\.darckoune\.moe| + tube\.traydent\.info| + dev\.videos\.lecygnenoir\.info| + peertube\.nayya\.org| + peertube\.live| + peertube\.mofgao\.space| + video\.lequerrec\.eu| + peertube\.amicale\.net| + aperi\.tube| + tube\.ac-lyon\.fr| + video\.lw1\.at| + www\.yiny\.org| + videos\.pofilo\.fr| + tube\.lou\.lt| + choob\.h\.etbus\.ch| + tube\.hoga\.fr| + peertube\.heberge\.fr| + video\.obermui\.de| + videos\.cloudfrancois\.fr| + betamax\.video| + video\.typica\.us| + tube\.piweb\.be| + video\.blender\.org| + peertube\.cat| + tube\.kdy\.ch| + pe\.ertu\.be| + peertube\.social| + videos\.lescommuns\.org| + tv\.datamol\.org| + videonaute\.fr| + dialup\.express| peertube\.nogafa\.org| - peertube\.pl| megatube\.lilomoino\.fr| peertube\.tamanoir\.foucry\.net| - peertube\.inapurna\.org| - peertube\.netzspielplatz\.de| - video\.deadsuperhero\.com| peertube\.devosi\.org| peertube\.1312\.media| - tube\.worldofhauru\.xyz| tube\.bootlicker\.party| skeptikon\.fr| - peertube\.geekshell\.fr| - tube\.opportunis\.me| - peertube\.peshane\.net| video\.blueline\.mg| tube\.homecomputing\.fr| - videos\.cloudfrancois\.fr| - peertube\.viviers-fibre\.net| tube\.ouahpiti\.info| video\.tedomum\.net| video\.g3l\.org| fontube\.fr| peertube\.gaialabs\.ch| - peertube\.extremely\.online| - 
peertube\.public-infrastructure\.eu| tube\.kher\.nl| peertube\.qtg\.fr| - tube\.22decembre\.eu| - facegirl\.me| video\.migennes\.net| - janny\.moe| tube\.p2p\.legal| - video\.atlanti\.se| troll\.tv| - peertube\.geekael\.fr| - vid\.leotindall\.com| - video\.anormallostpod\.ovh| - p-tube\.h3z\.jp| - tube\.darfweb\.eu| videos\.iut-orsay\.fr| peertube\.solidev\.net| - videos\.symphonie-of-code\.fr| - testtube\.ortg\.de| videos\.cemea\.org| - peertube\.gwendalavir\.eu| video\.passageenseine\.fr| videos\.festivalparminous\.org| peertube\.touhoppai\.moe| - peertube\.duckdns\.org| sikke\.fi| - peertube\.mastodon\.host| - firedragonvideos\.com| - vidz\.dou\.bet| - peertube\.koehn\.com| peer\.hostux\.social| share\.tube| peertube\.walkingmountains\.fr| - medias\.libox\.fr| - peertube\.moe| - peertube\.xyz| - jp\.peertube\.network| videos\.benpro\.fr| - tube\.otter\.sh| - peertube\.angristan\.xyz| peertube\.parleur\.net| - peer\.ecutsa\.fr| peertube\.heraut\.eu| - peertube\.tifox\.fr| - peertube\.maly\.io| - vod\.mochi\.academy| - exode\.me| - coste\.video| tube\.aquilenet\.fr| peertube\.gegeweb\.eu| framatube\.org| @@ -100,18 +404,11 @@ class PeerTubeIE(InfoExtractor): tube\.conferences-gesticulees\.net| peertube\.datagueule\.tv| video\.lqdn\.fr| - meilleurtube\.delire\.party| tube\.mochi\.academy| - peertube\.dav\.li| media\.zat\.im| - pytu\.be| - peertube\.valvin\.fr| - peertube\.nsa\.ovh| video\.colibris-outilslibres\.org| - video\.hispagatos\.org| tube\.svnet\.fr| peertube\.video| - videos\.lecygnenoir\.info| peertube3\.cpy\.re| peertube2\.cpy\.re| videos\.tcit\.fr| @@ -126,7 +423,7 @@ class PeerTubeIE(InfoExtractor): (?P%s) ''' % (_INSTANCES_RE, _UUID_RE) _TESTS = [{ - 'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', + 'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', 'md5': '80f24ff364cc9d333529506a263e7feb', 'info_dict': { 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c', diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index 401298cb8..88b6859b0 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -15,7 +15,7 @@ from ..utils import ( class PikselIE(InfoExtractor): - _VALID_URL = r'https?://player\.piksel\.com/v/(?P[a-z0-9]+)' + _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P[a-z0-9_]+)' _TESTS = [ { 'url': 'http://player.piksel.com/v/ums2867l', @@ -40,6 +40,11 @@ class PikselIE(InfoExtractor): 'timestamp': 1486171129, 'upload_date': '20170204' } + }, + { + # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/ + 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477', + 'only_matching': True, } ] @@ -52,8 +57,11 @@ class PikselIE(InfoExtractor): return mobj.group('url') def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'data-de-program-uuid=[\'"]([a-z0-9]+)', + webpage, 'program uuid', default=display_id) app_token = self._search_regex([ r'clientAPI\s*:\s*"([^"]+)"', r'data-de-api-key\s*=\s*"([^"]+)"' diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py index 557b2b5ad..602207beb 100644 --- a/youtube_dl/extractor/platzi.py +++ b/youtube_dl/extractor/platzi.py @@ -18,43 +18,10 @@ from ..utils import ( ) -class PlatziIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?: - 
platzi\.com/clases| # es version - courses\.platzi\.com/classes # en version - )/[^/]+/(?P\d+)-[^/?\#&]+ - ''' +class PlatziBaseIE(InfoExtractor): _LOGIN_URL = 'https://platzi.com/login/' _NETRC_MACHINE = 'platzi' - _TESTS = [{ - 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', - 'md5': '8f56448241005b561c10f11a595b37e3', - 'info_dict': { - 'id': '12074', - 'ext': 'mp4', - 'title': 'Creando nuestra primera página', - 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', - 'duration': 420, - }, - 'skip': 'Requires platzi account credentials', - }, { - 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', - 'info_dict': { - 'id': '13430', - 'ext': 'mp4', - 'title': 'Background', - 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', - 'duration': 360, - }, - 'skip': 'Requires platzi account credentials', - 'params': { - 'skip_download': True, - }, - }] - def _real_initialize(self): self._login() @@ -97,6 +64,42 @@ class PlatziIE(InfoExtractor): 'Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') + +class PlatziIE(PlatziBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/[^/]+/(?P\d+)-[^/?\#&]+ + ''' + + _TESTS = [{ + 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + def _real_extract(self, url): lecture_id = self._match_id(url) @@ -104,7 +107,11 @@ class PlatziIE(InfoExtractor): data = self._parse_json( self._search_regex( - r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'), + # client_data may contain "};" so that we have to try more + # strict regex first + (r'client_data\s*=\s*({.+?})\s*;\s*\n', + r'client_data\s*=\s*({.+?})\s*;'), + webpage, 'client data'), lecture_id) material = data['initialState']['material'] @@ -146,7 +153,7 @@ class PlatziIE(InfoExtractor): } -class PlatziCourseIE(InfoExtractor): +class PlatziCourseIE(PlatziBaseIE): _VALID_URL = r'''(?x) https?:// (?: diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 11b8cfcf7..ba0ad7da2 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -403,6 +403,15 @@ class PornHubUserIE(PornHubPlaylistBaseIE): class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + @staticmethod + def _has_more(webpage): + return re.search( + r'''(?x) + ]+\bclass=["\']page_next| + ]+\brel=["\']next| + ]+\bid=["\']moreDataBtn + ''', webpage) is not None + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') @@ -411,13 +420,11 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): page = int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) - page_url = self._make_page_url(url) - entries = [] for page_num in (page, ) if page is not None else itertools.count(1): try: 
webpage = self._download_webpage( - page_url, item_id, 'Downloading page %d' % page_num, + url, item_id, 'Downloading page %d' % page_num, query={'page': page_num}) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: @@ -547,18 +554,6 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) else super(PornHubPagedVideoListIE, cls).suitable(url)) - def _make_page_url(self, url): - return url - - @staticmethod - def _has_more(webpage): - return re.search( - r'''(?x) - ]+\bclass=["\']page_next| - ]+\brel=["\']next| - ]+\bid=["\']moreDataBtn - ''', webpage) is not None - class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): _VALID_URL = r'(?Phttps?://(?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P[^/]+)/videos/upload)' @@ -572,11 +567,3 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', 'only_matching': True, }] - - def _make_page_url(self, url): - mobj = re.match(self._VALID_URL, url) - return '%s/ajax' % mobj.group('url') - - @staticmethod - def _has_more(webpage): - return True diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py deleted file mode 100644 index 23ac93d7e..000000000 --- a/youtube_dl/extractor/promptfile.py +++ /dev/null @@ -1,70 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - urlencode_postdata, -) - - -class PromptFileIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P[0-9A-Z\-]+)' - _TEST = { - 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416', - 'md5': '5a7e285a26e0d66d9a263fae91bc92ce', - 'info_dict': { - 'id': '86D1CE8462-576CAAE416', - 'ext': 'mp4', - 'title': 'oceans.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - if re.search(r'(?!We are).+[^-]', webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, - expected=True) - - chash = self._search_regex( - r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash') - fields = self._hidden_inputs(webpage) - keys = list(fields.keys()) - chash_key = keys[0] if len(keys) == 1 else next( - key for key in keys if key.startswith('cha')) - fields[chash_key] = chash + fields[chash_key] - - webpage = self._download_webpage( - url, video_id, 'Downloading video page', - data=urlencode_postdata(fields), - headers={'Content-type': 'application/x-www-form-urlencoded'}) - - video_url = self._search_regex( - (r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]*>\s*Download File', - r']+href=(["\'])(?Phttps?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'), - webpage, 'video url', group='url') - title = self._html_search_regex( - r'', webpage, 'title') - thumbnail = self._html_search_regex( - r'
.*button>.*?This video has been removed']): raise ExtractorError('Video %s has been removed' % video_id, expected=True) - title = self._html_search_regex( - (r']+class="(?:video_title_text|videoTitle)[^"]*">(?P(?:(?!\1).)+)</h\1>', - r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), - webpage, 'title', group='title', - default=None) or self._og_search_title(webpage) + info = self._search_json_ld(webpage, video_id, default={}) + + if not info.get('title'): + info['title'] = self._html_search_regex( + (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', + r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), + webpage, 'title', group='title', + default=None) or self._og_search_title(webpage) formats = [] sources = self._parse_json( @@ -88,28 +92,28 @@ class RedTubeIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._search_regex( - r'<span[^>]+>ADDED ([^<]+)<', - webpage, 'upload date', fatal=False)) + r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<', + webpage, 'upload date', default=None)) duration = int_or_none(self._og_search_property( 'video:duration', webpage, default=None) or self._search_regex( r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', - r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'), - webpage, 'view count', fatal=False)) + r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)', + r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'), + webpage, 'view count', default=None)) # No self-labeling, but they describe themselves as # "Home of Videos Porno" age_limit = 18 - return { + return merge_dicts(info, { 'id': video_id, 'ext': 'mp4', - 'title': title, 'thumbnail': thumbnail, 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, 'age_limit': age_limit, 'formats': formats, - } + }) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index c1a9deafe..7d2e34b3b 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -48,6 +48,16 @@ class TeachableBaseIE(InfoExtractor): 'https://%s/sign_in' % site, None, 'Downloading %s login page' % site) + def is_logged(webpage): + return any(re.search(p, webpage) for p in ( + r'class=["\']user-signout', + r'<a[^>]+\bhref=["\']/sign_out', + r'Log\s+[Oo]ut\s*<')) + + if is_logged(login_page): + self._logged_in = True + return + login_url = compat_str(urlh.geturl()) login_form = self._hidden_inputs(login_page) @@ -78,10 +88,7 @@ class TeachableBaseIE(InfoExtractor): 'Go to https://%s/ and accept.' 
% (site, site), expected=True) # Successful login - if any(re.search(p, response) for p in ( - r'class=["\']user-signout', - r'<a[^>]+\bhref=["\']/sign_out', - r'>\s*Log out\s*<')): + if is_logged(response): self._logged_in = True return diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index db5a4f44e..63e2455b2 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -182,20 +182,29 @@ class TEDIE(InfoExtractor): title = talk_info['title'].strip() - native_downloads = try_get( - talk_info, - (lambda x: x['downloads']['nativeDownloads'], - lambda x: x['nativeDownloads']), - dict) or {} + downloads = talk_info.get('downloads') or {} + native_downloads = downloads.get('nativeDownloads') or talk_info.get('nativeDownloads') or {} formats = [{ 'url': format_url, 'format_id': format_id, - 'format': format_id, } for (format_id, format_url) in native_downloads.items() if format_url is not None] + + subtitled_downloads = downloads.get('subtitledDownloads') or {} + for lang, subtitled_download in subtitled_downloads.items(): + for q in self._NATIVE_FORMATS: + q_url = subtitled_download.get(q) + if not q_url: + continue + formats.append({ + 'url': q_url, + 'format_id': '%s-%s' % (q, lang), + 'language': lang, + }) + if formats: for f in formats: - finfo = self._NATIVE_FORMATS.get(f['format_id']) + finfo = self._NATIVE_FORMATS.get(f['format_id'].split('-')[0]) if finfo: f.update(finfo) @@ -215,34 +224,7 @@ class TEDIE(InfoExtractor): http_url = None for format_id, resources in resources_.items(): - if format_id == 'h264': - for resource in resources: - h264_url = resource.get('file') - if not h264_url: - continue - bitrate = int_or_none(resource.get('bitrate')) - formats.append({ - 'url': h264_url, - 'format_id': '%s-%sk' % (format_id, bitrate), - 'tbr': bitrate, - }) - if re.search(r'\d+k', h264_url): - http_url = h264_url - elif format_id == 'rtmp': - streamer = talk_info.get('streamer') - if not streamer: - continue - for resource in resources: - formats.append({ - 'format_id': '%s-%s' % (format_id, resource.get('name')), - 'url': streamer, - 'play_path': resource['file'], - 'ext': 'flv', - 'width': int_or_none(resource.get('width')), - 'height': int_or_none(resource.get('height')), - 'tbr': int_or_none(resource.get('bitrate')), - }) - elif format_id == 'hls': + if format_id == 'hls': if not isinstance(resources, dict): continue stream_url = url_or_none(resources.get('stream')) @@ -251,6 +233,36 @@ class TEDIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( stream_url, video_name, 'mp4', m3u8_id=format_id, fatal=False)) + else: + if not isinstance(resources, list): + continue + if format_id == 'h264': + for resource in resources: + h264_url = resource.get('file') + if not h264_url: + continue + bitrate = int_or_none(resource.get('bitrate')) + formats.append({ + 'url': h264_url, + 'format_id': '%s-%sk' % (format_id, bitrate), + 'tbr': bitrate, + }) + if re.search(r'\d+k', h264_url): + http_url = h264_url + elif format_id == 'rtmp': + streamer = talk_info.get('streamer') + if not streamer: + continue + for resource in resources: + formats.append({ + 'format_id': '%s-%s' % (format_id, resource.get('name')), + 'url': streamer, + 'play_path': resource['file'], + 'ext': 'flv', + 'width': int_or_none(resource.get('width')), + 'height': int_or_none(resource.get('height')), + 'tbr': int_or_none(resource.get('bitrate')), + }) m3u8_formats = list(filter( lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none', diff --git 
a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 6965c127b..ae9f66787 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -7,6 +7,7 @@ from ..utils import ( int_or_none, smuggle_url, try_get, + unified_timestamp, ) @@ -22,7 +23,13 @@ class TeleQuebecBaseIE(InfoExtractor): class TeleQuebecIE(TeleQuebecBaseIE): - _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)' + _VALID_URL = r'''(?x) + https?:// + (?: + zonevideo\.telequebec\.tv/media| + coucou\.telequebec\.tv/videos + )/(?P<id>\d+) + ''' _TESTS = [{ # available till 01.01.2023 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane', @@ -41,6 +48,9 @@ class TeleQuebecIE(TeleQuebecBaseIE): # no description 'url': 'http://zonevideo.telequebec.tv/media/30261', 'only_matching': True, + }, { + 'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain', + 'only_matching': True, }] def _real_extract(self, url): @@ -61,6 +71,52 @@ class TeleQuebecIE(TeleQuebecBaseIE): return info +class TeleQuebecSquatIE(InfoExtractor): + _VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://squat.telequebec.tv/videos/9314', + 'info_dict': { + 'id': 'd59ae78112d542e793d83cc9d3a5b530', + 'ext': 'mp4', + 'title': 'Poupeflekta', + 'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75', + 'duration': 1351, + 'timestamp': 1569057600, + 'upload_date': '20190921', + 'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir', + 'season': 'Saison 3', + 'season_number': 3, + 'episode_number': 57, + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'https://squat.api.telequebec.tv/v1/videos/%s' % video_id, + video_id) + + media_id = video['sourceId'] + + return { + '_type': 'url_transparent', + 'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id, + 'ie_key': TeleQuebecIE.ie_key(), + 'id': media_id, + 'title': video.get('titre'), + 'description': video.get('description'), + 'timestamp': unified_timestamp(video.get('datePublication')), + 'series': video.get('container'), + 'season': video.get('saison'), + 'season_number': int_or_none(video.get('noSaison')), + 'episode_number': int_or_none(video.get('episode')), + } + + class TeleQuebecEmissionIE(TeleQuebecBaseIE): _VALID_URL = r'''(?x) https?:// diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 51923e44a..a819d048c 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -72,8 +72,13 @@ class TV4IE(InfoExtractor): video_id = self._match_id(url) info = self._download_json( - 'http://www.tv4play.se/player/assets/%s.json' % video_id, - video_id, 'Downloading video info JSON') + 'https://playback-api.b17g.net/asset/%s' % video_id, + video_id, 'Downloading video info JSON', query={ + 'service': 'tv4', + 'device': 'browser', + 'protocol': 'hls,dash', + 'drm': 'widevine', + })['metadata'] title = info['title'] @@ -111,5 +116,9 @@ class TV4IE(InfoExtractor): 'timestamp': parse_iso8601(info.get('broadcast_date_time')), 'duration': int_or_none(info.get('duration')), 'thumbnail': info.get('image'), - 'is_live': info.get('is_live') is True, + 'is_live': info.get('isLive') is True, + 'series': info.get('seriesTitle'), + 'season_number': int_or_none(info.get('seasonNumber')), + 'episode': info.get('episodeTitle'), + 'episode_number': int_or_none(info.get('episodeNumber')), } diff 
--git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py deleted file mode 100644 index 31eee0ba7..000000000 --- a/youtube_dl/extractor/vessel.py +++ /dev/null @@ -1,157 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_iso8601, - sanitized_Request, -) - - -class VesselIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)' - _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' - _LOGIN_URL = 'https://www.vessel.com/api/account/login' - _NETRC_MACHINE = 'vessel' - _TESTS = [{ - 'url': 'https://www.vessel.com/videos/HDN7G5UMs', - 'md5': '455cdf8beb71c6dd797fd2f3818d05c4', - 'info_dict': { - 'id': 'HDN7G5UMs', - 'ext': 'mp4', - 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20150317', - 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', - 'timestamp': int, - }, - }, { - 'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346', - 'only_matching': True, - }, { - 'url': 'https://www.vessel.com/videos/F01_dsLj1', - 'only_matching': True, - }, { - 'url': 'https://www.vessel.com/videos/RRX-sir-J', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return [url for _, url in re.findall( - r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1', - webpage)] - - @staticmethod - def make_json_request(url, data): - payload = json.dumps(data).encode('utf-8') - req = sanitized_Request(url, payload) - req.add_header('Content-Type', 'application/json; charset=utf-8') - return req - - @staticmethod - def find_assets(data, asset_type, asset_id=None): - for asset in data.get('assets', []): - if not asset.get('type') == asset_type: - continue - elif asset_id is not None and not asset.get('id') == asset_id: - continue - else: - yield asset - - def _check_access_rights(self, data): - access_info = data.get('__view', {}) - if not access_info.get('allow_access', True): - err_code = access_info.get('error_code') or '' - if err_code == 'ITEM_PAID_ONLY': - raise ExtractorError( - 'This video requires subscription.', expected=True) - else: - raise ExtractorError( - 'Access to this content is restricted. 
(%s said: %s)' % (self.IE_NAME, err_code), expected=True) - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - self.report_login() - data = { - 'client_id': 'web', - 'type': 'password', - 'user_key': username, - 'password': password, - } - login_request = VesselIE.make_json_request(self._LOGIN_URL, data) - self._download_webpage(login_request, None, False, 'Wrong login info') - - def _real_initialize(self): - self._login() - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - data = self._parse_json(self._search_regex( - r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id) - asset_id = data['model']['data']['id'] - - req = VesselIE.make_json_request( - self._API_URL_TEMPLATE % asset_id, {'client': 'web'}) - data = self._download_json(req, video_id) - video_asset_id = data.get('main_video_asset') - - self._check_access_rights(data) - - try: - video_asset = next( - VesselIE.find_assets(data, 'video', asset_id=video_asset_id)) - except StopIteration: - raise ExtractorError('No video assets found') - - formats = [] - for f in video_asset.get('sources', []): - location = f.get('location') - if not location: - continue - name = f.get('name') - if name == 'hls-index': - formats.extend(self._extract_m3u8_formats( - location, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False)) - elif name == 'dash-index': - formats.extend(self._extract_mpd_formats( - location, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'format_id': name, - 'tbr': f.get('bitrate'), - 'height': f.get('height'), - 'width': f.get('width'), - 'url': location, - }) - self._sort_formats(formats) - - thumbnails = [] - for im_asset in VesselIE.find_assets(data, 'image'): - thumbnails.append({ - 'url': im_asset['location'], - 'width': im_asset.get('width', 0), - 'height': im_asset.get('height', 0), - }) - - return { - 'id': video_id, - 'title': data['title'], - 'formats': formats, - 'thumbnails': thumbnails, - 'description': data.get('short_description'), - 'duration': data.get('duration'), - 'comment_count': data.get('comment_count'), - 'like_count': data.get('like_count'), - 'view_count': data.get('view_count'), - 'timestamp': parse_iso8601(data.get('released_at')), - } diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py index c43d1a1e8..851ad936c 100644 --- a/youtube_dl/extractor/viewlift.py +++ b/youtube_dl/extractor/viewlift.py @@ -13,11 +13,12 @@ from ..utils import ( js_to_json, parse_age_limit, parse_duration, + try_get, ) class ViewLiftBaseIE(InfoExtractor): - _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv' + _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv' class ViewLiftEmbedIE(ViewLiftBaseIE): @@ -113,7 +114,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): class ViewLiftIE(ViewLiftBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX + _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?:/(?:films/title|show|(?:news/)?videos?))?/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX _TESTS = [{ 'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'md5': '19844f897b35af219773fd63bdec2942', @@ -128,7 +129,7 @@ class ViewLiftIE(ViewLiftBaseIE): 'categories': 
'mincount:3', 'age_limit': 14, 'upload_date': '20150421', - 'timestamp': 1429656819, + 'timestamp': 1429656820, } }, { 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india', @@ -141,10 +142,26 @@ class ViewLiftIE(ViewLiftBaseIE): 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 979, - 'categories': 'mincount:2', 'timestamp': 1399478279, 'upload_date': '20140507', } + }, { + 'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love', + 'info_dict': { + 'id': '00000148-7b53-de26-a9fb-fbf306f70020', + 'display_id': 'augie_alone/s_2_ep_12_love', + 'ext': 'mp4', + 'title': 'Augie, Alone:S. 2 Ep. 12 - Love', + 'description': 'md5:db2a5c72d994f16a780c1eb353a8f403', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 107, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://main.snagfilms.com/films/title/the_freebie', + 'only_matching': True, }, { # Film is not playable in your area. 'url': 'http://www.snagfilms.com/films/title/inside_mecca', @@ -162,6 +179,10 @@ class ViewLiftIE(ViewLiftBaseIE): 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url) + def _real_extract(self, url): domain, display_id = re.match(self._VALID_URL, url).groups() @@ -181,7 +202,21 @@ class ViewLiftIE(ViewLiftBaseIE): gist = content_data['gist'] film_id = gist['id'] title = gist['title'] - video_assets = content_data['streamingInfo']['videoAssets'] + video_assets = try_get( + content_data, lambda x: x['streamingInfo']['videoAssets'], dict) + if not video_assets: + token = self._download_json( + 'https://prod-api.viewlift.com/identity/anonymous-token', + film_id, 'Downloading authorization token', + query={'site': 'snagfilms'})['authorizationToken'] + video_assets = self._download_json( + 'https://prod-api.viewlift.com/entitlement/video/status', + film_id, headers={ + 'Authorization': token, + 'Referer': url, + }, query={ + 'id': film_id + })['video']['streamingInfo']['videoAssets'] formats = [] mpeg_video_assets = video_assets.get('mpeg') or [] @@ -241,8 +276,9 @@ class ViewLiftIE(ViewLiftBaseIE): if category.get('title')] break else: - title = self._search_regex( - r'itemprop="title">([^<]+)<', webpage, 'title') + title = self._html_search_regex( + (r'itemprop="title">([^<]+)<', + r'(?s)itemprop="title">(.+?)<div'), webpage, 'title') description = self._html_search_regex( r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>', webpage, 'description', default=None) or self._og_search_description(webpage) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index f57ed2288..8b6dc0e24 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -403,8 +403,17 @@ class VKIE(VKBaseIE): data = self._parse_json( self._search_regex( r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page, - 'player params'), - video_id)['params'][0] + 'player params', default='{}'), + video_id) + if data: + data = data['params'][0] + + # <!--{...} + if not data: + data = self._parse_json( + self._search_regex( + r'<!--\s*({.+})', info_page, 'payload'), + video_id)['payload'][-1][-1]['player']['params'][0] title = unescapeHTML(data['md_title']) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py deleted file mode 100644 index ea234e3c5..000000000 --- a/youtube_dl/extractor/wimp.py +++ /dev/null @@ -1,54 +0,0 @@ -from __future__ import unicode_literals - -from .common import 
InfoExtractor -from .youtube import YoutubeIE - - -class WimpIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)' - _TESTS = [{ - 'url': 'http://www.wimp.com/maru-is-exhausted/', - 'md5': 'ee21217ffd66d058e8b16be340b74883', - 'info_dict': { - 'id': 'maru-is-exhausted', - 'ext': 'mp4', - 'title': 'Maru is exhausted.', - 'description': 'md5:57e099e857c0a4ea312542b684a869b8', - } - }, { - 'url': 'http://www.wimp.com/clowncar/', - 'md5': '5c31ad862a90dc5b1f023956faec13fe', - 'info_dict': { - 'id': 'cG4CEr2aiSg', - 'ext': 'webm', - 'title': 'Basset hound clown car...incredible!', - 'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom', - 'upload_date': '20140303', - 'uploader': 'Gretchen Hoey', - 'uploader_id': 'gretchenandjeff1', - }, - 'add_ie': ['Youtube'], - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - youtube_id = self._search_regex( - (r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']", - r'data-id=["\']([0-9A-Za-z_-]{11})'), - webpage, 'video URL', default=None) - if youtube_id: - return self.url_result(youtube_id, YoutubeIE.ie_key()) - - info_dict = self._extract_jwplayer_data( - webpage, video_id, require_title=False) - - info_dict.update({ - 'id': video_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - }) - - return info_dict diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index d268372e6..a5b94d279 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -8,6 +9,7 @@ from ..utils import ( clean_html, determine_ext, dict_get, + extract_attributes, ExtractorError, int_or_none, parse_duration, @@ -18,21 +20,21 @@ from ..utils import ( class XHamsterIE(InfoExtractor): + _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster[27]\.com)' _VALID_URL = r'''(?x) https?:// - (?:.+?\.)?xhamster\.(?:com|one)/ + (?:.+?\.)?%s/ (?: movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html| videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+) ) - ''' - + ''' % _DOMAINS _TESTS = [{ - 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', - 'md5': '8281348b8d3c53d39fffb377d24eac4e', + 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', + 'md5': '98b4687efb1ffd331c4197854dc09e8f', 'info_dict': { 'id': '1509445', - 'display_id': 'femaleagent_shy_beauty_takes_the_bait', + 'display_id': 'femaleagent-shy-beauty-takes-the-bait', 'ext': 'mp4', 'title': 'FemaleAgent Shy beauty takes the bait', 'timestamp': 1350194821, @@ -40,13 +42,12 @@ class XHamsterIE(InfoExtractor): 'uploader': 'Ruseful2011', 'duration': 893, 'age_limit': 18, - 'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Beauti', 'Beauties', 'Beautiful', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy', 'Taking'], }, }, { - 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', + 'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=', 'info_dict': { 'id': '2221348', - 'display_id': 'britney_spears_sexy_booty', + 'display_id': 'britney-spears-sexy-booty', 'ext': 'mp4', 'title': 'Britney Spears Sexy Booty', 'timestamp': 1379123460, @@ -54,13 +55,12 @@ class XHamsterIE(InfoExtractor): 'uploader': 'jojo747400', 'duration': 200, 
'age_limit': 18, - 'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'], }, 'params': { 'skip_download': True, }, }, { - # empty seo + # empty seo, unavailable via new URL schema 'url': 'http://xhamster.com/movies/5667973/.html', 'info_dict': { 'id': '5667973', @@ -71,7 +71,6 @@ class XHamsterIE(InfoExtractor): 'uploader': 'parejafree', 'duration': 72, 'age_limit': 18, - 'categories': ['Amateur', 'Blowjobs'], }, 'params': { 'skip_download': True, @@ -94,6 +93,18 @@ class XHamsterIE(InfoExtractor): }, { 'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445', 'only_matching': True, + }, { + 'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445', + 'only_matching': True, + }, { + 'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', + 'only_matching': True, + }, { + 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', + 'only_matching': True, + }, { + 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', + 'only_matching': True, }] def _real_extract(self, url): @@ -285,7 +296,7 @@ class XHamsterIE(InfoExtractor): class XHamsterEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)' + _VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS _TEST = { 'url': 'http://xhamster.com/xembed.php?video=3328539', 'info_dict': { @@ -322,3 +333,49 @@ class XHamsterEmbedIE(InfoExtractor): video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl')) return self.url_result(video_url, 'XHamster') + + +class XHamsterUserIE(InfoExtractor): + _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS + _TESTS = [{ + # Paginated user profile + 'url': 'https://xhamster.com/users/netvideogirls/videos', + 'info_dict': { + 'id': 'netvideogirls', + }, + 'playlist_mincount': 267, + }, { + # Non-paginated user profile + 'url': 'https://xhamster.com/users/firatkaan/videos', + 'info_dict': { + 'id': 'firatkaan', + }, + 'playlist_mincount': 1, + }] + + def _entries(self, user_id): + next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id + for pagenum in itertools.count(1): + page = self._download_webpage( + next_page_url, user_id, 'Downloading page %s' % pagenum) + for video_tag in re.findall( + r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)', + page): + video = extract_attributes(video_tag) + video_url = url_or_none(video.get('href')) + if not video_url or not XHamsterIE.suitable(video_url): + continue + video_id = XHamsterIE._match_id(video_url) + yield self.url_result( + video_url, ie=XHamsterIE.ie_key(), video_id=video_id) + mobj = re.search(r'<a[^>]+data-page=["\']next[^>]+>', page) + if not mobj: + break + next_page = extract_attributes(mobj.group(0)) + next_page_url = url_or_none(next_page.get('href')) + if not next_page_url: + break + + def _real_extract(self, url): + user_id = self._match_id(url) + return self.playlist_result(self._entries(user_id), user_id) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 166bcf443..8fc64914c 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -17,7 +17,8 @@ class XVideosIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?xvideos\.com/video| + (?:[^/]+\.)?xvideos2?\.com/video| + (?:www\.)?xvideos\.es/video| flashservice\.xvideos\.com/embedframe/| 
static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= ) @@ -39,6 +40,42 @@ class XVideosIE(InfoExtractor): }, { 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', 'only_matching': True, + }, { + 'url': 'http://xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True + }, { + 'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl', + 'only_matching': True }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 25d056b3c..5e397324b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -41,7 +41,6 @@ from ..utils import ( orderedSet, parse_codecs, parse_duration, - qualities, remove_quotes, remove_start, smuggle_url, @@ -384,13 +383,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:(?:www|no)\.)?invidiou\.sh/| (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/| (?:www\.)?invidious\.kabi\.tk/| - (?:www\.)?invidious\.enkirton\.net/| (?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.mastodon\.host/| (?:www\.)?invidious\.nixnet\.xyz/| + (?:www\.)?invidious\.drycat\.fr/| (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| (?:www\.)?yt\.elukerio\.org/| + (?:www\.)?yt\.lelux\.fi/| + (?:www\.)?kgg2m7yk5aybusll\.onion/| + (?:www\.)?qklhadlycap4cnod\.onion/| + (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/| + (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/| + (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/| + (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/| + (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? 
# handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -1909,6 +1916,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return int_or_none(self._search_regex( r'\bclen[=/](\d+)', media_url, 'filesize', default=None)) + streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or [] + streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or []) + if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() formats = [{ @@ -1917,10 +1927,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': video_info['conn'][0], 'player_url': player_url, }] - elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1): + elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1): encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] if 'rtmpe%3Dyes' in encoded_url_map: raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True) + formats = [] formats_spec = {} fmt_list = video_info.get('fmt_list', [''])[0] if fmt_list: @@ -1934,91 +1945,104 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'width': int_or_none(width_height[0]), 'height': int_or_none(width_height[1]), } - q = qualities(['small', 'medium', 'hd720']) - streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) - if streaming_formats: - for fmt in streaming_formats: - itag = str_or_none(fmt.get('itag')) - if not itag: - continue - quality = fmt.get('quality') - quality_label = fmt.get('qualityLabel') or quality - formats_spec[itag] = { - 'asr': int_or_none(fmt.get('audioSampleRate')), - 'filesize': int_or_none(fmt.get('contentLength')), - 'format_note': quality_label, - 'fps': int_or_none(fmt.get('fps')), - 'height': int_or_none(fmt.get('height')), - 'quality': q(quality), - # bitrate for itag 43 is always 2147483647 - 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None, - 'width': int_or_none(fmt.get('width')), - } - formats = [] - for url_data_str in encoded_url_map.split(','): - url_data = compat_parse_qs(url_data_str) - if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'): + for fmt in streaming_formats: + itag = str_or_none(fmt.get('itag')) + if not itag: continue + quality = fmt.get('quality') + quality_label = fmt.get('qualityLabel') or quality + formats_spec[itag] = { + 'asr': int_or_none(fmt.get('audioSampleRate')), + 'filesize': int_or_none(fmt.get('contentLength')), + 'format_note': quality_label, + 'fps': int_or_none(fmt.get('fps')), + 'height': int_or_none(fmt.get('height')), + # bitrate for itag 43 is always 2147483647 + 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None, + 'width': int_or_none(fmt.get('width')), + } + + for fmt in streaming_formats: + if fmt.get('drm_families'): + continue + url = url_or_none(fmt.get('url')) + + if not url: + cipher = fmt.get('cipher') + if not cipher: + continue + url_data = compat_parse_qs(cipher) + url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str)) + if not url: + continue + else: + cipher = None + url_data = 
compat_parse_qs(compat_urllib_parse_urlparse(url).query) + stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0])) # Unsupported FORMAT_STREAM_TYPE_OTF if stream_type == 3: continue - format_id = url_data['itag'][0] - url = url_data['url'][0] - if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True): - ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' - jsplayer_url_json = self._search_regex( - ASSETS_RE, - embed_webpage if age_gate else video_webpage, - 'JS player URL (1)', default=None) - if not jsplayer_url_json and not age_gate: - # We need the embed website after all - if embed_webpage is None: - embed_url = proto + '://www.youtube.com/embed/%s' % video_id - embed_webpage = self._download_webpage( - embed_url, video_id, 'Downloading embed webpage') + format_id = fmt.get('itag') or url_data['itag'][0] + if not format_id: + continue + format_id = compat_str(format_id) + + if cipher: + if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True): + ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' jsplayer_url_json = self._search_regex( - ASSETS_RE, embed_webpage, 'JS player URL') + ASSETS_RE, + embed_webpage if age_gate else video_webpage, + 'JS player URL (1)', default=None) + if not jsplayer_url_json and not age_gate: + # We need the embed website after all + if embed_webpage is None: + embed_url = proto + '://www.youtube.com/embed/%s' % video_id + embed_webpage = self._download_webpage( + embed_url, video_id, 'Downloading embed webpage') + jsplayer_url_json = self._search_regex( + ASSETS_RE, embed_webpage, 'JS player URL') - player_url = json.loads(jsplayer_url_json) - if player_url is None: - player_url_json = self._search_regex( - r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', - video_webpage, 'age gate player URL') - player_url = json.loads(player_url_json) - - if 'sig' in url_data: - url += '&signature=' + url_data['sig'][0] - elif 's' in url_data: - encrypted_sig = url_data['s'][0] - - if self._downloader.params.get('verbose'): + player_url = json.loads(jsplayer_url_json) if player_url is None: - player_version = 'unknown' - player_desc = 'unknown' - else: - if player_url.endswith('swf'): - player_version = self._search_regex( - r'-(.+?)(?:/watch_as3)?\.swf$', player_url, - 'flash player', fatal=False) - player_desc = 'flash player %s' % player_version + player_url_json = self._search_regex( + r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', + video_webpage, 'age gate player URL') + player_url = json.loads(player_url_json) + + if 'sig' in url_data: + url += '&signature=' + url_data['sig'][0] + elif 's' in url_data: + encrypted_sig = url_data['s'][0] + + if self._downloader.params.get('verbose'): + if player_url is None: + player_version = 'unknown' + player_desc = 'unknown' else: - player_version = self._search_regex( - [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', - r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'], - player_url, - 'html5 player', fatal=False) - player_desc = 'html5 player %s' % player_version + if player_url.endswith('swf'): + player_version = self._search_regex( + r'-(.+?)(?:/watch_as3)?\.swf$', player_url, + 'flash player', fatal=False) + player_desc = 'flash player %s' % player_version + else: + player_version = self._search_regex( + [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', + r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'], + player_url, + 'html5 player', fatal=False) + player_desc = 'html5 player %s' % player_version - 
parts_sizes = self._signature_cache_id(encrypted_sig) - self.to_screen('{%s} signature length %s, %s' % - (format_id, parts_sizes, player_desc)) + parts_sizes = self._signature_cache_id(encrypted_sig) + self.to_screen('{%s} signature length %s, %s' % + (format_id, parts_sizes, player_desc)) - signature = self._decrypt_signature( - encrypted_sig, video_id, player_url, age_gate) - sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature' - url += '&%s=%s' % (sp, signature) + signature = self._decrypt_signature( + encrypted_sig, video_id, player_url, age_gate) + sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature' + url += '&%s=%s' % (sp, signature) if 'ratebypass' not in url: url += '&ratebypass=yes' @@ -2038,24 +2062,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0]) width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) + if width is None: + width = int_or_none(fmt.get('width')) + if height is None: + height = int_or_none(fmt.get('height')) + filesize = int_or_none(url_data.get( 'clen', [None])[0]) or _extract_filesize(url) - quality = url_data.get('quality', [None])[0] + quality = url_data.get('quality', [None])[0] or fmt.get('quality') + quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel') + + tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000) + or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None + fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps')) more_fields = { 'filesize': filesize, - 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000), + 'tbr': tbr, 'width': width, 'height': height, - 'fps': int_or_none(url_data.get('fps', [None])[0]), - 'format_note': url_data.get('quality_label', [None])[0] or quality, - 'quality': q(quality), + 'fps': fps, + 'format_note': quality_label or quality, } for key, value in more_fields.items(): if value: dct[key] = value - type_ = url_data.get('type', [None])[0] + type_ = url_data.get('type', [None])[0] or fmt.get('mimeType') if type_: type_split = type_.split(';') kind_ext = type_split[0].split('/') @@ -2709,7 +2742,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): page, 'title', default=None) _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref=' - uploader = self._search_regex( + uploader = self._html_search_regex( r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE, page, 'uploader', default=None) mobj = re.search( diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index afa3f6c47..145c123a4 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -41,6 +41,7 @@ class ZDFBaseIE(InfoExtractor): class ZDFIE(ZDFBaseIE): _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html' _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh') + _GEO_COUNTRIES = ['DE'] _TESTS = [{ 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html', diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b53a08cae..c3eafb068 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.08.13' +__version__ = '2019.09.28'
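
As a quick illustration of the widened Piksel URL pattern in the piksel.py hunk above, the sketch below (not part of the patch) runs the new `_VALID_URL` against the two URLs used in that hunk's tests; the `<id>` group name is written out explicitly here, since the pattern now also allows an optional `refid/.../prefid/` prefix and underscores in the id.

```python
import re

# New pattern from the piksel.py hunk: optional refid/<refid>/prefid/ prefix
# before the video id, which may now also contain underscores.
_VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'

for url in (
    'http://player.piksel.com/v/ums2867l',
    'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
):
    mobj = re.match(_VALID_URL, url)
    print(mobj.group('id') if mobj else None)
```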
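
The new `XHamsterUserIE` in the xhamster.py hunk walks a user's video pages by following the `data-page="next"` link until it disappears. Below is a minimal stand-alone sketch of that crawl loop, assuming a `fetch(url)` callable in place of `_download_webpage` and assuming the `data-page` attribute precedes `href` (the patch itself uses `extract_attributes` and is order-independent).

```python
import itertools
import re

def iter_profile_pages(fetch, first_page_url):
    # Yield each page's HTML, following the <a ... data-page="next" ...> link
    # the patch looks for, and stop once no such link is present.
    url = first_page_url
    for page_num in itertools.count(1):
        page = fetch(url)
        yield page
        mobj = re.search(
            r'<a[^>]+data-page=["\']next[^>]*\bhref=["\']([^"\']+)', page)
        if not mobj:
            break
        url = mobj.group(1)
```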
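
In the reworked youtube.py format loop, each `streamingData` entry either carries a direct `url` or a `cipher` query string from which the URL, the signature parameter name (`sp`) and the encrypted signature (`s`) are recovered. The rough, self-contained sketch below shows only that `s`/`sp` path (the pre-signed `sig` case is omitted), with signature decryption stubbed out where the real extractor calls `_decrypt_signature`, and plain `parse_qs` standing in for the compat helpers.

```python
try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs      # Python 2 (youtube-dl goes through its compat layer)

def url_from_cipher(cipher, decrypt_signature=lambda s: s):
    # Parse the cipher query string, take 'url', then append the decrypted
    # 's' value under the parameter name given by 'sp', falling back to
    # 'signature' as the extractor does.
    data = parse_qs(cipher)
    url = data.get('url', [None])[0]
    if not url:
        return None
    encrypted_sig = data.get('s', [None])[0]
    if encrypted_sig:
        sp = data.get('sp', ['signature'])[0]
        url += '&%s=%s' % (sp, decrypt_signature(encrypted_sig))
    return url

# Hypothetical host and values, for illustration only.
print(url_from_cipher('s=ABCDEF&sp=sig&url=https%3A%2F%2Fr1.example.com%2Fvideoplayback%3Fitag%3D22'))
```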