diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0decf19a1..5469c73cf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.18*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.07*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.03.18 +[debug] youtube-dl version 2019.04.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d0e3a6088..421f247fd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,49 @@ +version 2019.04.07 + +Core ++ [downloader/external] Pass rtmp_conn to ffmpeg + +Extractors ++ [ruutu] Add support for audio podcasts (#20473, #20545) ++ [xvideos] Extract all thumbnails (#20432) ++ [platzi] Add support for platzi.com (#20562) +* [dvtv] Fix extraction (#18514, #19174) ++ [vrv] Add basic support for individual movie links (#19229) ++ [bfi:player] Add support for player.bfi.org.uk (#19235) +* [hbo] Fix extraction and extract subtitles (#14629, #13709) +* [youtube] Extract srv[1-3] subtitle formats (#20566) +* [adultswim] Fix extraction (#18025) +* [teamcoco] Fix extraction and add suport for subdomains (#17099, #20339) +* [adn] Fix subtitle compatibility with ffmpeg +* [adn] Fix extraction and add support for positioning styles (#20549) +* [vk] Use unique video id (#17848) +* [newstube] Fix extraction +* [rtl2] Actualize extraction ++ [adobeconnect] Add support for adobeconnect.com (#20283) ++ [gaia] Add support for authentication (#14605) ++ [mediasite] Add support for dashed ids and named catalogs (#20531) + + +version 2019.04.01 + +Core +* [utils] Improve int_or_none and float_or_none (#20403) +* Check for valid --min-sleep-interval when --max-sleep-interval is specified + (#20435) + +Extractors ++ [weibo] Extend URL regular expression (#20496) ++ [xhamster] Add support for xhamster.one (#20508) ++ [mediasite] Add support for catalogs (#20507) ++ [teamtreehouse] Add support for teamtreehouse.com (#9836) ++ [ina] Add support for audio URLs +* [ina] Improve extraction +* [cwtv] Fix episode number extraction (#20461) +* [npo] Improve DRM detection ++ [pornhub] Add support for DASH formats (#20403) +* [svtplay] Update 
API endpoint (#20430) + + version 2019.03.18 Core diff --git a/README.md b/README.md index e476045b2..92c3a92a1 100644 --- a/README.md +++ b/README.md @@ -642,6 +642,7 @@ The simplest case is requesting a specific format, for example with `-f 22` you You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. You can also use special names to select particular edge case formats: + - `best`: Select the best quality format represented by a single file with video and audio. - `worst`: Select the worst quality format represented by a single file with video and audio. - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available. @@ -658,6 +659,7 @@ If you want to download several formats of the same video use a comma as a separ You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals): + - `filesize`: The number of bytes, if known in advance - `width`: Width of the video, if known - `height`: Height of the video, if known @@ -668,6 +670,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, ` - `fps`: Frame rate Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields: + - `ext`: File extension - `acodec`: Name of the audio codec in use - `vcodec`: Name of the video codec in use diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a3d4447a8..df272c479 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **acast:channel** - **AddAnime** - **ADN**: Anime Digital Network + - **AdobeConnect** - **AdobeTV** - **AdobeTVChannel** - **AdobeTVShow** @@ -101,6 +102,7 @@ - **Bellator** - **BellMedia** - **Bet** + - **bfi:player** - **Bigflix** - **Bild**: Bild.de - **BiliBili** @@ -345,7 +347,6 @@ - **Groupon** - **Hark** - **hbo** - - **hbo:episode** - **HearThisAt** - **Heise** - **HellPorno** @@ -488,6 +489,8 @@ - **Medialaan** - **Mediaset** - **Mediasite** + - **MediasiteCatalog** + - **MediasiteNamedCatalog** - **Medici** - **megaphone.fm**: megaphone.fm embedded players - **Meipai**: 美拍 @@ -670,6 +673,8 @@ - **Piksel** - **Pinkbike** - **Pladform** + - **Platzi** + - **PlatziCourse** - **play.fm** - **PlayPlusTV** - **PlaysTV** @@ -869,6 +874,7 @@ - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** + - **TeamTreeHouse** - **TechTalks** - **techtv.mit.edu** - **ted** diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3b92acd97..57f52f888 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -309,6 +309,8 @@ class YoutubeDL(object): The following options are used by the post processors: prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available, otherwise prefer ffmpeg. + ffmpeg_location: Location of the ffmpeg/avconv binary; either the path + to the binary or its containing directory. postprocessor_args: A list of additional command-line arguments for the postprocessor. 
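The ffmpeg_location entry added to the YoutubeDL options docstring above is passed like any other option when youtube-dl is embedded. A minimal, hypothetical sketch (the path and video URL below are placeholders, not part of this patch):

    from __future__ import unicode_literals

    import youtube_dl

    ydl_opts = {
        'prefer_ffmpeg': True,                 # use ffmpeg rather than avconv if both are installed
        'ffmpeg_location': '/opt/ffmpeg/bin',  # path to the binary or to its containing directory
        'postprocessor_args': ['-loglevel', 'warning'],
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])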
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 5f73f7f0f..acdb27712 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -289,6 +289,7 @@ class FFmpegFD(ExternalFD): tc_url = info_dict.get('tc_url') flash_version = info_dict.get('flash_version') live = info_dict.get('rtmp_live', False) + conn = info_dict.get('rtmp_conn') if player_url is not None: args += ['-rtmp_swfverify', player_url] if page_url is not None: @@ -303,6 +304,11 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_flashver', flash_version] if live: args += ['-rtmp_live', 'live'] + if isinstance(conn, list): + for entry in conn: + args += ['-rtmp_conn', entry] + elif isinstance(conn, compat_str): + args += ['-rtmp_conn', conn] args += ['-i', url, '-c', 'copy'] diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 1eb99c39a..1e04a55a6 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -21,7 +21,6 @@ from ..utils import ( intlist_to_bytes, long_to_bytes, pkcs1pad, - srt_subtitles_timecode, strip_or_none, urljoin, ) @@ -42,6 +41,18 @@ class ADNIE(InfoExtractor): } _BASE_URL = 'http://animedigitalnetwork.fr' _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537) + _POS_ALIGN_MAP = { + 'start': 1, + 'end': 3, + } + _LINE_ALIGN_MAP = { + 'middle': 8, + 'end': 4, + } + + @staticmethod + def _ass_subtitles_timecode(seconds): + return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100) def _get_subtitles(self, sub_path, video_id): if not sub_path: @@ -49,14 +60,14 @@ class ADNIE(InfoExtractor): enc_subtitles = self._download_webpage( urljoin(self._BASE_URL, sub_path), - video_id, fatal=False) + video_id, 'Downloading subtitles data', fatal=False) if not enc_subtitles: return None # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), - bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')), + bytes_to_intlist(binascii.unhexlify(self._K + '083db5aebd9353b4')), bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) )) subtitles_json = self._parse_json( @@ -67,23 +78,27 @@ class ADNIE(InfoExtractor): subtitles = {} for sub_lang, sub in subtitles_json.items(): - srt = '' - for num, current in enumerate(sub): - start, end, text = ( + ssa = '''[Script Info] +ScriptType:V4.00 +[V4 Styles] +Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding +Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0 +[Events] +Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' + for current in sub: + start, end, text, line_align, position_align = ( float_or_none(current.get('startTime')), float_or_none(current.get('endTime')), - current.get('text')) + current.get('text'), current.get('lineAlign'), + current.get('positionAlign')) if start is None or end is None or text is None: continue - srt += os.linesep.join( - ( - '%d' % num, - '%s --> %s' % ( - srt_subtitles_timecode(start), - srt_subtitles_timecode(end)), - text, - os.linesep, - )) + alignment = 
self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) + ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( + self._ass_subtitles_timecode(start), + self._ass_subtitles_timecode(end), + '{\\a%d}' % alignment if alignment != 2 else '', + text.replace('\n', '\\N').replace('', '{\\i1}').replace('', '{\\i0}')) if sub_lang == 'vostf': sub_lang = 'fr' @@ -91,8 +106,8 @@ class ADNIE(InfoExtractor): 'ext': 'json', 'data': json.dumps(sub), }, { - 'ext': 'srt', - 'data': srt, + 'ext': 'ssa', + 'data': ssa, }]) return subtitles @@ -100,7 +115,15 @@ class ADNIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_config = self._parse_json(self._search_regex( - r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id) + r'playerConfig\s*=\s*({.+});', webpage, + 'player config', default='{}'), video_id, fatal=False) + if not player_config: + config_url = urljoin(self._BASE_URL, self._search_regex( + r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"', + webpage, 'config url')) + player_config = self._download_json( + config_url, video_id, + 'Downloading player config JSON metadata')['player'] video_info = {} video_info_str = self._search_regex( @@ -129,12 +152,15 @@ class ADNIE(InfoExtractor): encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) authorization = base64.b64encode(encrypted_message).decode() links_data = self._download_json( - urljoin(self._BASE_URL, links_url), video_id, headers={ + urljoin(self._BASE_URL, links_url), video_id, + 'Downloading links JSON metadata', headers={ 'Authorization': 'Bearer ' + authorization, }) links = links_data.get('links') or {} metas = metas or links_data.get('meta') or {} - sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token + sub_path = sub_path or links_data.get('subtitles') or \ + 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id + sub_path += '&token=' + token error = links_data.get('error') title = metas.get('title') or video_info['title'] @@ -142,9 +168,11 @@ class ADNIE(InfoExtractor): for format_id, qualities in links.items(): if not isinstance(qualities, dict): continue - for load_balancer_url in qualities.values(): + for quality, load_balancer_url in qualities.items(): load_balancer_data = self._download_json( - load_balancer_url, video_id, fatal=False) or {} + load_balancer_url, video_id, + 'Downloading %s %s JSON metadata' % (format_id, quality), + fatal=False) or {} m3u8_url = load_balancer_data.get('location') if not m3u8_url: continue diff --git a/youtube_dl/extractor/adobeconnect.py b/youtube_dl/extractor/adobeconnect.py new file mode 100644 index 000000000..728549eb9 --- /dev/null +++ b/youtube_dl/extractor/adobeconnect.py @@ -0,0 +1,37 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) + + +class AdobeConnectIE(InfoExtractor): + _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P[\w-]+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'(.+?)', webpage, 'title') + qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1]) + is_live = qs.get('isLive', ['false'])[0] == 'true' + formats = [] + for con_string in qs['conStrings'][0].split(','): + formats.append({ + 'format_id': 
con_string.split('://')[0], + 'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]), + 'ext': 'flv', + 'play_path': 'mp4:' + qs['streamName'][0], + 'rtmp_conn': 'S:' + qs['ticket'][0], + 'rtmp_live': is_live, + 'url': con_string, + }) + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'formats': formats, + 'is_live': is_live, + } diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 88c96a950..8d1d9ac7d 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -1,13 +1,19 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .turner import TurnerBaseIE from ..utils import ( + determine_ext, + float_or_none, int_or_none, + mimetype2ext, + parse_age_limit, + parse_iso8601, strip_or_none, - url_or_none, + try_get, ) @@ -21,8 +27,8 @@ class AdultSwimIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'Rick and Morty - Pilot', 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.', - 'timestamp': 1493267400, - 'upload_date': '20170427', + 'timestamp': 1543294800, + 'upload_date': '20181127', }, 'params': { # m3u8 download @@ -43,6 +49,7 @@ class AdultSwimIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', }, { 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', 'info_dict': { @@ -61,9 +68,9 @@ class AdultSwimIE(TurnerBaseIE): }, { 'url': 'http://www.adultswim.com/videos/attack-on-titan', 'info_dict': { - 'id': 'b7A69dzfRzuaXIECdxW8XQ', + 'id': 'attack-on-titan', 'title': 'Attack on Titan', - 'description': 'md5:6c8e003ea0777b47013e894767f5e114', + 'description': 'md5:41caa9416906d90711e31dc00cb7db7e', }, 'playlist_mincount': 12, }, { @@ -78,83 +85,118 @@ class AdultSwimIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', }] def _real_extract(self, url): show_path, episode_path = re.match(self._VALID_URL, url).groups() display_id = episode_path or show_path - webpage = self._download_webpage(url, display_id) - initial_data = self._parse_json(self._search_regex( - r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});', - webpage, 'initial data'), display_id) - - is_stream = show_path == 'streams' - if is_stream: - if not episode_path: - episode_path = 'live-stream' - - video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path) - video_id = video_data.get('stream') - - if not video_id: - entries = [] - for episode in video_data.get('archiveEpisodes', []): - episode_url = url_or_none(episode.get('url')) - if not episode_url: - continue - entries.append(self.url_result( - episode_url, 'AdultSwim', episode.get('id'))) - return self.playlist_result( - entries, video_data.get('id'), video_data.get('title'), - strip_or_none(video_data.get('description'))) + query = '''query { + getShowBySlug(slug:"%s") { + %%s + } +}''' % show_path + if episode_path: + query = query % '''title + getVideoBySlug(slug:"%s") { + _id + auth + description + duration + episodeNumber + launchDate + mediaID + seasonNumber + poster + title + tvRating + }''' % episode_path + ['getVideoBySlug'] else: - show_data = initial_data['show'] + query = query % '''metaDescription + title + videos(first:1000,sort:["episode_number"]) { + edges { + node { + _id + slug + } + } + }''' + show_data = self._download_json( + 'https://www.adultswim.com/api/search', 
display_id, + data=json.dumps({'query': query}).encode(), + headers={'Content-Type': 'application/json'})['data']['getShowBySlug'] + if episode_path: + video_data = show_data['getVideoBySlug'] + video_id = video_data['_id'] + episode_title = title = video_data['title'] + series = show_data.get('title') + if series: + title = '%s - %s' % (series, title) + info = { + 'id': video_id, + 'title': title, + 'description': strip_or_none(video_data.get('description')), + 'duration': float_or_none(video_data.get('duration')), + 'formats': [], + 'subtitles': {}, + 'age_limit': parse_age_limit(video_data.get('tvRating')), + 'thumbnail': video_data.get('poster'), + 'timestamp': parse_iso8601(video_data.get('launchDate')), + 'series': series, + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode': episode_title, + 'episode_number': int_or_none(video_data.get('episodeNumber')), + } - if not episode_path: - entries = [] - for video in show_data.get('videos', []): - slug = video.get('slug') - if not slug: + auth = video_data.get('auth') + media_id = video_data.get('mediaID') + if media_id: + info.update(self._extract_ngtv_info(media_id, { + # CDN_TOKEN_APP_ID from: + # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js + 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE', + }, { + 'url': url, + 'site_name': 'AdultSwim', + 'auth_required': auth, + })) + + if not auth: + extract_data = self._download_json( + 'https://www.adultswim.com/api/shows/v1/videos/' + video_id, + video_id, query={'fields': 'stream'}, fatal=False) or {} + assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or [] + for asset in assets: + asset_url = asset.get('url') + if not asset_url: continue - entries.append(self.url_result( - 'http://adultswim.com/videos/%s/%s' % (show_path, slug), - 'AdultSwim', video.get('id'))) - return self.playlist_result( - entries, show_data.get('id'), show_data.get('title'), - strip_or_none(show_data.get('metadata', {}).get('description'))) + ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type'))) + if ext == 'm3u8': + info['formats'].extend(self._extract_m3u8_formats( + asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + continue + # info['formats'].extend(self._extract_f4m_formats( + # asset_url, video_id, f4m_id='hds', fatal=False)) + elif ext in ('scc', 'ttml', 'vtt'): + info['subtitles'].setdefault('en', []).append({ + 'url': asset_url, + }) + self._sort_formats(info['formats']) - video_data = show_data['sluggedVideo'] - video_id = video_data['id'] - - info = self._extract_cvp_info( - 'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id, - video_id, { - 'secure': { - 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', - 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', - }, - }, { - 'url': url, - 'site_name': 'AdultSwim', - 'auth_required': video_data.get('auth'), - }) - - info.update({ - 'id': video_id, - 'display_id': display_id, - 'description': info.get('description') or strip_or_none(video_data.get('description')), - }) - if not is_stream: - info.update({ - 'duration': info.get('duration') or int_or_none(video_data.get('duration')), - 'timestamp': info.get('timestamp') or 
int_or_none(video_data.get('launch_date')), - 'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')), - 'episode': info['title'], - 'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')), - }) - - info['series'] = video_data.get('collection_title') or info.get('series') - if info['series'] and info['series'] != info['title']: - info['title'] = '%s - %s' % (info['series'], info['title']) - - return info + return info + else: + entries = [] + for edge in show_data.get('videos', {}).get('edges', []): + video = edge.get('node') or {} + slug = video.get('slug') + if not slug: + continue + entries.append(self.url_result( + 'http://adultswim.com/videos/%s/%s' % (show_path, slug), + 'AdultSwim', video.get('_id'))) + return self.playlist_result( + entries, show_path, show_data.get('title'), + strip_or_none(show_data.get('metaDescription'))) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index cb9279193..dffa9733d 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, int_or_none, @@ -12,12 +16,12 @@ from ..utils import ( class AolIE(InfoExtractor): - IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P[^/?#&]+)' + IE_NAME = 'aol.com' + _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.com/video/(?:[^/]+/)*)(?P[0-9a-f]+)' _TESTS = [{ # video with 5min ID - 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', + 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { 'id': '518167793', @@ -34,7 +38,7 @@ class AolIE(InfoExtractor): } }, { # video with vidible ID - 'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', + 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', 'info_dict': { 'id': '5707d6b8e4b090497b04f706', 'ext': 'mp4', @@ -49,17 +53,17 @@ class AolIE(InfoExtractor): 'skip_download': True, } }, { - 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', + 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', 'only_matching': True, }, { - 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', - 'only_matching': True, - }, { - 'url': 'http://on.aol.com/video/519442220', + 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/', 'only_matching': True, }, { 'url': 'aol-video:5707d6b8e4b090497b04f706', 'only_matching': True, + }, { + 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/', + 'only_matching': True, }] def _real_extract(self, url): @@ -73,7 +77,7 @@ class AolIE(InfoExtractor): video_data = response['data'] formats = [] - m3u8_url = video_data.get('videoMasterPlaylist') + m3u8_url = url_or_none(video_data.get('videoMasterPlaylist')) if m3u8_url: formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) @@ -96,6 +100,12 @@ class 
AolIE(InfoExtractor): 'width': int(mobj.group(1)), 'height': int(mobj.group(2)), }) + else: + qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query) + f.update({ + 'width': int_or_none(qs.get('w', [None])[0]), + 'height': int_or_none(qs.get('h', [None])[0]), + }) formats.append(f) self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) diff --git a/youtube_dl/extractor/bfi.py b/youtube_dl/extractor/bfi.py new file mode 100644 index 000000000..60c8944b5 --- /dev/null +++ b/youtube_dl/extractor/bfi.py @@ -0,0 +1,37 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import extract_attributes + + +class BFIPlayerIE(InfoExtractor): + IE_NAME = 'bfi:player' + _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P[\w-]+)-online' + _TEST = { + 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online', + 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8', + 'info_dict': { + 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63', + 'ext': 'mp4', + 'title': 'Computer Doctor', + 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b', + }, + 'skip': 'BFI Player films cannot be played outside of the UK', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + entries = [] + for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage): + player_attr = extract_attributes(player_el) + ooyala_id = player_attr.get('data-video-id') + if not ooyala_id: + continue + entries.append(self.url_result( + 'ooyala:' + ooyala_id, 'Ooyala', + ooyala_id, player_attr.get('data-label'))) + return self.playlist_result(entries) diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py index 4c5c6be10..3707dc97f 100644 --- a/youtube_dl/extractor/biqle.py +++ b/youtube_dl/extractor/biqle.py @@ -28,7 +28,7 @@ class BIQLEIE(InfoExtractor): 'url': 'http://biqle.org/watch/-44781847_168547604', 'md5': '7f24e72af1db0edf7c1aaba513174f97', 'info_dict': { - 'id': '168547604', + 'id': '-44781847_168547604', 'ext': 'mp4', 'title': 'Ребенок в шоке от автоматической мойки', 'timestamp': 1396633454, diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index 20996962a..de7f6d670 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -10,16 +10,16 @@ from ..utils import ( int_or_none, js_to_json, mimetype2ext, + try_get, unescapeHTML, + parse_iso8601, ) class DVTVIE(InfoExtractor): IE_NAME = 'dvtv' IE_DESC = 'http://video.aktualne.cz/' - _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P[0-9a-f]{32})' - _TESTS = [{ 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', 'md5': '67cb83e4a955d36e1b5d31993134a0c2', @@ -28,11 +28,13 @@ class DVTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně', 'duration': 1484, + 'upload_date': '20141217', + 'timestamp': 1418792400, } }, { 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'info_dict': { - 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci', + 'title': r'DVTV 16. 12. 
2014: útok Talibanu, boj o kliniku, uprchlíci', 'id': '973eb3bc854e11e498be002590604f2e', }, 'playlist': [{ @@ -84,6 +86,8 @@ class DVTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta', 'duration': 1103, + 'upload_date': '20170511', + 'timestamp': 1494514200, }, 'params': { 'skip_download': True, @@ -91,43 +95,59 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'only_matching': True, + }, { + # Test live stream video (liveStarter) parsing + 'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/', + 'md5': '2e552e483f2414851ca50467054f9d5d', + 'info_dict': { + 'id': '8d116360288011e98c840cc47ab5f122', + 'ext': 'mp4', + 'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu', + 'upload_date': '20190204', + 'timestamp': 1549289591, + }, + 'params': { + # Video content is no longer available + 'skip_download': True, + }, }] - def _parse_video_metadata(self, js, video_id, live_js=None): + def _parse_video_metadata(self, js, video_id, timestamp): data = self._parse_json(js, video_id, transform_source=js_to_json) - if live_js: - data.update(self._parse_json( - live_js, video_id, transform_source=js_to_json)) - title = unescapeHTML(data['title']) + live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict) + if live_starter: + data.update(live_starter) + formats = [] - for video in data['sources']: - video_url = video.get('file') - if not video_url: - continue - video_type = video.get('type') - ext = determine_ext(video_url, mimetype2ext(video_type)) - if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif video_type == 'application/dash+xml' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - video_url, video_id, mpd_id='dash', fatal=False)) - else: - label = video.get('label') - height = self._search_regex( - r'^(\d+)[pP]', label or '', 'height', default=None) - format_id = ['http'] - for f in (ext, label): - if f: - format_id.append(f) - formats.append({ - 'url': video_url, - 'format_id': '-'.join(format_id), - 'height': int_or_none(height), - }) + for tracks in data.get('tracks', {}).values(): + for video in tracks: + video_url = video.get('src') + if not video_url: + continue + video_type = video.get('type') + ext = determine_ext(video_url, mimetype2ext(video_type)) + if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif video_type == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + else: + label = video.get('label') + height = self._search_regex( + r'^(\d+)[pP]', label or '', 'height', default=None) + format_id = ['http'] + for f in (ext, label): + if f: + format_id.append(f) + formats.append({ + 'url': video_url, + 'format_id': '-'.join(format_id), + 'height': int_or_none(height), + }) self._sort_formats(formats) return { @@ -136,41 +156,29 @@ class DVTVIE(InfoExtractor): 'description': data.get('description'), 'thumbnail': data.get('image'), 'duration': int_or_none(data.get('duration')), - 
'timestamp': int_or_none(data.get('pubtime')), + 'timestamp': int_or_none(timestamp), 'formats': formats } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + timestamp = parse_iso8601(self._html_search_meta( + 'article:published_time', webpage, 'published time', default=None)) - # live content - live_item = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});', - webpage, 'video', default=None) - - # single video - item = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});', - webpage, 'video', default=None) - - if item: - return self._parse_video_metadata(item, video_id, live_item) - - # playlist - items = re.findall( - r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);", - webpage) - if not items: - items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage) - + items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage) if items: - return { - '_type': 'playlist', - 'id': video_id, - 'title': self._og_search_title(webpage), - 'entries': [self._parse_video_metadata(i, video_id) for i in items] - } + return self.playlist_result( + [self._parse_video_metadata(i, video_id, timestamp) for i in items], + video_id, self._html_search_meta('twitter:title', webpage)) + + item = self._search_regex( + r'(?s)BBXPlayer\.setup\((.+?)\);', + webpage, 'video', default=None) + if item: + # remove function calls (ex. htmldeentitize) + # TODO this should be fixed in a general way in the js_to_json + item = re.sub(r'\w+?\((.+)\)', r'\1', item) + return self._parse_video_metadata(item, video_id, timestamp) raise ExtractorError('Could not find neither video nor playlist') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a1a0f9cd5..cc19af5c4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -20,6 +20,7 @@ from .acast import ( ) from .addanime import AddAnimeIE from .adn import ADNIE +from .adobeconnect import AdobeConnectIE from .adobetv import ( AdobeTVIE, AdobeTVShowIE, @@ -106,6 +107,7 @@ from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE from .beatport import BeatportIE from .bet import BetIE +from .bfi import BFIPlayerIE from .bigflix import BigflixIE from .bild import BildIE from .bilibili import ( @@ -440,10 +442,7 @@ from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .hark import HarkIE -from .hbo import ( - HBOIE, - HBOEpisodeIE, -) +from .hbo import HBOIE from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE @@ -635,6 +634,7 @@ from .mediaset import MediasetIE from .mediasite import ( MediasiteIE, MediasiteCatalogIE, + MediasiteNamedCatalogIE, ) from .medici import MediciIE from .megaphone import MegaphoneIE @@ -868,6 +868,10 @@ from .picarto import ( from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE +from .platzi import ( + PlatziIE, + PlatziCourseIE, +) from .playfm import PlayFMIE from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE @@ -1089,6 +1093,7 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE +from .stv import STVPlayerIE from .sunporno import SunPornoIE from .svt import ( SVTIE, diff --git a/youtube_dl/extractor/gaia.py b/youtube_dl/extractor/gaia.py index 
f2eef3f4c..e9527758f 100644 --- a/youtube_dl/extractor/gaia.py +++ b/youtube_dl/extractor/gaia.py @@ -4,12 +4,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urllib_parse_unquote, +) from ..utils import ( + ExtractorError, int_or_none, str_or_none, strip_or_none, try_get, + urlencode_postdata, ) @@ -46,6 +51,29 @@ class GaiaIE(InfoExtractor): 'skip_download': True, }, }] + _NETRC_MACHINE = 'gaia' + _jwt = None + + def _real_initialize(self): + auth = self._get_cookies('https://www.gaia.com/').get('auth') + if auth: + auth = self._parse_json( + compat_urllib_parse_unquote(auth.value), + None, fatal=False) + if not auth: + username, password = self._get_login_info() + if username is None: + return + auth = self._download_json( + 'https://auth.gaia.com/v1/login', + None, data=urlencode_postdata({ + 'username': username, + 'password': password + })) + if auth.get('success') is False: + raise ExtractorError(', '.join(auth['messages']), expected=True) + if auth: + self._jwt = auth.get('jwt') def _real_extract(self, url): display_id, vtype = re.search(self._VALID_URL, url).groups() @@ -59,8 +87,12 @@ class GaiaIE(InfoExtractor): media_id = compat_str(vdata['nid']) title = node['title'] + headers = None + if self._jwt: + headers = {'Authorization': 'Bearer ' + self._jwt} media = self._download_json( - 'https://brooklyn.gaia.com/media/' + media_id, media_id) + 'https://brooklyn.gaia.com/media/' + media_id, + media_id, headers=headers) formats = self._extract_m3u8_formats( media['mediaUrls']['bcHLS'], media_id, 'mp4') self._sort_formats(formats) diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index 859ad5429..44440233d 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -4,16 +4,28 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( xpath_text, xpath_element, int_or_none, parse_duration, + urljoin, ) -class HBOBaseIE(InfoExtractor): +class HBOIE(InfoExtractor): + IE_NAME = 'hbo' + _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer', + 'md5': '8126210656f433c452a21367f9ad85b3', + 'info_dict': { + 'id': '22113301', + 'ext': 'mp4', + 'title': 'Game of Thrones - Trailer', + }, + 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], + } _FORMATS_INFO = { 'pro7': { 'width': 1280, @@ -53,10 +65,17 @@ class HBOBaseIE(InfoExtractor): }, } - def _extract_from_id(self, video_id): - video_data = self._download_xml( - 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id) - title = xpath_text(video_data, 'title', 'title', True) + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + location_path = self._parse_json(self._html_search_regex( + r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl'] + video_data = self._download_xml(urljoin(url, location_path), display_id) + video_id = xpath_text(video_data, 'id', fatal=True) + episode_title = title = xpath_text(video_data, 'title', fatal=True) + series = xpath_text(video_data, 'program') + if series: + title = '%s - %s' % (series, title) formats = [] for source in xpath_element(video_data, 'videos', 'sources', True): @@ -128,68 +147,23 @@ 
class HBOBaseIE(InfoExtractor): 'width': width, }) + subtitles = None + caption_url = xpath_text(video_data, 'captionUrl') + if caption_url: + subtitles = { + 'en': [{ + 'url': caption_url, + 'ext': 'ttml' + }], + } + return { 'id': video_id, 'title': title, 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')), + 'series': series, + 'episode': episode_title, 'formats': formats, 'thumbnails': thumbnails, + 'subtitles': subtitles, } - - -class HBOIE(HBOBaseIE): - IE_NAME = 'hbo' - _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' - _TEST = { - 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', - 'md5': '2c6a6bc1222c7e91cb3334dad1746e5a', - 'info_dict': { - 'id': '1437839', - 'ext': 'mp4', - 'title': 'Ep. 64 Clip: Encryption', - 'thumbnail': r're:https?://.*\.jpg$', - 'duration': 1072, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._extract_from_id(video_id) - - -class HBOEpisodeIE(HBOBaseIE): - IE_NAME = 'hbo:episode' - _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P[0-9a-z-]+))(?:\.html)?' - - _TESTS = [{ - 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', - 'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb', - 'info_dict': { - 'id': '1439518', - 'display_id': 'ep-52-inside-the-episode', - 'ext': 'mp4', - 'title': 'Ep. 52: Inside the Episode', - 'thumbnail': r're:https?://.*\.jpg$', - 'duration': 240, - }, - }, { - 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', - 'only_matching': True, - }, { - 'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver', - 'only_matching': True, - }] - - def _real_extract(self, url): - path, display_id = re.match(self._VALID_URL, url).groups() - - content = self._download_json( - 'http://www.hbo.com/api/content/' + path, display_id)['content'] - - video_id = compat_str((content.get('parsed', {}).get( - 'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId']) - - info_dict = self._extract_from_id(video_id) - info_dict['display_id'] = display_id - - return info_dict diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py index 5c9e49d90..694a264d6 100644 --- a/youtube_dl/extractor/mediasite.py +++ b/youtube_dl/extractor/mediasite.py @@ -22,7 +22,7 @@ from ..utils import ( ) -_ID_RE = r'[0-9a-f]{32,34}' +_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})' class MediasiteIE(InfoExtractor): @@ -98,6 +98,11 @@ class MediasiteIE(InfoExtractor): 'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d', 'only_matching': True, }, + { + # dashed id + 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d', + 'only_matching': True, + } ] # look in Mediasite.Core.js (Mediasite.ContentStreamType[*]) @@ -264,6 +269,10 @@ class MediasiteCatalogIE(InfoExtractor): }, { 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521', 'only_matching': True, + }, { + # dashed id + 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e', + 'only_matching': True, }] def _real_extract(self, url): @@ -333,3 +342,25 @@ class MediasiteCatalogIE(InfoExtractor): 
catalog, lambda x: x['CurrentFolder']['Name'], compat_str) return self.playlist_result(entries, catalog_id, title,) + + +class MediasiteNamedCatalogIE(InfoExtractor): + _VALID_URL = r'(?xi)(?Phttps?://[^/]+/Mediasite)/Catalog/catalogs/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + mediasite_url = mobj.group('url') + catalog_name = mobj.group('catalog_name') + + webpage = self._download_webpage(url, catalog_name) + + catalog_id = self._search_regex( + r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, webpage, 'catalog id') + + return self.url_result( + '%s/Catalog/Full/%s' % (mediasite_url, catalog_id), + ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id) diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index e3f35f1d8..dab4aec44 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -1,12 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import base64 +import hashlib from .common import InfoExtractor +from ..aes import aes_cbc_decrypt from ..utils import ( - ExtractorError, + bytes_to_intlist, int_or_none, + intlist_to_bytes, + parse_codecs, + parse_duration, ) @@ -14,7 +19,7 @@ class NewstubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P.+)' _TEST = { 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', - 'md5': '801eef0c2a9f4089fa04e4fe3533abdc', + 'md5': '9d10320ad473444352f72f746ccb8b8c', 'info_dict': { 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', 'ext': 'mp4', @@ -25,84 +30,45 @@ class NewstubeIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) - page = self._download_webpage(url, video_id, 'Downloading page') + page = self._download_webpage(url, video_id) + title = self._html_search_meta(['og:title', 'twitter:title'], page, fatal=True) video_guid = self._html_search_regex( - r'\d+)-[^/?\#&]+ + ''' + _LOGIN_URL = 'https://platzi.com/login/' + _NETRC_MACHINE = 'platzi' + + _TESTS = [{ + 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'email': username, + 'password': password, + }) + + urlh = self._request_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Referer': self._LOGIN_URL}) + + # login succeeded + if 'platzi.com/login' not in 
compat_str(urlh.geturl()): + return + + login_error = self._webpage_read_content( + urlh, self._LOGIN_URL, None, 'Downloading login error page') + + login = self._parse_json( + self._search_regex( + r'login\s*=\s*({.+?})(?:\s*;|\s*[^/?\#&]+) + ''' + _TESTS = [{ + 'url': 'https://platzi.com/clases/next-js/', + 'info_dict': { + 'id': '1311', + 'title': 'Curso de Next.js', + }, + 'playlist_count': 22, + }, { + 'url': 'https://courses.platzi.com/classes/communication-codestream/', + 'info_dict': { + 'id': '1367', + 'title': 'Codestream Course', + }, + 'playlist_count': 14, + }] + + @classmethod + def suitable(cls, url): + return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_name = self._match_id(url) + + webpage = self._download_webpage(url, course_name) + + props = self._parse_json( + self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'), + course_name)['initialProps'] + + entries = [] + for chapter_num, chapter in enumerate(props['concepts'], 1): + if not isinstance(chapter, dict): + continue + materials = chapter.get('materials') + if not materials or not isinstance(materials, list): + continue + chapter_title = chapter.get('title') + chapter_id = str_or_none(chapter.get('id')) + for material in materials: + if not isinstance(material, dict): + continue + if material.get('material_type') != 'video': + continue + video_url = urljoin(url, material.get('url')) + if not video_url: + continue + entries.append({ + '_type': 'url_transparent', + 'url': video_url, + 'title': str_or_none(material.get('name')), + 'id': str_or_none(material.get('id')), + 'ie_key': PlatziIE.ie_key(), + 'chapter': chapter_title, + 'chapter_number': chapter_num, + 'chapter_id': chapter_id, + }) + + course_id = compat_str(try_get(props, lambda x: x['course']['id'])) + course_title = try_get(props, lambda x: x['course']['name'], compat_str) + + return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index 18a327d81..70f000ca8 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -21,7 +21,7 @@ from ..utils import ( class RTL2IE(InfoExtractor): IE_NAME = 'rtl2' - _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P[^?#/]*?)(?:$|/(?:$|[?#]))' + _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P\d+)[^/]+/(?P\d+)-|folge/)(?P[^/?#]+)' _TESTS = [{ 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', 'info_dict': { @@ -34,10 +34,11 @@ class RTL2IE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }, { 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', 'info_dict': { - 'id': '21040-anna-erwischt-alex', + 'id': 'anna-erwischt-alex', 'ext': 'mp4', 'title': 'Anna erwischt Alex!', 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.' @@ -46,31 +47,29 @@ class RTL2IE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }] def _real_extract(self, url): - # Some rtl2 urls have no slash at the end, so append it. 
- if not url.endswith('/'): - url += '/' + vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups() + if not vico_id: + webpage = self._download_webpage(url, display_id) - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - mobj = re.search( - r']+data-collection="(?P\d+)"[^>]+data-video="(?P\d+)"', - webpage) - if mobj: - vico_id = mobj.group('vico_id') - vivi_id = mobj.group('vivi_id') - else: - vico_id = self._html_search_regex( - r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') - vivi_id = self._html_search_regex( - r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') + mobj = re.search( + r'data-collection="(?P\d+)"[^>]+data-video="(?P\d+)"', + webpage) + if mobj: + vico_id = mobj.group('vico_id') + vivi_id = mobj.group('vivi_id') + else: + vico_id = self._html_search_regex( + r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') + vivi_id = self._html_search_regex( + r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') info = self._download_json( - 'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php', - video_id, query={ + 'https://service.rtl2.de/api-player-vipo/video.php', + display_id, query={ 'vico_id': vico_id, 'vivi_id': vivi_id, }) @@ -89,7 +88,7 @@ class RTL2IE(InfoExtractor): 'format_id': 'rtmp', 'url': rtmp_url, 'play_path': stream_url, - 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', + 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf', 'page_url': url, 'flash_version': 'LNX 11,2,202,429', 'rtmp_conn': rtmp_conn, @@ -99,12 +98,12 @@ class RTL2IE(InfoExtractor): m3u8_url = video_info.get('streamurl_hls') if m3u8_url: - formats.extend(self._extract_akamai_formats(m3u8_url, video_id)) + formats.extend(self._extract_akamai_formats(m3u8_url, display_id)) self._sort_formats(formats) return { - 'id': video_id, + 'id': display_id, 'title': title, 'thumbnail': video_info.get('image'), 'description': video_info.get('beschreibung'), diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index f530f0083..f05401b36 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -59,6 +59,20 @@ class RuutuIE(InfoExtractor): 'url': 'http://www.ruutu.fi/video/3193728', 'only_matching': True, }, + { + # audio podcast + 'url': 'https://www.supla.fi/supla/3382410', + 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', + 'info_dict': { + 'id': '3382410', + 'ext': 'mp3', + 'title': 'Mikä ihmeen poltergeist?', + 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + }, + 'expected_warnings': ['HTTP Error 502: Bad Gateway'], + } ] def _real_extract(self, url): @@ -94,6 +108,12 @@ class RuutuIE(InfoExtractor): continue formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) + elif ext == 'mp3' or child.tag == 'AudioMediaFile': + formats.append({ + 'format_id': 'audio', + 'url': video_url, + 'vcodec': 'none', + }) else: proto = compat_urllib_parse_urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py new file mode 100644 index 000000000..ccb074cd4 --- /dev/null +++ b/youtube_dl/extractor/stv.py @@ -0,0 +1,94 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse +) +from ..utils import ( + extract_attributes, + float_or_none, 
+ int_or_none, + str_or_none, +) + + +class STVPlayerIE(InfoExtractor): + IE_NAME = 'stv:player' + _VALID_URL = r'https?://player\.stv\.tv/(?Pepisode|video)/(?P[a-z0-9]{4})' + _TEST = { + 'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/', + 'md5': '2ad867d4afd641fa14187596e0fbc91b', + 'info_dict': { + 'id': '6016487034001', + 'ext': 'mp4', + 'upload_date': '20190321', + 'title': 'Interview with the cast ahead of new Victoria', + 'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.', + 'timestamp': 1553179628, + 'uploader_id': '1486976045', + }, + 'skip': 'this resource is unavailable outside of the UK', + } + _PUBLISHER_ID = '1486976045' + _PTYPE_MAP = { + 'episode': 'episodes', + 'video': 'shortform', + } + + def _real_extract(self, url): + ptype, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex( + r'itemprop="embedURL"[^>]+href="([^"]+)', + webpage, 'embed URL', default=None)).query) + publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID + + player_attr = extract_attributes(self._search_regex( + r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {} + + info = {} + duration = ref_id = series = video_id = None + api_ref_id = player_attr.get('data-player-api-refid') + if api_ref_id: + resp = self._download_json( + 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id), + api_ref_id, fatal=False) + if resp: + result = resp.get('results') or {} + video = result.get('video') or {} + video_id = str_or_none(video.get('id')) + ref_id = video.get('guid') + duration = video.get('length') + programme = result.get('programme') or {} + series = programme.get('name') or programme.get('shortName') + subtitles = {} + _subtitles = result.get('_subtitles') or {} + for ext, sub_url in _subtitles.items(): + subtitles.setdefault('en', []).append({ + 'ext': 'vtt' if ext == 'webvtt' else ext, + 'url': sub_url, + }) + info.update({ + 'description': result.get('summary'), + 'subtitles': subtitles, + 'view_count': int_or_none(result.get('views')), + }) + if not video_id: + video_id = qs.get('videoId', [None])[0] or self._search_regex( + r'([^/]+/)*[^/?#]+)' + _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P([^/]+/)*[^/?#]+)' _TESTS = [ { 'url': 'http://teamcoco.com/video/mary-kay-remote', @@ -79,15 +79,20 @@ class TeamcocoIE(TurnerBaseIE): }, { 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv', 'only_matching': True, + }, { + 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft', + 'only_matching': True, } ] def _graphql_call(self, query_template, object_type, object_id): find_object = 'find' + object_type return self._download_json( - 'http://teamcoco.com/graphql/', object_id, data=json.dumps({ + 'https://teamcoco.com/graphql', object_id, data=json.dumps({ 'query': query_template % (find_object, object_id) - }))['data'][find_object] + }).encode(), headers={ + 'Content-Type': 'application/json', + })['data'][find_object] def _real_extract(self, url): display_id = self._match_id(url) @@ -145,7 +150,12 @@ class TeamcocoIE(TurnerBaseIE): 'accessTokenType': 'jws', })) else: - video_sources = self._graphql_call('''{ + d = self._download_json( + 'https://teamcoco.com/_truman/d/' + video_id, + video_id, fatal=False) or {} + video_sources = d.get('meta') or {} + if not video_sources: + 
         video_sources = self._graphql_call('''{
   %s(id: "%s") {
     src
   }
diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py
index 083e9f36d..66088b9ab 100644
--- a/youtube_dl/extractor/tiktok.py
+++ b/youtube_dl/extractor/tiktok.py
@@ -65,8 +65,15 @@ class TikTokBaseIE(InfoExtractor):
 
 
 class TikTokIE(TikTokBaseIE):
-    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
-    _TEST = {
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:m\.)?tiktok\.com/v|
+                            (?:www\.)?tiktok\.com/share/video
+                        )
+                        /(?P<id>\d+)
+                    '''
+    _TESTS = [{
         'url': 'https://m.tiktok.com/v/6606727368545406213.html',
         'md5': 'd584b572e92fcd48888051f238022420',
         'info_dict': {
@@ -81,25 +88,39 @@ class TikTokIE(TikTokBaseIE):
             'comment_count': int,
             'repost_count': int,
         }
-    }
+    }, {
+        'url': 'https://www.tiktok.com/share/video/6606727368545406213',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
+            'https://m.tiktok.com/v/%s.html' % video_id, video_id)
         data = self._parse_json(self._search_regex(
             r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
         return self._extract_aweme(data)
 
 
 class TikTokUserIE(TikTokBaseIE):
-    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
-    _TEST = {
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:m\.)?tiktok\.com/h5/share/usr|
+                            (?:www\.)?tiktok\.com/share/user
+                        )
+                        /(?P<id>\d+)
+                    '''
+    _TESTS = [{
         'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
         'info_dict': {
             'id': '188294915489964032',
         },
         'playlist_mincount': 24,
-    }
+    }, {
+        'url': 'https://www.tiktok.com/share/user/188294915489964032',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         user_id = self._match_id(url)
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index d1fe95654..1072550f1 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -6,10 +6,7 @@ import re
 import sys
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urlparse,
-)
+from ..compat import compat_urlparse
 from ..utils import (
     clean_html,
     ExtractorError,
@@ -103,7 +100,7 @@ class VKIE(VKBaseIE):
             'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
             'md5': '7babad3b85ea2e91948005b1b8b0cb84',
             'info_dict': {
-                'id': '162222515',
+                'id': '-77521_162222515',
                 'ext': 'mp4',
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
@@ -117,7 +114,7 @@ class VKIE(VKBaseIE):
             'url': 'http://vk.com/video205387401_165548505',
             'md5': '6c0aeb2e90396ba97035b9cbde548700',
             'info_dict': {
-                'id': '165548505',
+                'id': '205387401_165548505',
                 'ext': 'mp4',
                 'title': 'No name',
                 'uploader': 'Tom Cruise',
@@ -132,7 +129,7 @@ class VKIE(VKBaseIE):
             'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
             'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
             'info_dict': {
-                'id': '162925554',
+                'id': '32194266_162925554',
                 'ext': 'mp4',
                 'uploader': 'Vladimir Gavrin',
                 'title': 'Lin Dan',
@@ -149,7 +146,7 @@ class VKIE(VKBaseIE):
             'md5': 'a590bcaf3d543576c9bd162812387666',
             'note': 'Only available for registered users',
             'info_dict': {
-                'id': '164049491',
+                'id': '-8871596_164049491',
                 'ext': 'mp4',
                 'uploader': 'Триллеры',
                 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
@@ -163,7 +160,7 @@ class VKIE(VKBaseIE):
             'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
             'md5': '4d7a5ef8cf114dfa09577e57b2993202',
             'info_dict': {
-                'id': '168067957',
+                'id': '-43215063_168067957',
                 'ext': 'mp4',
                 'uploader': 'Киномания - лучшее из мира кино',
                 'title': ' ',
@@ -177,7 +174,7 @@ class VKIE(VKBaseIE):
             'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
             'note': 'ivi.ru embed',
             'info_dict': {
-                'id': '60690',
+                'id': '-43215063_169084319',
                 'ext': 'mp4',
                 'title': 'Книга Илая',
                 'duration': 6771,
@@ -191,7 +188,7 @@ class VKIE(VKBaseIE):
             'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
             'md5': '091287af5402239a1051c37ec7b92913',
             'info_dict': {
-                'id': '171201961',
+                'id': '30481095_171201961',
                 'ext': 'mp4',
                 'title': 'ТюменцевВВ_09.07.2015',
                 'uploader': 'Anton Ivanov',
@@ -206,10 +203,10 @@ class VKIE(VKBaseIE):
             'url': 'https://vk.com/video276849682_170681728',
             'info_dict': {
                 'id': 'V3K4mi0SYkc',
-                'ext': 'webm',
+                'ext': 'mp4',
                 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
-                'duration': 179,
+                'duration': 178,
                 'upload_date': '20130116',
                 'uploader': "Children's Joy Foundation Inc.",
                 'uploader_id': 'thecjf',
@@ -239,7 +236,7 @@ class VKIE(VKBaseIE):
             'url': 'http://vk.com/video-110305615_171782105',
             'md5': 'e13fcda136f99764872e739d13fac1d1',
             'info_dict': {
-                'id': '171782105',
+                'id': '-110305615_171782105',
                 'ext': 'mp4',
                 'title': 'S-Dance, репетиции к The way show',
                 'uploader': 'THE WAY SHOW | 17 апреля',
@@ -254,14 +251,17 @@ class VKIE(VKBaseIE):
         {
             # finished live stream, postlive_mp4
             'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
-            'md5': '90d22d051fccbbe9becfccc615be6791',
             'info_dict': {
-                'id': '456242764',
+                'id': '-387766_456242764',
                 'ext': 'mp4',
-                'title': 'ИгроМир 2016 — день 1',
+                'title': 'ИгроМир 2016 День 1 — Игромания Утром',
                 'uploader': 'Игромания',
                 'duration': 5239,
-                'view_count': int,
+                # TODO: use act=show to extract view_count
+                # 'view_count': int,
+                'upload_date': '20160929',
+                'uploader_id': '-387766',
+                'timestamp': 1475137527,
             },
         },
         {
@@ -465,7 +465,7 @@ class VKIE(VKBaseIE):
         self._sort_formats(formats)
 
         return {
-            'id': compat_str(data.get('vid') or video_id),
+            'id': video_id,
             'formats': formats,
             'title': title,
             'thumbnail': data.get('jpg'),
diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py
index 6c060ae76..c11da97de 100644
--- a/youtube_dl/extractor/vrv.py
+++ b/youtube_dl/extractor/vrv.py
@@ -150,9 +150,10 @@ class VRVIE(VRVBaseIE):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        episode_path = self._get_cms_resource(
-            'cms:/episodes/' + video_id, video_id)
-        video_data = self._call_cms(episode_path, video_id, 'video')
+        object_data = self._call_cms(self._get_cms_resource(
+            'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0]
+        resource_path = object_data['__links__']['resource']['href']
+        video_data = self._call_cms(resource_path, video_id, 'video')
         title = video_data['title']
 
         streams_path = video_data['__links__'].get('streams', {}).get('href')
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index ec2d913fc..166bcf443 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -57,10 +57,17 @@ class XVideosIE(InfoExtractor):
             webpage, 'title', default=None,
             group='title') or self._og_search_title(webpage)
 
-        thumbnail = self._search_regex(
-            (r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
-             r'url_bigthumb=(?P<thumbnail>.+?)&'),
-            webpage, 'thumbnail', fatal=False, group='thumbnail')
+        thumbnails = []
+        for preference, thumbnail in enumerate(('', '169')):
+            thumbnail_url = self._search_regex(
+                r'setThumbUrl%s\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1' % thumbnail,
+                webpage, 'thumbnail', default=None, group='thumbnail')
+            if thumbnail_url:
+                thumbnails.append({
+                    'url': thumbnail_url,
+                    'preference': preference,
+                })
+
         duration = int_or_none(self._og_search_property(
             'duration', webpage, default=None)) or parse_duration(
             self._search_regex(
@@ -98,6 +105,6 @@ class XVideosIE(InfoExtractor):
             'formats': formats,
             'title': title,
             'duration': duration,
-            'thumbnail': thumbnail,
+            'thumbnails': thumbnails,
             'age_limit': 18,
         }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 886fc1591..132572c88 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -484,7 +484,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # RTMP (unnamed)
         '_rtmp': {'protocol': 'rtmp'},
     }
-    _SUBTITLE_FORMATS = ('ttml', 'vtt')
+    _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 
     _GEO_BYPASS = False
 
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 5e86bc4d5..5c7d550f5 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2019.03.18'
+__version__ = '2019.04.07'