diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ed3e0a157..058eb4321 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.26 +[debug] youtube-dl version 2018.10.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 241712037..86cf489b1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version 2018.10.05 + +Extractors +* [pluralsight] Improve authentication (#17762) +* [dailymotion] Fix extraction (#17699) +* [crunchyroll] Switch to HTTPS for RpcApi (#17749) ++ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705) +* [philharmoniedeparis] Fix extraction (#17705) ++ [jamendo] Add support for licensing.jamendo.com (#17724) ++ [openload] Add support for oload.cloud (#17710) +* [pluralsight] Fix subtitles extraction (#17726, #17728) ++ [vimeo] Add another config regular expression (#17690) +* [spike] Fix Paramount Network extraction (#17677) +* [hotstar] Fix extraction (#14694, #14931, #17637) + + version 2018.09.26 Extractors diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 736ab6da7..f167a6ddc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -360,7 +360,7 @@ - **HitRecord** - **HornBunny** - **HotNewHipHop** - - **HotStar** + - **hotstar** - **hotstar:playlist** - **Howcast** - **HowStuffWorks** diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 14f9a14ed..465ae396e 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,8 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import base64 import json +import re +import struct from .common import InfoExtractor from .adobepass import AdobePassIE @@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor): 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?', expected=True) + def _brightcove_new_url_result(self, publisher_id, video_id): + brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) + def _get_video_info(self, video_id, query, referer=None): headers = {} linkBase = query.get('linkBaseURL') @@ -323,6 +329,28 @@ class BrightcoveLegacyIE(InfoExtractor): r"

We're sorry.

([\s\n]*

.*?

)+", webpage, 'error message', default=None) if error_msg is not None: + publisher_id = query.get('publisherId') + if publisher_id and publisher_id[0].isdigit(): + publisher_id = publisher_id[0] + if not publisher_id: + player_key = query.get('playerKey') + if player_key and ',' in player_key[0]: + player_key = player_key[0] + else: + player_id = query.get('playerID') + if player_id and player_id[0].isdigit(): + player_page = self._download_webpage( + 'http://link.brightcove.com/services/player/bcpid' + player_id[0], + video_id, headers=headers, fatal=False) + if player_page: + player_key = self._search_regex( + r'Q', base64.urlsafe_b64decode(enc_pub_id))[0] + if publisher_id: + return self._brightcove_new_url_result(publisher_id, video_id) raise ExtractorError( 'brightcove said: %s' % error_msg, expected=True) @@ -444,8 +472,12 @@ class BrightcoveLegacyIE(InfoExtractor): else: return ad_info - if 'url' not in info and not info.get('formats'): - raise ExtractorError('Unable to extract video url for %s' % video_id) + if not info.get('url') and not info.get('formats'): + uploader_id = info.get('uploader_id') + if uploader_id: + info.update(self._brightcove_new_url_result(uploader_id, video_id)) + else: + raise ExtractorError('Unable to extract video url for %s' % video_id) return info diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index 224a1fb5d..f9bd535f6 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_iso8601, @@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( + data = self._download_json( 'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id, - video_id)['video'] + video_id) + if data.get('result') != 'ok': + raise ExtractorError(data['msg'], expected=True) + video_data = data['video'] title = video_data['title'] mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 040f0bd02..1816c559e 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -22,7 +22,10 @@ from ..utils import ( parse_iso8601, sanitized_Request, str_to_int, + try_get, unescapeHTML, + update_url_query, + url_or_none, urlencode_postdata, ) @@ -171,10 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor): r'__PLAYER_CONFIG__\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: - player = self._parse_json(player_v5, video_id) - metadata = player['metadata'] + player = self._parse_json(player_v5, video_id, fatal=False) or {} + metadata = try_get(player, lambda x: x['metadata'], dict) + if not metadata: + metadata_url = url_or_none(try_get( + player, lambda x: x['context']['metadata_template_url1'])) + if metadata_url: + metadata_url = metadata_url.replace(':videoId', video_id) + else: + metadata_url = update_url_query( + 'https://www.dailymotion.com/player/metadata/video/%s' + % video_id, { + 'embedder': url, + 'integration': 'inline', + 'GK_PV5_NEON': '1', + }) + metadata = self._download_json( + metadata_url, video_id, 'Downloading metadata JSON') - if metadata.get('error', {}).get('type') == 'password_protected': + if try_get(metadata, lambda x: x['error']['type']) == 'password_protected': password = self._downloader.params.get('videopassword') if password: r = int(metadata['id'][1:], 36) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 464c8d690..17b576df3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1153,7 +1153,6 @@ from .tv2 import ( TV2ArticleIE, ) from .tv2hu import TV2HuIE -from .tv3 import TV3IE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tva import TVAIE diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index dc01b6346..c652603a5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -314,6 +314,9 @@ class OpenloadIE(InfoExtractor): # Its title has not got its extension but url has it 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', 'only_matching': True, + }, { + 'url': 'https://oload.cc/embed/5NEAbI2BDSk', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 9eb027679..426dd8121 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -2,52 +2,63 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + clean_html, + determine_ext, + int_or_none, + parse_iso8601, +) class PatreonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P[^&#]+)' - _TESTS = [ - { - 'url': 'http://www.patreon.com/creation?hid=743933', - 'md5': 'e25505eec1053a6e6813b8ed369875cc', - 'info_dict': { - 'id': '743933', - 'ext': 'mp3', - 'title': 'Episode 166: David Smalley of Dogma Debate', - 'uploader': 'Cognitive Dissonance Podcast', - 'thumbnail': 're:^https?://.*$', - }, + _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P\d+)' + _TESTS = [{ + 'url': 'http://www.patreon.com/creation?hid=743933', + 'md5': 'e25505eec1053a6e6813b8ed369875cc', + 'info_dict': { + 'id': '743933', + 'ext': 'mp3', + 'title': 'Episode 166: David Smalley of Dogma Debate', + 'description': 'md5:713b08b772cd6271b9f3906683cfacdf', + 'uploader': 'Cognitive Dissonance Podcast', + 'thumbnail': 're:^https?://.*$', + 'timestamp': 1406473987, + 'upload_date': '20140727', }, - { - 'url': 'http://www.patreon.com/creation?hid=754133', - 'md5': '3eb09345bf44bf60451b8b0b81759d0a', - 'info_dict': { - 'id': '754133', - 'ext': 'mp3', - 'title': 'CD 167 Extra', - 'uploader': 'Cognitive Dissonance Podcast', - 'thumbnail': 're:^https?://.*$', - }, + }, { + 'url': 'http://www.patreon.com/creation?hid=754133', + 'md5': '3eb09345bf44bf60451b8b0b81759d0a', + 'info_dict': { + 'id': '754133', + 'ext': 'mp3', + 'title': 'CD 167 Extra', + 'uploader': 'Cognitive Dissonance Podcast', + 'thumbnail': 're:^https?://.*$', }, - { - 'url': 'https://www.patreon.com/creation?hid=1682498', - 'info_dict': { - 'id': 'SU4fj_aEMVw', - 'ext': 'mp4', - 'title': 'I\'m on Patreon!', - 'uploader': 'TraciJHines', - 'thumbnail': 're:^https?://.*$', - 'upload_date': '20150211', - 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', - 'uploader_id': 'TraciJHines', - }, - 'params': { - 'noplaylist': True, - 'skip_download': True, - } + 'skip': 'Patron-only content', + }, { + 'url': 'https://www.patreon.com/creation?hid=1682498', + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'uploader': 'TraciJHines', + 'thumbnail': 're:^https?://.*$', + 'upload_date': '20150211', + 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', + 'uploader_id': 'TraciJHines', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, } - ] + }, { + 'url': 'https://www.patreon.com/posts/episode-166-of-743933', + 'only_matching': True, + }, { + 'url': 'https://www.patreon.com/posts/743933', + 'only_matching': True, + }] # Currently Patreon exposes download URL via hidden CSS, so login is not # needed. Keeping this commented for when this inevitably changes. @@ -78,38 +89,48 @@ class PatreonIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage).strip() - - attach_fn = self._html_search_regex( - r'
', - webpage, 'attachment URL', default=None) - embed = self._html_search_regex( - r']+id="watchCreation"[^>]*>\s*]+src="([^"]+)"', - webpage, 'embedded URL', default=None) - - if attach_fn is not None: - video_url = 'http://www.patreon.com' + attach_fn - thumbnail = self._og_search_thumbnail(webpage) - uploader = self._html_search_regex( - r'(.*?) is creating', webpage, 'uploader') - elif embed is not None: - return self.url_result(embed) - else: - playlist = self._parse_json(self._search_regex( - r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', - webpage, 'playlist JSON'), - video_id, transform_source=js_to_json) - data = playlist[0] - video_url = self._proto_relative_url(data['mp3']) - thumbnail = self._proto_relative_url(data.get('cover')) - uploader = data.get('artist') - - return { + post = self._download_json( + 'https://www.patreon.com/api/posts/' + video_id, video_id) + attributes = post['data']['attributes'] + title = attributes['title'].strip() + image = attributes.get('image') or {} + info = { 'id': video_id, - 'url': video_url, - 'ext': 'mp3', 'title': title, - 'uploader': uploader, - 'thumbnail': thumbnail, + 'description': clean_html(attributes.get('content')), + 'thumbnail': image.get('large_url') or image.get('url'), + 'timestamp': parse_iso8601(attributes.get('published_at')), + 'like_count': int_or_none(attributes.get('like_count')), + 'comment_count': int_or_none(attributes.get('comment_count')), } + + def add_file(file_data): + file_url = file_data.get('url') + if file_url: + info.update({ + 'url': file_url, + 'ext': determine_ext(file_data.get('name'), 'mp3'), + }) + + for i in post.get('included', []): + i_type = i.get('type') + if i_type == 'attachment': + add_file(i.get('attributes') or {}) + elif i_type == 'user': + user_attributes = i.get('attributes') + if user_attributes: + info.update({ + 'uploader': user_attributes.get('full_name'), + 'uploader_url': user_attributes.get('url'), + }) + + if not info.get('url'): + add_file(attributes.get('post_file') or {}) + + if not info.get('url'): + info.update({ + '_type': 'url', + 'url': attributes['embed']['url'], + }) + + return info diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index daf172570..eafe56897 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -4,6 +4,7 @@ import collections import json import os import random +import re from .common import InfoExtractor from ..compat import ( @@ -196,7 +197,10 @@ query viewClip { if error: raise ExtractorError('Unable to login: %s' % error, expected=True) - if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')): + if all(not re.search(p, response) for p in ( + r'__INITIAL_STATE__', r'["\']currentUser["\']', + # new layout? + r'>\s*Sign out\s*<')): BLOCKED = 'Your account has been blocked due to suspicious activity' if BLOCKED in response: raise ExtractorError( diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 261bcbb83..10ac8ed1f 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -103,7 +103,8 @@ class RutubeIE(RutubeBaseIE): options = self._download_json( 'http://rutube.ru/api/play/options/%s/?format=json' % video_id, - video_id, 'Downloading options JSON') + video_id, 'Downloading options JSON', + headers=self.geo_verification_headers()) formats = [] for format_id, format_url in options['video_balancer'].items(): diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 212ac80ab..f9b6aa48f 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -212,8 +212,6 @@ class TEDIE(InfoExtractor): http_url = None for format_id, resources in resources_.items(): - if not isinstance(resources, dict): - continue if format_id == 'h264': for resource in resources: h264_url = resource.get('file') @@ -242,6 +240,8 @@ class TEDIE(InfoExtractor): 'tbr': int_or_none(resource.get('bitrate')), }) elif format_id == 'hls': + if not isinstance(resources, dict): + continue stream_url = url_or_none(resources.get('stream')) if not stream_url: continue diff --git a/youtube_dl/extractor/tv3.py b/youtube_dl/extractor/tv3.py deleted file mode 100644 index 3867ec90d..000000000 --- a/youtube_dl/extractor/tv3.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class TV3IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx' - _TEST = { - 'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx', - 'info_dict': { - 'id': '4659127992001', - 'ext': 'mp4', - 'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3', - 'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.', - 'uploader_id': '3812193411001', - 'upload_date': '20151213', - 'timestamp': 1449975272, - }, - 'expected_warnings': [ - 'Failed to download MPD manifest' - ], - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - brightcove_id = self._search_regex(r'