diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d7c15e85a..0c24155e6 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.09.12.1** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.27 + [debug] youtube-dl version 2019.09.12.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 741862590..9babe0360 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.09.12.1** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 4fb035ea4..72322fe26 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.09.12.1** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 73ed62012..da7f2cf93 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.09.12.1** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.06.27 + [debug] youtube-dl version 2019.09.12.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index a9d3653e2..d41022b9f 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.06.27** +- [ ] I've verified that I'm running youtube-dl version **2019.09.12.1** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd9ccbe96..ac759ddc4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -339,6 +339,72 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` +### Inline values + +Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. + +#### Example + +Correct: + +```python +title = self._html_search_regex(r'([^<]+)', webpage, 'title') +``` + +Incorrect: + +```python +TITLE_RE = r'([^<]+)' +# ...some lines of code... +title = self._html_search_regex(TITLE_RE, webpage, 'title') +``` + +### Collapse fallbacks + +Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns. + +#### Example + +Good: + +```python +description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) +``` + +Unwieldy: + +```python +description = ( + self._og_search_description(webpage, default=None) + or self._html_search_meta('description', webpage, default=None) + or self._html_search_meta('twitter:description', webpage, default=None)) +``` + +Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`. + +### Trailing parentheses + +Always move trailing parentheses after the last argument. + +#### Example + +Correct: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) +``` + +Incorrect: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list, +) +``` + ### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. diff --git a/ChangeLog b/ChangeLog index 4ae3d6c7c..2aba02065 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,163 @@ +version 2019.09.12.1 + +Extractors +* [youtube] Remove quality and tbr for itag 43 (#22372) + + +version 2019.09.12 + +Extractors +* [youtube] Quick extraction tempfix (#22367, #22163) + + +version 2019.09.01 + +Core ++ [extractor/generic] Add support for squarespace embeds (#21294, #21802, + #21859) ++ [downloader/external] Respect mtime option for aria2c (#22242) + +Extractors ++ [xhamster:user] Add support for user pages (#16330, #18454) ++ [xhamster] Add support for more domains ++ [verystream] Add support for woof.tube (#22217) ++ [dailymotion] Add support for lequipe.fr (#21328, #22152) ++ [openload] Add support for oload.vip (#22205) ++ [bbccouk] Extend URL regular expression (#19200) ++ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223) +* [safari] Fix authentication (#22161, #22184) +* [usanetwork] Fix extraction (#22105) ++ [einthusan] Add support for einthusan.ca (#22171) +* [youtube] Improve unavailable message extraction (#22117) ++ [piksel] Extract subtitles (#20506) + + +version 2019.08.13 + +Core +* [downloader/fragment] Fix ETA calculation of resumed download (#21992) +* [YoutubeDL] Check annotations availability (#18582) + +Extractors +* [youtube:playlist] Improve flat extraction (#21927) +* [youtube] Fix annotations extraction (#22045) ++ [discovery] Extract series meta field (#21808) +* [youtube] Improve error detection (#16445) +* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986) ++ [roosterteeth] Add support for watch URLs +* [discovery] Limit video data by show slug (#21980) + + +version 2019.08.02 + +Extractors ++ [tvigle] Add support for HLS and DASH formats (#21967) +* [tvigle] Fix extraction (#21967) ++ [yandexvideo] Add support for DASH formats (#21971) +* [discovery] Use API call for video data extraction (#21808) ++ [mgtv] Extract format_note (#21881) +* [tvn24] Fix metadata extraction (#21833, #21834) +* [dlive] Relax URL regular expression (#21909) ++ [openload] Add support for oload.best (#21913) +* [youtube] Improve metadata extraction for age gate content (#21943) + + +version 2019.07.30 + +Extractors +* [youtube] Fix and improve title and description extraction (#21934) + + +version 2019.07.27 + +Extractors ++ [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265) ++ [discovery] Add support go.discovery.com URLs +* [youtube:playlist] Relax video regular expression (#21844) +* [generic] Restrict --default-search schemeless URLs detection pattern + (#21842) +* [vrv] Fix CMS signing query extraction (#21809) + + +version 2019.07.16 + +Extractors ++ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv + (#21281, #21290) +* [kaltura] Check source format URL (#21290) +* [ctsnews] Fix YouTube embeds extraction (#21678) ++ [einthusan] Add support for einthusan.com (#21748, #21775) ++ [youtube] Add support for invidious.mastodon.host (#21777) ++ [gfycat] Extend URL regular expression (#21779, #21780) +* [youtube] Restrict is_live extraction (#21782) + + +version 2019.07.14 + +Extractors +* [porn91] Fix extraction (#21312) ++ [yandexmusic] Extract track number and disk number (#21421) ++ [yandexmusic] Add support for multi disk albums (#21420, #21421) +* [lynda] Handle missing subtitles (#20490, #20513) ++ [youtube] Add more invidious instances to URL regular expression (#21694) +* [twitter] Improve uploader id extraction (#21705) +* [spankbang] Fix and improve metadata extraction +* [spankbang] Fix extraction (#21763, #21764) ++ [dlive] Add support for dlive.tv (#18080) ++ [livejournal] Add support for livejournal.com (#21526) +* [roosterteeth] Fix free episode extraction (#16094) +* [dbtv] Fix extraction +* [bellator] Fix extraction +- [rudo] Remove extractor (#18430, #18474) +* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224) +* [bleacherreport] Fix Bleacher Report CMS extraction +* [espn] Fix fivethirtyeight.com extraction +* [5tv] Relax video URL regular expression and support https URLs +* [youtube] Fix is_live extraction (#21734) +* [youtube] Fix authentication (#11270) + + +version 2019.07.12 + +Core ++ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016) + +Extractors ++ [mgtv] Pass Referer HTTP header for format URLs (#21726) ++ [beeg] Add support for api/v6 v2 URLs without t argument (#21701) +* [voxmedia:volume] Improvevox embed extraction (#16846) +* [funnyordie] Move extraction to VoxMedia extractor (#16846) +* [gameinformer] Fix extraction (#8895, #15363, #17206) +* [funk] Fix extraction (#17915) +* [packtpub] Relax lesson URL regular expression (#21695) +* [packtpub] Fix extraction (#21268) +* [philharmoniedeparis] Relax URL regular expression (#21672) +* [peertube] Detect embed URLs in generic extraction (#21666) +* [mixer:vod] Relax URL regular expression (#21657, #21658) ++ [lecturio] Add support id based URLs (#21630) ++ [go] Add site info for disneynow (#21613) +* [ted] Restrict info regular expression (#21631) +* [twitch:vod] Actualize m3u8 URL (#21538, #21607) +* [vzaar] Fix videos with empty title (#21606) +* [tvland] Fix extraction (#21384) +* [arte] Clean extractor (#15583, #21614) + + +version 2019.07.02 + +Core ++ [utils] Introduce random_user_agent and use as default User-Agent (#21546) + +Extractors ++ [vevo] Add support for embed.vevo.com URLs (#21565) ++ [openload] Add support for oload.biz (#21574) +* [xiami] Update API base URL (#21575) +* [yourporn] Fix extraction (#21585) ++ [acast] Add support for URLs with episode id (#21444) ++ [dailymotion] Add support for DM.player embeds +* [soundcloud] Update client id + + version 2019.06.27 Extractors diff --git a/README.md b/README.md index 8c48a3012..c39b13616 100644 --- a/README.md +++ b/README.md @@ -1216,6 +1216,72 @@ Incorrect: 'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' ``` +### Inline values + +Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult. + +#### Example + +Correct: + +```python +title = self._html_search_regex(r'([^<]+)', webpage, 'title') +``` + +Incorrect: + +```python +TITLE_RE = r'([^<]+)' +# ...some lines of code... +title = self._html_search_regex(TITLE_RE, webpage, 'title') +``` + +### Collapse fallbacks + +Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns. + +#### Example + +Good: + +```python +description = self._html_search_meta( + ['og:description', 'description', 'twitter:description'], + webpage, 'description', default=None) +``` + +Unwieldy: + +```python +description = ( + self._og_search_description(webpage, default=None) + or self._html_search_meta('description', webpage, default=None) + or self._html_search_meta('twitter:description', webpage, default=None)) +``` + +Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`. + +### Trailing parentheses + +Always move trailing parentheses after the last argument. + +#### Example + +Correct: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list) +``` + +Incorrect: + +```python + lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], + list, +) +``` + ### Use convenience conversion and parsing functions Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 55ae43144..18bddc138 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -58,16 +58,8 @@ - **ARD:mediathek** - **ARDBetaMediathek** - **Arkena** - - **arte.tv** - **arte.tv:+7** - - **arte.tv:cinema** - - **arte.tv:concert** - - **arte.tv:creative** - - **arte.tv:ddc** - **arte.tv:embed** - - **arte.tv:future** - - **arte.tv:info** - - **arte.tv:magazine** - **arte.tv:playlist** - **AsianCrush** - **AsianCrushPlaylist** @@ -231,6 +223,8 @@ - **DiscoveryNetworksDe** - **DiscoveryVR** - **Disney** + - **dlive:stream** + - **dlive:vod** - **Dotsub** - **DouyuShow** - **DouyuTV**: 斗鱼 @@ -313,9 +307,7 @@ - **FrontendMastersCourse** - **FrontendMastersLesson** - **Funimation** - - **FunkChannel** - - **FunkMix** - - **FunnyOrDie** + - **Funk** - **Fusion** - **Fux** - **FXNetworks** @@ -458,6 +450,7 @@ - **linkedin:learning:course** - **LinuxAcademy** - **LiTV** + - **LiveJournal** - **LiveLeak** - **LiveLeakEmbed** - **livestream** @@ -764,7 +757,6 @@ - **rtve.es:television** - **RTVNH** - **RTVS** - - **Rudo** - **RUHD** - **rutube**: Rutube videos - **rutube:channel**: Rutube channels @@ -896,7 +888,6 @@ - **TF1** - **TFO** - **TheIntercept** - - **theoperaplatform** - **ThePlatform** - **ThePlatformFeed** - **TheScene** @@ -1109,6 +1100,7 @@ - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me - **XHamster** - **XHamsterEmbed** + - **XHamsterUser** - **xiami:album**: 虾米音乐 - 专辑 - **xiami:artist**: 虾米音乐 - 歌手 - **xiami:collection**: 虾米音乐 - 精选集 @@ -1126,6 +1118,7 @@ - **Yahoo**: Yahoo screen and movies - **yahoo:gyao** - **yahoo:gyao:player** + - **yahoo:japannews**: Yahoo! Japan News - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3e832fec2..6a44bc7ba 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1783,6 +1783,8 @@ class YoutubeDL(object): annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)): self.to_screen('[info] Video annotations are already present') + elif not info_dict.get('annotations'): + self.report_warning('There are no annotations to write.') else: try: self.to_screen('[info] Writing video annotations to: ' + annofn) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 165c975dd..9a659fc65 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -94,7 +94,7 @@ def _real_main(argv=None): if opts.verbose: write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: - sys.exit('ERROR: batch file could not be read') + sys.exit('ERROR: batch file %s could not be read' % opts.batchfile) all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index eaa7adf7c..c6d674bc6 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -53,7 +53,7 @@ class DashSegmentsFD(FragmentFD): except compat_urllib_error.HTTPError as err: # YouTube may often return 404 HTTP error for a fragment causing the # whole download to fail. However if the same fragment is immediately - # retried with the same request data this usually succeeds (1-2 attemps + # retried with the same request data this usually succeeds (1-2 attempts # is usually enough) thus allowing to download the whole file successfully. # To be future-proof we will retry all fragments that fail with any # HTTP error. diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index acdb27712..c31f8910a 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -194,6 +194,7 @@ class Aria2cFD(ExternalFD): cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') + cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') cmd += ['--', info_dict['url']] return cmd diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index f2e5733b6..02f35459e 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -190,12 +190,13 @@ class FragmentFD(FileDownloader): }) def _start_frag_download(self, ctx): + resume_len = ctx['complete_frags_downloaded_bytes'] total_frags = ctx['total_frags'] # This dict stores the download progress, it's updated by the progress # hook state = { 'status': 'downloading', - 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'], + 'downloaded_bytes': resume_len, 'fragment_index': ctx['fragment_index'], 'fragment_count': total_frags, 'filename': ctx['filename'], @@ -234,8 +235,8 @@ class FragmentFD(FileDownloader): state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] if not ctx['live']: state['eta'] = self.calc_eta( - start, time_now, estimated_size, - state['downloaded_bytes']) + start, time_now, estimated_size - resume_len, + state['downloaded_bytes'] - resume_len) state['speed'] = s.get('speed') or ctx.get('speed') ctx['speed'] = state['speed'] ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 063fcf444..1ca666b4a 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -146,7 +146,7 @@ def write_piff_header(stream, params): sps, pps = codec_private_data.split(u32.pack(1))[1:] avcc_payload = u8.pack(1) # configuration version avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication - avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one + avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001) avcc_payload += u16.pack(len(sps)) avcc_payload += sps diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index cd29aca77..8b407bf9c 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -15,10 +15,13 @@ class AbcNewsVideoIE(AMPIE): IE_NAME = 'abcnews:video' _VALID_URL = r'''(?x) https?:// - abcnews\.go\.com/ (?: - [^/]+/video/(?P[0-9a-z-]+)-| - video/embed\?.*?\bid= + abcnews\.go\.com/ + (?: + [^/]+/video/(?P[0-9a-z-]+)-| + video/embed\?.*?\bid= + )| + fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/ ) (?P\d+) ''' diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index c4362be88..b17c792d2 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -7,6 +7,7 @@ import functools from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, float_or_none, int_or_none, try_get, @@ -27,7 +28,7 @@ class ACastIE(InfoExtractor): ''' _TESTS = [{ 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', - 'md5': 'a02393c74f3bdb1801c3ec2695577ce0', + 'md5': '16d936099ec5ca2d5869e3a813ee8dc4', 'info_dict': { 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', 'ext': 'mp3', @@ -46,28 +47,37 @@ class ACastIE(InfoExtractor): }, { 'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22', 'only_matching': True, + }, { + 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', + 'only_matching': True, }] def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() s = self._download_json( - 'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id), - display_id)['result'] + 'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id), + display_id) media_url = s['url'] + if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id): + episode_url = s.get('episodeUrl') + if episode_url: + display_id = episode_url + else: + channel, display_id = re.match(self._VALID_URL, s['link']).groups() cast_data = self._download_json( 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)['result'] e = cast_data['episode'] - title = e['name'] + title = e.get('name') or s['title'] return { 'id': compat_str(e['id']), 'display_id': display_id, 'url': media_url, 'title': title, - 'description': e.get('description') or e.get('summary'), + 'description': e.get('summary') or clean_html(e.get('description') or s.get('description')), 'thumbnail': e.get('image'), - 'timestamp': unified_timestamp(e.get('publishingDate')), - 'duration': float_or_none(s.get('duration') or e.get('duration')), + 'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')), + 'duration': float_or_none(e.get('duration') or s.get('duration')), 'filesize': int_or_none(e.get('contentLength')), 'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str), 'series': try_get(cast_data, lambda x: x['show']['name'], compat_str), diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index a14ffd42c..78b6d3143 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -25,6 +25,11 @@ MSO_INFO = { 'username_field': 'username', 'password_field': 'password', }, + 'ATT': { + 'name': 'AT&T U-verse', + 'username_field': 'userid', + 'password_field': 'password', + }, 'ATTOTT': { 'name': 'DIRECTV NOW', 'username_field': 'userid', diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index ffc321821..2bd3bfe8a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -4,17 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_str, - compat_urllib_parse_urlparse, -) +from ..compat import compat_str from ..utils import ( ExtractorError, - find_xpath_attr, - get_element_by_attribute, int_or_none, - NO_DEFAULT, qualities, try_get, unified_strdate, @@ -25,59 +18,7 @@ from ..utils import ( # add tests. -class ArteTvIE(InfoExtractor): - _VALID_URL = r'https?://videos\.arte\.tv/(?Pfr|de|en|es)/.*-(?P.*?)\.html' - IE_NAME = 'arte.tv' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - lang = mobj.group('lang') - video_id = mobj.group('id') - - ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') - ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') - ref_xml_doc = self._download_xml( - ref_xml_url, video_id, note='Downloading metadata') - config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) - config_xml_url = config_node.attrib['ref'] - config = self._download_xml( - config_xml_url, video_id, note='Downloading configuration') - - formats = [{ - 'format_id': q.attrib['quality'], - # The playpath starts at 'mp4:', if we don't manually - # split the url, rtmpdump will incorrectly parse them - 'url': q.text.split('mp4:', 1)[0], - 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], - 'ext': 'flv', - 'quality': 2 if q.attrib['quality'] == 'hd' else 1, - } for q in config.findall('./urls/url')] - self._sort_formats(formats) - - title = config.find('.//name').text - thumbnail = config.find('.//firstThumbnailUrl').text - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } - - class ArteTVBaseIE(InfoExtractor): - @classmethod - def _extract_url_info(cls, url): - mobj = re.match(cls._VALID_URL, url) - lang = mobj.group('lang') - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - if 'vid' in query: - video_id = query['vid'][0] - else: - # This is not a real id, it can be for example AJT for the news - # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal - video_id = mobj.group('id') - return video_id, lang - def _extract_from_json_url(self, json_url, video_id, lang, title=None): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] @@ -108,13 +49,15 @@ class ArteTVBaseIE(InfoExtractor): 'upload_date': unified_strdate(upload_date_str), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) + qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ']) LANGS = { 'fr': 'F', 'de': 'A', 'en': 'E[ANG]', 'es': 'E[ESP]', + 'it': 'E[ITA]', + 'pl': 'E[POL]', } langcode = LANGS.get(lang, lang) @@ -126,8 +69,8 @@ class ArteTVBaseIE(InfoExtractor): l = re.escape(langcode) # Language preference from most to least priority - # Reference: section 5.6.3 of - # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf + # Reference: section 6.8 of + # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf PREFERENCES = ( # original version in requested language, without subtitles r'VO{0}$'.format(l), @@ -193,274 +136,59 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVPlus7IE(ArteTVBaseIE): IE_NAME = 'arte.tv:+7' - _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?Pfr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?Pfr|de|en|es|it|pl)/videos/(?P\d{6}-\d{3}-[AF])' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', - 'only_matching': True, - }, { - 'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', - 'only_matching': True, - }, { - 'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn', - 'only_matching': True, + 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', + 'info_dict': { + 'id': '088501-000-A', + 'ext': 'mp4', + 'title': 'Mexico: Stealing Petrol to Survive', + 'upload_date': '20190628', + }, }] - @classmethod - def suitable(cls, url): - return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url) - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - webpage = self._download_webpage(url, video_id) - return self._extract_from_webpage(webpage, video_id, lang) - - def _extract_from_webpage(self, webpage, video_id, lang): - patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']') - ids = (video_id, '') - # some pages contain multiple videos (like - # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), - # so we first try to look for json URLs that contain the video id from - # the 'vid' parameter. - patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] - json_url = self._html_search_regex( - patterns, webpage, 'json vp url', default=None) - if not json_url: - def find_iframe_url(webpage, default=NO_DEFAULT): - return self._html_search_regex( - r']+src=(["\'])(?P.+\bjson_url=.+?)\1', - webpage, 'iframe url', group='url', default=default) - - iframe_url = find_iframe_url(webpage, None) - if not iframe_url: - embed_url = self._html_search_regex( - r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None) - if embed_url: - player = self._download_json( - embed_url, video_id, 'Downloading player page') - iframe_url = find_iframe_url(player['html']) - # en and es URLs produce react-based pages with different layout (e.g. - # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) - if not iframe_url: - program = self._search_regex( - r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', - webpage, 'program', default=None) - if program: - embed_html = self._parse_json(program, video_id) - if embed_html: - iframe_url = find_iframe_url(embed_html['embed_html']) - if iframe_url: - json_url = compat_parse_qs( - compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] - if json_url: - title = self._search_regex( - r']+title=(["\'])(?P.+?)\1', - webpage, 'title', default=None, group='title') - return self._extract_from_json_url(json_url, video_id, lang, title=title) - # Different kind of embed URL (e.g. - # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) - entries = [ - self.url_result(url) - for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)] - return self.playlist_result(entries) - - -# It also uses the arte_vp_url url from the webpage to extract the information -class ArteTVCreativeIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:creative' - _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1', - 'info_dict': { - 'id': '057405-001-A', - 'ext': 'mp4', - 'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)', - 'upload_date': '20150716', - }, - }, { - 'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion', - 'playlist_count': 11, - 'add_ie': ['Youtube'], - }, { - 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', - 'only_matching': True, - }] - - -class ArteTVInfoIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:info' - _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', - 'info_dict': { - 'id': '067528-000-A', - 'ext': 'mp4', - 'title': 'Service civique, un cache misère ?', - 'upload_date': '20160403', - }, - }] - - -class ArteTVFutureIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:future' - _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses', - 'info_dict': { - 'id': '050940-028-A', - 'ext': 'mp4', - 'title': 'Les écrevisses aussi peuvent être anxieuses', - 'upload_date': '20140902', - }, - }, { - 'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable', - 'only_matching': True, - }] - - -class ArteTVDDCIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:ddc' - _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)' - - _TESTS = [] - - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - if lang == 'folge': - lang = 'de' - elif lang == 'emission': - lang = 'fr' - webpage = self._download_webpage(url, video_id) - scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage) - script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url') - javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') - json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') - return self._extract_from_json_url(json_url, video_id, lang) - - -class ArteTVConcertIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:concert' - _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', - 'md5': '9ea035b7bd69696b67aa2ccaaa218161', - 'info_dict': { - 'id': '186', - 'ext': 'mp4', - 'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"', - 'upload_date': '20140128', - 'description': 'md5:486eb08f991552ade77439fe6d82c305', - }, - }] - - -class ArteTVCinemaIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:cinema' - _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck', - 'md5': 'a5b9dd5575a11d93daf0e3f404f45438', - 'info_dict': { - 'id': '062494-000-A', - 'ext': 'mp4', - 'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck', - 'upload_date': '20150807', - }, - }] - - -class ArteTVMagazineIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:magazine' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - # Embedded via <iframe src="http://www.arte.tv/arte_vp/index.php?json_url=..." - 'url': 'http://www.arte.tv/magazine/trepalium/fr/entretien-avec-le-realisateur-vincent-lannoo-trepalium', - 'md5': '2a9369bcccf847d1c741e51416299f25', - 'info_dict': { - 'id': '065965-000-A', - 'ext': 'mp4', - 'title': 'Trepalium - Extrait Ep.01', - 'upload_date': '20160121', - }, - }, { - # Embedded via <iframe src="http://www.arte.tv/guide/fr/embed/054813-004-A/medium" - 'url': 'http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium', - 'md5': 'fedc64fc7a946110fe311634e79782ca', - 'info_dict': { - 'id': '054813-004_PLUS7-F', - 'ext': 'mp4', - 'title': 'Trepalium (4/6)', - 'description': 'md5:10057003c34d54e95350be4f9b05cb40', - 'upload_date': '20160218', - }, - }, { - 'url': 'http://www.arte.tv/magazine/metropolis/de/frank-woeste-german-paris-metropolis', - 'only_matching': True, - }] + lang, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_from_json_url( + 'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id), + video_id, lang) class ArteTVEmbedIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:embed' _VALID_URL = r'''(?x) - http://www\.arte\.tv - /(?:playerv2/embed|arte_vp/index)\.php\?json_url= + https://www\.arte\.tv + /player/v3/index\.php\?json_url= (?P<json_url> - http://arte\.tv/papi/tvguide/videos/stream/player/ - (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]* + https?://api\.arte\.tv/api/player/v1/config/ + (?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF]) ) ''' _TESTS = [] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - lang = mobj.group('lang') - json_url = mobj.group('json_url') + json_url, lang, video_id = re.match(self._VALID_URL, url).groups() return self._extract_from_json_url(json_url, video_id, lang) -class TheOperaPlatformIE(ArteTVPlus7IE): - IE_NAME = 'theoperaplatform' - _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello', - 'md5': '970655901fa2e82e04c00b955e9afe7b', - 'info_dict': { - 'id': '060338-009-A', - 'ext': 'mp4', - 'title': 'Verdi - OTELLO', - 'upload_date': '20160927', - }, - }] - - class ArteTVPlaylistIE(ArteTVBaseIE): IE_NAME = 'arte.tv:playlist' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV', + 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'info_dict': { - 'id': 'PL-013263', - 'title': 'Areva & Uramin', - 'description': 'md5:a1dc0312ce357c262259139cfd48c9bf', + 'id': 'RC-016954', + 'title': 'Earn a Living', + 'description': 'md5:d322c55011514b3a7241f7fb80d494c2', }, 'playlist_mincount': 6, - }, { - 'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV', - 'only_matching': True, }] def _real_extract(self, url): - playlist_id, lang = self._extract_url_info(url) + lang, playlist_id = re.match(self._VALID_URL, url).groups() collection = self._download_json( 'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' % (lang, playlist_id), playlist_id) diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py index 6d71c5ad5..0348e680c 100644 --- a/youtube_dl/extractor/asiancrush.py +++ b/youtube_dl/extractor/asiancrush.py @@ -5,14 +5,12 @@ import re from .common import InfoExtractor from .kaltura import KalturaIE -from ..utils import ( - extract_attributes, - remove_end, -) +from ..utils import extract_attributes class AsianCrushIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' + _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))' + _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE _TESTS = [{ 'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/', 'md5': 'c3b740e48d0ba002a42c0b72857beae6', @@ -20,7 +18,7 @@ class AsianCrushIE(InfoExtractor): 'id': '1_y4tmjm5r', 'ext': 'mp4', 'title': 'Women Who Flirt', - 'description': 'md5:3db14e9186197857e7063522cb89a805', + 'description': 'md5:7e986615808bcfb11756eb503a751487', 'timestamp': 1496936429, 'upload_date': '20170608', 'uploader_id': 'craig@crifkin.com', @@ -28,10 +26,27 @@ class AsianCrushIE(InfoExtractor): }, { 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', 'only_matching': True, + }, { + 'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/', + 'only_matching': True, + }, { + 'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/video/010400v/drifters/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/', + 'only_matching': True, + }, { + 'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) @@ -51,7 +66,7 @@ class AsianCrushIE(InfoExtractor): r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id') player = self._download_webpage( - 'https://api.asiancrush.com/embeddedVideoPlayer', video_id, + 'https://api.%s/embeddedVideoPlayer' % host, video_id, query={'id': entry_id}) kaltura_id = self._search_regex( @@ -63,15 +78,23 @@ class AsianCrushIE(InfoExtractor): r'/p(?:artner_id)?/(\d+)', player, 'partner id', default='513551') - return self.url_result( - 'kaltura:%s:%s' % (partner_id, kaltura_id), - ie=KalturaIE.ie_key(), video_id=kaltura_id, - video_title=title) + description = self._html_search_regex( + r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>', + webpage, 'description', fatal=False) + + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), + 'ie_key': KalturaIE.ie_key(), + 'id': video_id, + 'title': title, + 'description': description, + } class AsianCrushPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b' - _TEST = { + _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE + _TESTS = [{ 'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/', 'info_dict': { 'id': '12481', @@ -79,7 +102,16 @@ class AsianCrushPlaylistIE(InfoExtractor): 'description': 'md5:7addd7c5132a09fd4741152d96cce886', }, 'playlist_count': 20, - } + }, { + 'url': 'https://www.yuyutv.com/series/013920s/peep-show/', + 'only_matching': True, + }, { + 'url': 'https://www.midnightpulp.com/series/016375s/mononoke/', + 'only_matching': True, + }, { + 'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/', + 'only_matching': True, + }] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -96,15 +128,15 @@ class AsianCrushPlaylistIE(InfoExtractor): entries.append(self.url_result( mobj.group('url'), ie=AsianCrushIE.ie_key())) - title = remove_end( - self._html_search_regex( - r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage, - 'title', default=None) or self._og_search_title( - webpage, default=None) or self._html_search_meta( - 'twitter:title', webpage, 'title', - default=None) or self._search_regex( - r'<title>([^<]+)', webpage, 'title', fatal=False), - ' | AsianCrush') + title = self._html_search_regex( + r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, + 'title', default=None) or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', + default=None) or self._search_regex( + r'([^<]+)', webpage, 'title', fatal=False) + if title: + title = re.sub(r'\s*\|\s*.+?$', '', title) description = self._og_search_description( webpage, default=None) or self._html_search_meta( diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index e76507951..901c5a54f 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -40,6 +40,7 @@ class BBCCoUkIE(InfoExtractor): iplayer(?:/[^/]+)?/(?:episode/|playlist/)| music/(?:clips|audiovideo/popular)[/#]| radio/player/| + sounds/play/| events/[^/]+/play/[^/]+/ ) (?P%s)(?!/(?:episodes|broadcasts|clips)) @@ -70,7 +71,7 @@ class BBCCoUkIE(InfoExtractor): 'info_dict': { 'id': 'b039d07m', 'ext': 'flv', - 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4', + 'title': 'Kaleidoscope, Leonard Cohen', 'description': 'The Canadian poet and songwriter reflects on his musical career.', }, 'params': { @@ -220,6 +221,20 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, + }, { + 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb', + 'note': 'Audio', + 'info_dict': { + 'id': 'm0007jz9', + 'ext': 'mp4', + 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra', + 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.", + 'duration': 9840, + }, + 'params': { + # rtmp download + 'skip_download': True, + } }, { 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', 'only_matching': True, @@ -609,7 +624,7 @@ class BBCIE(BBCCoUkIE): 'url': 'http://www.bbc.com/news/world-europe-32668511', 'info_dict': { 'id': 'world-europe-32668511', - 'title': 'Russia stages massive WW2 parade despite Western boycott', + 'title': 'Russia stages massive WW2 parade', 'description': 'md5:00ff61976f6081841f759a08bf78cc9c', }, 'playlist_count': 2, diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index e264a145f..86abdae00 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -99,7 +99,7 @@ class BeamProLiveIE(BeamProBaseIE): class BeamProVodIE(BeamProBaseIE): IE_NAME = 'Mixer:vod' - _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\w+)' + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P[^?#&]+)' _TESTS = [{ 'url': 'https://mixer.com/willow8714?vod=2259830', 'md5': 'b2431e6e8347dc92ebafb565d368b76b', @@ -122,6 +122,9 @@ class BeamProVodIE(BeamProBaseIE): }, { 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', 'only_matching': True, + }, { + 'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig', + 'only_matching': True, }] @staticmethod diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c15a0ac8f..5788d13ba 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -32,6 +32,10 @@ class BeegIE(InfoExtractor): # api/v6 v2 'url': 'https://beeg.com/1941093077?t=911-1391', 'only_matching': True, + }, { + # api/v6 v2 w/o t + 'url': 'https://beeg.com/1277207756', + 'only_matching': True, }, { 'url': 'https://beeg.porn/video/5416503', 'only_matching': True, @@ -49,14 +53,17 @@ class BeegIE(InfoExtractor): r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', default='1546225636701') - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - t = qs.get('t', [''])[0].split('-') - if len(t) > 1: + if len(video_id) >= 10: query = { 'v': 2, - 's': t[0], - 'e': t[1], } + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query.update({ + 's': t[0], + 'e': t[1], + }) else: query = {'v': 1} diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 3746671d3..80bd696e2 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -15,6 +15,7 @@ from ..utils import ( float_or_none, parse_iso8601, smuggle_url, + str_or_none, strip_jsonp, unified_timestamp, unsmuggle_url, @@ -306,3 +307,115 @@ class BiliBiliBangumiIE(InfoExtractor): return self.playlist_result( entries, bangumi_id, season_info.get('bangumi_title'), season_info.get('evaluate')) + + +class BilibiliAudioBaseIE(InfoExtractor): + def _call_api(self, path, sid, query=None): + if not query: + query = {'sid': sid} + return self._download_json( + 'https://www.bilibili.com/audio/music-service-c/web/' + path, + sid, query=query)['data'] + + +class BilibiliAudioIE(BilibiliAudioBaseIE): + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P\d+)' + _TEST = { + 'url': 'https://www.bilibili.com/audio/au1003142', + 'md5': 'fec4987014ec94ef9e666d4d158ad03b', + 'info_dict': { + 'id': '1003142', + 'ext': 'm4a', + 'title': '【tsukimi】YELLOW / 神山羊', + 'artist': 'tsukimi', + 'comment_count': int, + 'description': 'YELLOW的mp3版!', + 'duration': 183, + 'subtitles': { + 'origin': [{ + 'ext': 'lrc', + }], + }, + 'thumbnail': r're:^https?://.+\.jpg', + 'timestamp': 1564836614, + 'upload_date': '20190803', + 'uploader': 'tsukimi-つきみぐー', + 'view_count': int, + }, + } + + def _real_extract(self, url): + au_id = self._match_id(url) + + play_data = self._call_api('url', au_id) + formats = [{ + 'url': play_data['cdns'][0], + 'filesize': int_or_none(play_data.get('size')), + }] + + song = self._call_api('song/info', au_id) + title = song['title'] + statistic = song.get('statistic') or {} + + subtitles = None + lyric = song.get('lyric') + if lyric: + subtitles = { + 'origin': [{ + 'url': lyric, + }] + } + + return { + 'id': au_id, + 'title': title, + 'formats': formats, + 'artist': song.get('author'), + 'comment_count': int_or_none(statistic.get('comment')), + 'description': song.get('intro'), + 'duration': int_or_none(song.get('duration')), + 'subtitles': subtitles, + 'thumbnail': song.get('cover'), + 'timestamp': int_or_none(song.get('passtime')), + 'uploader': song.get('uname'), + 'view_count': int_or_none(statistic.get('play')), + } + + +class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P\d+)' + _TEST = { + 'url': 'https://www.bilibili.com/audio/am10624', + 'info_dict': { + 'id': '10624', + 'title': '每日新曲推荐(每日11:00更新)', + 'description': '每天11:00更新,为你推送最新音乐', + }, + 'playlist_count': 19, + } + + def _real_extract(self, url): + am_id = self._match_id(url) + + songs = self._call_api( + 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data'] + + entries = [] + for song in songs: + sid = str_or_none(song.get('id')) + if not sid: + continue + entries.append(self.url_result( + 'https://www.bilibili.com/audio/au' + sid, + BilibiliAudioIE.ie_key(), sid)) + + if entries: + album_data = self._call_api('menu/info', am_id) or {} + album_title = album_data.get('title') + if album_title: + for entry in entries: + entry['album'] = album_title + return self.playlist_result( + entries, am_id, album_title, album_data.get('intro')) + + return self.playlist_result(entries, am_id) diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py index b92031c8a..dc86c57c5 100644 --- a/youtube_dl/extractor/biobiochiletv.py +++ b/youtube_dl/extractor/biobiochiletv.py @@ -6,7 +6,6 @@ from ..utils import ( ExtractorError, remove_end, ) -from .rudo import RudoIE class BioBioChileTVIE(InfoExtractor): @@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor): }, { 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', 'info_dict': { - 'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos', + 'id': 'b4xd0LK3SK', 'ext': 'mp4', - 'uploader': '(none)', - 'upload_date': '20160708', - 'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos', + # TODO: fix url_transparent information overriding + # 'uploader': 'Juan Pablo Echenique', + 'title': 'Comentario Oscar Cáceres', + }, + 'params': { + # empty m3u8 manifest + 'skip_download': True, }, }, { 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', @@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - rudo_url = RudoIE._extract_url(webpage) + rudo_url = self._search_regex( + r']+src=(?P[\'"])(?P(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage, 'embed URL', None, group='url') if not rudo_url: raise ExtractorError('No videos found') @@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( - r']+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)', + r']+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)', webpage, 'uploader', fatal=False) return { diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py index e829974ff..dc60224d0 100644 --- a/youtube_dl/extractor/bleacherreport.py +++ b/youtube_dl/extractor/bleacherreport.py @@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor): video = article_data.get('video') if video: video_type = video['type'] - if video_type == 'cms.bleacherreport.com': + if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] elif video_type == 'ooyala.com': info['url'] = 'ooyala:%s' % video['id'] @@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor): class BleacherReportCMSIE(AMPIE): - _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P[0-9a-f-]{36})' + _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P[0-9a-f-]{36}|\d{5})' _TESTS = [{ - 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', + 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms', 'md5': '2e4b0a997f9228ffa31fada5c53d1ed1', 'info_dict': { 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', @@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE): def _real_extract(self, url): video_id = self._match_id(url) - info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id) + info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) info['id'] = video_id return info diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 58ec5c979..8e2f7217a 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import base64 -import json import re import struct @@ -11,14 +10,12 @@ from .adobepass import AdobePassIE from ..compat import ( compat_etree_fromstring, compat_parse_qs, - compat_str, compat_urllib_parse_urlparse, compat_urlparse, compat_xml_parse_error, compat_HTTPError, ) from ..utils import ( - determine_ext, ExtractorError, extract_attributes, find_xpath_attr, @@ -27,18 +24,19 @@ from ..utils import ( js_to_json, int_or_none, parse_iso8601, + smuggle_url, unescapeHTML, unsmuggle_url, update_url_query, clean_html, mimetype2ext, + UnsupportedError, ) class BrightcoveLegacyIE(InfoExtractor): IE_NAME = 'brightcove:legacy' _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P.*)' - _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated' _TESTS = [ { @@ -55,7 +53,8 @@ class BrightcoveLegacyIE(InfoExtractor): 'timestamp': 1368213670, 'upload_date': '20130510', 'uploader_id': '1589608506001', - } + }, + 'skip': 'The player has been deactivated by the content owner', }, { # From http://medianetwork.oracle.com/video/player/1785452137001 @@ -70,6 +69,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'upload_date': '20120814', 'uploader_id': '1460825906', }, + 'skip': 'video not playable', }, { # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ @@ -79,7 +79,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'ext': 'mp4', 'title': 'This Bracelet Acts as a Personal Thermostat', 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0', - 'uploader': 'Mashable', + # 'uploader': 'Mashable', 'timestamp': 1382041798, 'upload_date': '20131017', 'uploader_id': '1130468786001', @@ -124,6 +124,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'id': '3550319591001', }, 'playlist_mincount': 7, + 'skip': 'Unsupported URL', }, { # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965) @@ -133,6 +134,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'title': 'Lesson 08', }, 'playlist_mincount': 10, + 'skip': 'Unsupported URL', }, { # playerID inferred from bcpid @@ -141,12 +143,6 @@ class BrightcoveLegacyIE(InfoExtractor): 'only_matching': True, # Tested in GenericIE } ] - FLV_VCODECS = { - 1: 'SORENSON', - 2: 'ON2', - 3: 'H264', - 4: 'VP8', - } @classmethod def _build_brighcove_url(cls, object_str): @@ -238,7 +234,8 @@ class BrightcoveLegacyIE(InfoExtractor): @classmethod def _make_brightcove_url(cls, params): - return update_url_query(cls._FEDERATED_URL, params) + return update_url_query( + 'http://c.brightcove.com/services/viewer/htmlFederated', params) @classmethod def _extract_brightcove_url(cls, webpage): @@ -297,38 +294,12 @@ class BrightcoveLegacyIE(InfoExtractor): videoPlayer = query.get('@videoPlayer') if videoPlayer: # We set the original url as the default 'Referer' header - referer = smuggled_data.get('Referer', url) + referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url) + video_id = videoPlayer[0] if 'playerID' not in query: mobj = re.search(r'/bcpid(\d+)', url) if mobj is not None: query['playerID'] = [mobj.group(1)] - return self._get_video_info( - videoPlayer[0], query, referer=referer) - elif 'playerKey' in query: - player_key = query['playerKey'] - return self._get_playlist_info(player_key[0]) - else: - raise ExtractorError( - 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?', - expected=True) - - def _brightcove_new_url_result(self, publisher_id, video_id): - brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) - return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) - - def _get_video_info(self, video_id, query, referer=None): - headers = {} - linkBase = query.get('linkBaseURL') - if linkBase is not None: - referer = linkBase[0] - if referer is not None: - headers['Referer'] = referer - webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query) - - error_msg = self._html_search_regex( - r"

We're sorry.

([\s\n]*

.*?

)+", webpage, - 'error message', default=None) - if error_msg is not None: publisher_id = query.get('publisherId') if publisher_id and publisher_id[0].isdigit(): publisher_id = publisher_id[0] @@ -339,6 +310,9 @@ class BrightcoveLegacyIE(InfoExtractor): else: player_id = query.get('playerID') if player_id and player_id[0].isdigit(): + headers = {} + if referer: + headers['Referer'] = referer player_page = self._download_webpage( 'http://link.brightcove.com/services/player/bcpid' + player_id[0], video_id, headers=headers, fatal=False) @@ -349,136 +323,16 @@ class BrightcoveLegacyIE(InfoExtractor): if player_key: enc_pub_id = player_key.split(',')[1].replace('~', '=') publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] - if publisher_id: - return self._brightcove_new_url_result(publisher_id, video_id) - raise ExtractorError( - 'brightcove said: %s' % error_msg, expected=True) - - self.report_extraction(video_id) - info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json') - info = json.loads(info)['data'] - video_info = info['programmedContent']['videoPlayer']['mediaDTO'] - video_info['_youtubedl_adServerURL'] = info.get('adServerURL') - - return self._extract_video_info(video_info) - - def _get_playlist_info(self, player_key): - info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key - playlist_info = self._download_webpage( - info_url, player_key, 'Downloading playlist information') - - json_data = json.loads(playlist_info) - if 'videoList' in json_data: - playlist_info = json_data['videoList'] - playlist_dto = playlist_info['mediaCollectionDTO'] - elif 'playlistTabs' in json_data: - playlist_info = json_data['playlistTabs'] - playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0] - else: - raise ExtractorError('Empty playlist') - - videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']] - - return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], - playlist_title=playlist_dto['displayName']) - - def _extract_video_info(self, video_info): - video_id = compat_str(video_info['id']) - publisher_id = video_info.get('publisherId') - info = { - 'id': video_id, - 'title': video_info['displayName'].strip(), - 'description': video_info.get('shortDescription'), - 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), - 'uploader': video_info.get('publisherName'), - 'uploader_id': compat_str(publisher_id) if publisher_id else None, - 'duration': float_or_none(video_info.get('length'), 1000), - 'timestamp': int_or_none(video_info.get('creationDate'), 1000), - } - - renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', []) - if renditions: - formats = [] - for rend in renditions: - url = rend['defaultURL'] - if not url: - continue - ext = None - if rend['remote']: - url_comp = compat_urllib_parse_urlparse(url) - if url_comp.path.endswith('.m3u8'): - formats.extend( - self._extract_m3u8_formats( - url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - continue - elif 'akamaihd.net' in url_comp.netloc: - # This type of renditions are served through - # akamaihd.net, but they don't use f4m manifests - url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' - ext = 'flv' - if ext is None: - ext = determine_ext(url) - tbr = int_or_none(rend.get('encodingRate'), 1000) - a_format = { - 'format_id': 'http%s' % ('-%s' % tbr if tbr else ''), - 'url': url, - 'ext': ext, - 'filesize': int_or_none(rend.get('size')) or None, - 'tbr': tbr, - } - if rend.get('audioOnly'): - a_format.update({ - 'vcodec': 'none', - }) - else: - a_format.update({ - 'height': int_or_none(rend.get('frameHeight')), - 'width': int_or_none(rend.get('frameWidth')), - 'vcodec': rend.get('videoCodec'), - }) - - # m3u8 manifests with remote == false are media playlists - # Not calling _extract_m3u8_formats here to save network traffic - if ext == 'm3u8': - a_format.update({ - 'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''), - 'ext': 'mp4', - 'protocol': 'm3u8_native', - }) - - formats.append(a_format) - self._sort_formats(formats) - info['formats'] = formats - elif video_info.get('FLVFullLengthURL') is not None: - info.update({ - 'url': video_info['FLVFullLengthURL'], - 'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')), - 'filesize': int_or_none(video_info.get('FLVFullSize')), - }) - - if self._downloader.params.get('include_ads', False): - adServerURL = video_info.get('_youtubedl_adServerURL') - if adServerURL: - ad_info = { - '_type': 'url', - 'url': adServerURL, - } - if 'url' in info: - return { - '_type': 'playlist', - 'title': info['title'], - 'entries': [ad_info, info], - } - else: - return ad_info - - if not info.get('url') and not info.get('formats'): - uploader_id = info.get('uploader_id') - if uploader_id: - info.update(self._brightcove_new_url_result(uploader_id, video_id)) - else: - raise ExtractorError('Unable to extract video url for %s' % video_id) - return info + if publisher_id: + brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + if referer: + brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer}) + return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) + # TODO: figure out if it's possible to extract playlistId from playerKey + # elif 'playerKey' in query: + # player_key = query['playerKey'] + # return self._get_playlist_info(player_key[0]) + raise UnsupportedError(url) class BrightcoveNewIE(AdobePassIE): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c3e9eec6..859786617 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -220,7 +220,7 @@ class InfoExtractor(object): * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) - * "resolution" (optional, string "{width}x{height"}, + * "resolution" (optional, string "{width}x{height}", deprecated) * "filesize" (optional, int) thumbnail: Full URL to a video thumbnail image. diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index d565335cf..679f1d92e 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import unified_timestamp +from .youtube import YoutubeIE class CtsNewsIE(InfoExtractor): @@ -14,8 +15,8 @@ class CtsNewsIE(InfoExtractor): 'info_dict': { 'id': '201501291578109', 'ext': 'mp4', - 'title': '以色列.真主黨交火 3人死亡', - 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...', + 'title': '以色列.真主黨交火 3人死亡 - 華視新聞網', + 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...', 'timestamp': 1422528540, 'upload_date': '20150129', } @@ -26,7 +27,7 @@ class CtsNewsIE(InfoExtractor): 'info_dict': { 'id': '201309031304098', 'ext': 'mp4', - 'title': '韓國31歲童顏男 貌如十多歲小孩', + 'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網', 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1378205880, @@ -62,8 +63,7 @@ class CtsNewsIE(InfoExtractor): video_url = mp4_feed['source_url'] else: self.to_screen('Not CTSPlayer video, trying Youtube...') - youtube_url = self._search_regex( - r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url') + youtube_url = YoutubeIE._extract_url(page) return self.url_result(youtube_url, ie='Youtube') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 1a2c1308a..745971900 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -48,7 +48,14 @@ class DailymotionBaseInfoExtractor(InfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor): - _VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P[^/?_]+)' + _VALID_URL = r'''(?ix) + https?:// + (?: + (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)| + (?:www\.)?lequipe\.fr/video + ) + /(?P[^/?_]+) + ''' IE_NAME = 'dailymotion' _FORMATS = [ @@ -133,14 +140,26 @@ class DailymotionIE(DailymotionBaseInfoExtractor): }, { 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun', 'only_matching': True, + }, { + 'url': 'https://www.lequipe.fr/video/x791mem', + 'only_matching': True, + }, { + 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2', + 'only_matching': True, }] @staticmethod def _extract_urls(webpage): + urls = [] # Look for embedded Dailymotion player - matches = re.findall( - r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) - return list(map(lambda m: unescapeHTML(m[1]), matches)) + # https://developer.dailymotion.com/player#player-parameters + for mobj in re.finditer( + r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage): + urls.append(unescapeHTML(mobj.group('url'))) + for mobj in re.finditer( + r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P[0-9a-zA-Z]+).+?}\s*\);', webpage): + urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id')) + return urls def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index f232f0dc5..aaedf2e3d 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -7,50 +7,51 @@ from .common import InfoExtractor class DBTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P[0-9]+)(?:#(?P.+))?' + _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P[^/]+))/)?(?P[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})' _TESTS = [{ - 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', - 'md5': '2e24f67936517b143a234b4cadf792ec', + 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/', + 'md5': 'b8f850ba1860adbda668d367f9b77699', 'info_dict': { - 'id': '3649835190001', - 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', + 'id': 'PynxJnNWChE', 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', - 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', + 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', 'thumbnail': r're:https?://.*\.jpg', - 'timestamp': 1404039863, - 'upload_date': '20140629', - 'duration': 69.544, - 'uploader_id': '1027729757001', + 'upload_date': '20160916', + 'duration': 69, + 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', + 'uploader': 'Dagbladet', }, - 'add_ie': ['BrightcoveNew'] + 'add_ie': ['Youtube'] }, { - 'url': 'http://dbtv.no/3649835190001', + 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 'only_matching': True, }, { - 'url': 'http://www.dbtv.no/lazyplayer/4631135248001', - 'only_matching': True, - }, { - 'url': 'http://dbtv.no/vice/5000634109001', - 'only_matching': True, - }, { - 'url': 'http://dbtv.no/filmtrailer/3359293614001', + 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', 'only_matching': True, }] @staticmethod def _extract_urls(webpage): return [url for _, url in re.findall( - r']+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1', + r']+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1', webpage)] def _real_extract(self, url): - video_id, display_id = re.match(self._VALID_URL, url).groups() - - return { + display_id, video_id = re.match(self._VALID_URL, url).groups() + info = { '_type': 'url_transparent', - 'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id, 'id': video_id, 'display_id': display_id, - 'ie_key': 'BrightcoveNew', } + if len(video_id) == 11: + info.update({ + 'url': video_id, + 'ie_key': 'Youtube', + }) + else: + info.update({ + 'url': 'jwplatform:' + video_id, + 'ie_key': 'JWPlatform', + }) + return info diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index b70c307a7..6a2712cc5 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -5,23 +5,17 @@ import re import string from .discoverygo import DiscoveryGoBaseIE -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) -from ..utils import ( - ExtractorError, - try_get, -) +from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError from ..compat import compat_HTTPError class DiscoveryIE(DiscoveryGoBaseIE): _VALID_URL = r'''(?x)https?:// (?P + (?:(?:www|go)\.)?discovery| (?:www\.)? (?: - discovery| investigationdiscovery| discoverylife| animalplanet| @@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE): cookingchanneltv| motortrend ) - )\.com(?P/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+))''' + )\.com/tv-shows/(?P[^/]+)/(?:video|full-episode)s/(?P[^./?#]+)''' _TESTS = [{ - 'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley', + 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', 'info_dict': { - 'id': '5a2d9b4d6b66d17a5026e1fd', + 'id': '5a2f35ce6b66d17a5026e29e', 'ext': 'mp4', - 'title': 'Dave Foley', - 'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f', - 'duration': 608, + 'title': 'Riding with Matthew Perry', + 'description': 'md5:a34333153e79bc4526019a5129e7f878', + 'duration': 84, }, 'params': { 'skip_download': True, # requires ffmpeg @@ -56,20 +50,20 @@ class DiscoveryIE(DiscoveryGoBaseIE): }, { 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', 'only_matching': True, + }, { + 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', + 'only_matching': True, + }, { + # using `show_slug` is important to get the correct video data + 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special', + 'only_matching': True, }] _GEO_COUNTRIES = ['US'] _GEO_BYPASS = False + _API_BASE_URL = 'https://api.discovery.com/v1/' def _real_extract(self, url): - site, path, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) - - react_data = self._parse_json(self._search_regex( - r'window\.__reactTransmitPacket\s*=\s*({.+?});', - webpage, 'react data'), display_id) - content_blocks = react_data['layout'][path]['contentBlocks'] - video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0] - video_id = video['id'] + site, show_slug, display_id = re.match(self._VALID_URL, url).groups() access_token = None cookies = self._get_cookies(url) @@ -79,27 +73,36 @@ class DiscoveryIE(DiscoveryGoBaseIE): if auth_storage_cookie and auth_storage_cookie.value: auth_storage = self._parse_json(compat_urllib_parse_unquote( compat_urllib_parse_unquote(auth_storage_cookie.value)), - video_id, fatal=False) or {} + display_id, fatal=False) or {} access_token = auth_storage.get('a') or auth_storage.get('access_token') if not access_token: access_token = self._download_json( - 'https://%s.com/anonymous' % site, display_id, query={ + 'https://%s.com/anonymous' % site, display_id, + 'Downloading token JSON metadata', query={ 'authRel': 'authorization', - 'client_id': try_get( - react_data, lambda x: x['application']['apiClientId'], - compat_str) or '3020a40c2356a645b4b4', + 'client_id': '3020a40c2356a645b4b4', 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site, })['access_token'] - try: - headers = self.geo_verification_headers() - headers['Authorization'] = 'Bearer ' + access_token + headers = self.geo_verification_headers() + headers['Authorization'] = 'Bearer ' + access_token + try: + video = self._download_json( + self._API_BASE_URL + 'content/videos', + display_id, 'Downloading content JSON metadata', + headers=headers, query={ + 'embed': 'show.name', + 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags', + 'slug': display_id, + 'show_slug': show_slug, + })[0] + video_id = video['id'] stream = self._download_json( - 'https://api.discovery.com/v1/streaming/video/' + video_id, - display_id, headers=headers) + self._API_BASE_URL + 'streaming/video/' + video_id, + display_id, 'Downloading streaming JSON metadata', headers=headers) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): e_description = self._parse_json( diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py new file mode 100644 index 000000000..d95c67a5b --- /dev/null +++ b/youtube_dl/extractor/dlive.py @@ -0,0 +1,97 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class DLiveVODIE(InfoExtractor): + IE_NAME = 'dlive:vod' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P.+?)\+(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR', + 'info_dict': { + 'id': '3mTzOl4WR', + 'ext': 'mp4', + 'title': 'Minecraft with james charles epic', + 'upload_date': '20190701', + 'timestamp': 1562011015, + 'uploader_id': 'pdp', + } + }, { + 'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg', + 'only_matching': True, + }] + + def _real_extract(self, url): + uploader_id, vod_id = re.match(self._VALID_URL, url).groups() + broadcast = self._download_json( + 'https://graphigo.prd.dlive.tv/', vod_id, + data=json.dumps({'query': '''query { + pastBroadcast(permlink:"%s+%s") { + content + createdAt + length + playbackUrl + title + thumbnailUrl + viewCount + } +}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast'] + title = broadcast['title'] + formats = self._extract_m3u8_formats( + broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native') + self._sort_formats(formats) + return { + 'id': vod_id, + 'title': title, + 'uploader_id': uploader_id, + 'formats': formats, + 'description': broadcast.get('content'), + 'thumbnail': broadcast.get('thumbnailUrl'), + 'timestamp': int_or_none(broadcast.get('createdAt'), 1000), + 'view_count': int_or_none(broadcast.get('viewCount')), + } + + +class DLiveStreamIE(InfoExtractor): + IE_NAME = 'dlive:stream' + _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P[\w.-]+)' + + def _real_extract(self, url): + display_name = self._match_id(url) + user = self._download_json( + 'https://graphigo.prd.dlive.tv/', display_name, + data=json.dumps({'query': '''query { + userByDisplayName(displayname:"%s") { + livestream { + content + createdAt + title + thumbnailUrl + watchingCount + } + username + } +}''' % display_name}).encode())['data']['userByDisplayName'] + livestream = user['livestream'] + title = livestream['title'] + username = user['username'] + formats = self._extract_m3u8_formats( + 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username, + display_name, 'mp4') + self._sort_formats(formats) + return { + 'id': display_name, + 'title': self._live_title(title), + 'uploader': display_name, + 'uploader_id': username, + 'formats': formats, + 'description': livestream.get('content'), + 'thumbnail': livestream.get('thumbnailUrl'), + 'is_live': True, + 'timestamp': int_or_none(livestream.get('createdAt'), 1000), + 'view_count': int_or_none(livestream.get('watchingCount')), + } diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 4485bf8c1..4e0f8bc81 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..compat import ( @@ -18,7 +19,7 @@ from ..utils import ( class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?Peinthusan\.(?:tv|com|ca))/movie/watch/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://einthusan.tv/movie/watch/9097/', 'md5': 'ff0f7f2065031b8a2cf13a933731c035', @@ -32,6 +33,12 @@ class EinthusanIE(InfoExtractor): }, { 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi', 'only_matching': True, + }, { + 'url': 'https://einthusan.com/movie/watch/9097/', + 'only_matching': True, + }, { + 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi', + 'only_matching': True, }] # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js @@ -41,7 +48,9 @@ class EinthusanIE(InfoExtractor): )).decode('utf-8'), video_id) def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) @@ -53,7 +62,7 @@ class EinthusanIE(InfoExtractor): page_id = self._html_search_regex( ']+data-pageid="([^"]+)"', webpage, 'page ID') video_data = self._download_json( - 'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id, + 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id, data=urlencode_postdata({ 'xEvent': 'UIVideoPlayer.PingOutcome', 'xJson': json.dumps({ diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 8cc9bd165..6cf05e6da 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -216,17 +216,14 @@ class FiveThirtyEightIE(InfoExtractor): _TEST = { 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', 'info_dict': { - 'id': '21846851', - 'ext': 'mp4', + 'id': '56032156', + 'ext': 'flv', 'title': 'FiveThirtyEight: The Raiders can still make the playoffs', 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', - 'timestamp': 1513960621, - 'upload_date': '20171222', }, 'params': { 'skip_download': True, }, - 'expected_warnings': ['Unable to download f4m manifest'], } def _real_extract(self, url): @@ -234,9 +231,8 @@ class FiveThirtyEightIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_id = self._search_regex( - r'data-video-id=["\'](?P\d+)', - webpage, 'video id', group='id') + embed_url = self._search_regex( + r']+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)', + webpage, 'embed url') - return self.url_result( - 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) + return self.url_result(embed_url, 'AbcNewsVideo') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 530474f3f..44120cae2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -58,17 +58,8 @@ from .ard import ( ARDMediathekIE, ) from .arte import ( - ArteTvIE, ArteTVPlus7IE, - ArteTVCreativeIE, - ArteTVConcertIE, - ArteTVInfoIE, - ArteTVFutureIE, - ArteTVCinemaIE, - ArteTVDDCIE, - ArteTVMagazineIE, ArteTVEmbedIE, - TheOperaPlatformIE, ArteTVPlaylistIE, ) from .asiancrush import ( @@ -113,6 +104,8 @@ from .bild import BildIE from .bilibili import ( BiliBiliIE, BiliBiliBangumiIE, + BilibiliAudioIE, + BilibiliAudioAlbumIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( @@ -404,11 +397,7 @@ from .frontendmasters import ( FrontendMastersCourseIE ) from .funimation import FunimationIE -from .funk import ( - FunkMixIE, - FunkChannelIE, -) -from .funnyordie import FunnyOrDieIE +from .funk import FunkIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE from .gaia import GaiaIE @@ -592,6 +581,7 @@ from .linkedin import ( ) from .linuxacademy import LinuxAcademyIE from .litv import LiTVIE +from .livejournal import LiveJournalIE from .liveleak import ( LiveLeakIE, LiveLeakEmbedIE, @@ -980,7 +970,6 @@ from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rtvs import RTVSIE -from .rudo import RudoIE from .ruhd import RUHDIE from .rutube import ( RutubeIE, @@ -1268,6 +1257,10 @@ from .udn import UDNEmbedIE from .ufctv import UFCTVIE from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE +from .dlive import ( + DLiveVODIE, + DLiveStreamIE, +) from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE @@ -1434,6 +1427,7 @@ from .xfileshare import XFileShareIE from .xhamster import ( XHamsterIE, XHamsterEmbedIE, + XHamsterUserIE, ) from .xiami import ( XiamiSongIE, @@ -1457,6 +1451,7 @@ from .yahoo import ( YahooSearchIE, YahooGyaOPlayerIE, YahooGyaOIE, + YahooJapanNewsIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic import ( diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 789dd79d5..a3dcdca3e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -428,7 +428,7 @@ class FacebookIE(InfoExtractor): timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) - thumbnail = self._og_search_thumbnail(webpage) + thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage) view_count = parse_count(self._search_regex( r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py index 9f9863746..c4c0f1b3d 100644 --- a/youtube_dl/extractor/fivetv.py +++ b/youtube_dl/extractor/fivetv.py @@ -9,7 +9,7 @@ from ..utils import int_or_none class FiveTVIE(InfoExtractor): _VALID_URL = r'''(?x) - http:// + https?:// (?:www\.)?5-tv\.ru/ (?: (?:[^/]+/)+(?P\d+)| @@ -39,6 +39,7 @@ class FiveTVIE(InfoExtractor): 'duration': 180, }, }, { + # redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/ 'url': 'http://www.5-tv.ru/glavnoe/#itemDetails', 'info_dict': { 'id': 'glavnoe', @@ -46,6 +47,7 @@ class FiveTVIE(InfoExtractor): 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$', 'thumbnail': r're:^https?://.*\.jpg$', }, + 'skip': 'redirect to «Известия. Главное» project page', }, { 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/', 'only_matching': True, @@ -70,7 +72,7 @@ class FiveTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - [r']+?class="flowplayer[^>]+?data-href="([^"]+)"', + [r']+?class="(?:flow)?player[^>]+?data-href="([^"]+)"', r']+?href="([^"]+)"[^>]+?class="videoplayer"'], webpage, 'video url') diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py index 7e1af95e0..81d1949fd 100644 --- a/youtube_dl/extractor/funk.py +++ b/youtube_dl/extractor/funk.py @@ -1,89 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals -import itertools import re from .common import InfoExtractor from .nexx import NexxIE -from ..compat import compat_str from ..utils import ( int_or_none, - try_get, + str_or_none, ) -class FunkBaseIE(InfoExtractor): - _HEADERS = { - 'Accept': '*/*', - 'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8', - 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4', - } - _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4' - - @staticmethod - def _make_headers(referer): - headers = FunkBaseIE._HEADERS.copy() - headers['Referer'] = referer - return headers - - def _make_url_result(self, video): - return { - '_type': 'url_transparent', - 'url': 'nexx:741:%s' % video['sourceId'], - 'ie_key': NexxIE.ie_key(), - 'id': video['sourceId'], - 'title': video.get('title'), - 'description': video.get('description'), - 'duration': int_or_none(video.get('duration')), - 'season_number': int_or_none(video.get('seasonNr')), - 'episode_number': int_or_none(video.get('episodeNr')), - } - - -class FunkMixIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P[^/]+)/(?P[^/?#&]+)' +class FunkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P[0-9a-z-]+)-(?P\d+)' _TESTS = [{ - 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten', - 'md5': '8edf617c2f2b7c9847dfda313f199009', - 'info_dict': { - 'id': '123748', - 'ext': 'mp4', - 'title': '"Die realste Kifferdoku aller Zeiten"', - 'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d', - 'timestamp': 1490274721, - 'upload_date': '20170323', - }, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - mix_id = mobj.group('id') - alias = mobj.group('alias') - - lists = self._download_json( - 'https://www.funk.net/api/v3.1/curation/curatedLists/', - mix_id, headers=self._make_headers(url), query={ - 'size': 100, - })['_embedded']['curatedListList'] - - metas = next( - l for l in lists - if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas'] - video = next( - meta['videoDataDelegate'] - for meta in metas - if try_get( - meta, lambda x: x['videoDataDelegate']['alias'], - compat_str) == alias) - - return self._make_url_result(video) - - -class FunkChannelIE(FunkBaseIE): - _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P[^/]+)/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2', + 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', + 'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81', 'info_dict': { 'id': '1155821', 'ext': 'mp4', @@ -92,83 +24,26 @@ class FunkChannelIE(FunkBaseIE): 'timestamp': 1514507395, 'upload_date': '20171229', }, - 'params': { - 'skip_download': True, - }, + }, { - # only available via byIdList API - 'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu', - 'info_dict': { - 'id': '205067', - 'ext': 'mp4', - 'title': 'Martin Sonneborn erklärt die EU', - 'description': 'md5:050f74626e4ed87edf4626d2024210c0', - 'timestamp': 1494424042, - 'upload_date': '20170510', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/', + 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - alias = mobj.group('alias') - - headers = self._make_headers(url) - - video = None - - # Id-based channels are currently broken on their side: webplayer - # tries to process them via byChannelAlias endpoint and fails - # predictably. - for page_num in itertools.count(): - by_channel_alias = self._download_json( - 'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s' - % channel_id, - 'Downloading byChannelAlias JSON page %d' % (page_num + 1), - headers=headers, query={ - 'filterFsk': 'false', - 'sort': 'creationDate,desc', - 'size': 100, - 'page': page_num, - }, fatal=False) - if not by_channel_alias: - break - video_list = try_get( - by_channel_alias, lambda x: x['_embedded']['videoList'], list) - if not video_list: - break - try: - video = next(r for r in video_list if r.get('alias') == alias) - break - except StopIteration: - pass - if not try_get( - by_channel_alias, lambda x: x['_links']['next']): - break - - if not video: - by_id_list = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/byIdList', - channel_id, 'Downloading byIdList JSON', headers=headers, - query={ - 'ids': alias, - }, fatal=False) - if by_id_list: - video = try_get(by_id_list, lambda x: x['result'][0], dict) - - if not video: - results = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/filter', - channel_id, 'Downloading filter JSON', headers=headers, query={ - 'channelId': channel_id, - 'size': 100, - })['result'] - video = next(r for r in results if r.get('alias') == alias) - - return self._make_url_result(video) + display_id, nexx_id = re.match(self._VALID_URL, url).groups() + video = self._download_json( + 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id) + return { + '_type': 'url_transparent', + 'url': 'nexx:741:' + nexx_id, + 'ie_key': NexxIE.ie_key(), + 'id': nexx_id, + 'title': video.get('title'), + 'description': video.get('description'), + 'duration': int_or_none(video.get('duration')), + 'channel_id': str_or_none(video.get('channelId')), + 'display_id': display_id, + 'tags': video.get('tags'), + 'thumbnail': video.get('imageUrlLandscape'), + } diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py deleted file mode 100644 index f85e7de14..000000000 --- a/youtube_dl/extractor/funnyordie.py +++ /dev/null @@ -1,162 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - unified_timestamp, -) - - -class FunnyOrDieIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?Pembed|articles|videos)/(?P[0-9a-f]+)(?:$|[?#/])' - _TESTS = [{ - 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', - 'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', - 'info_dict': { - 'id': '0732f586d7', - 'ext': 'mp4', - 'title': 'Heart-Shaped Box: Literal Video Version', - 'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', - 'thumbnail': r're:^http:.*\.jpg$', - 'uploader': 'DASjr', - 'timestamp': 1317904928, - 'upload_date': '20111006', - 'duration': 318.3, - }, - }, { - 'url': 'http://www.funnyordie.com/embed/e402820827', - 'info_dict': { - 'id': 'e402820827', - 'ext': 'mp4', - 'title': 'Please Use This Song (Jon Lajoie)', - 'description': 'Please use this to sell something. www.jonlajoie.com', - 'thumbnail': r're:^http:.*\.jpg$', - 'timestamp': 1398988800, - 'upload_date': '20140502', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - - links = re.findall(r']+src=(["\'])(?P.+?/master\.m3u8[^"\']*)\1', - webpage, 'm3u8 url', group='url') - - formats = [] - - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False) - source_formats = list(filter( - lambda f: f.get('vcodec') != 'none', m3u8_formats)) - - bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] - bitrates.sort() - - if source_formats: - self._sort_formats(source_formats) - - for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)): - for path, ext in links: - ff = f.copy() - if ff: - if ext != 'mp4': - ff = dict( - [(k, v) for k, v in ff.items() - if k in ('height', 'width', 'format_id')]) - ff.update({ - 'format_id': ff['format_id'].replace('hls', ext), - 'ext': ext, - 'protocol': 'http', - }) - else: - ff.update({ - 'format_id': '%s-%d' % (ext, bitrate), - 'vbr': bitrate, - }) - ff['url'] = self._proto_relative_url( - '%s%d.%s' % (path, bitrate, ext)) - formats.append(ff) - self._check_formats(formats, video_id) - - formats.extend(m3u8_formats) - self._sort_formats( - formats, field_preference=('height', 'width', 'tbr', 'format_id')) - - subtitles = {} - for src, src_lang in re.findall(r']+\bclass=["\']channel-preview-name[^>]+>(.+?).+)\.aspx' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P[^.?&#]+)' + _TESTS = [{ + # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url 'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx', 'md5': '292f26da1ab4beb4c9099f1304d2b071', 'info_dict': { @@ -18,16 +25,25 @@ class GameInformerIE(InfoExtractor): 'upload_date': '20150928', 'uploader_id': '694940074001', }, - } + }, { + # Brightcove id inside unique element with field--name-field-brightcove-video-id class + 'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue', + 'info_dict': { + 'id': '6057111913001', + 'ext': 'mp4', + 'title': 'New Gameplay Today – Streets Of Rogue', + 'timestamp': 1562699001, + 'upload_date': '20190709', + 'uploader_id': '694940074001', + + }, + }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s' def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage( url, display_id, headers=self.geo_verification_headers()) - brightcove_id = self._search_regex( - [r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"], - webpage, 'brightcove id') - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', - brightcove_id) + brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage)) + brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage) + return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index eeb0d25f6..d1725d98b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2075,6 +2075,22 @@ class GenericIE(InfoExtractor): }, 'playlist_count': 6, }, + { + # Squarespace video embed, 2019-08-28 + 'url': 'http://ootboxford.com', + 'info_dict': { + 'id': 'Tc7b_JGdZfw', + 'title': 'Out of the Blue, at Childish Things 10', + 'ext': 'mp4', + 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f', + 'uploader_id': 'helendouglashouse', + 'uploader': 'Helen & Douglas House', + 'upload_date': '20140328', + }, + 'params': { + 'skip_download': True, + }, + }, { # Zype embed 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', @@ -2104,6 +2120,23 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': ['Failed to download MPD manifest'], }, + { + # DailyMotion embed with DM.player + 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804', + 'info_dict': { + 'id': 'k6aKkGHd9FJs4mtJN39', + 'ext': 'mp4', + 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final', + 'description': 'This video is private.', + 'uploader_id': 'x1jf30l', + 'uploader': 'beIN SPORTS USA', + 'upload_date': '20190528', + 'timestamp': 1559062971, + }, + 'params': { + 'skip_download': True, + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2209,7 +2242,7 @@ class GenericIE(InfoExtractor): default_search = 'fixup_error' if default_search in ('auto', 'auto_warning', 'fixup_error'): - if '/' in url: + if re.match(r'^[^\s/]+\.[^\s/]+/', url): self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) elif default_search != 'fixup_error': @@ -2378,6 +2411,12 @@ class GenericIE(InfoExtractor): # Unescaping the whole page allows to handle those cases in a generic way webpage = compat_urllib_parse_unquote(webpage) + # Unescape squarespace embeds to be detected by generic extractor, + # see https://github.com/ytdl-org/youtube-dl/issues/21294 + webpage = re.sub( + r']+class=[^>]*?\bsqs-video-wrapper\b[^>]*>', + lambda x: unescapeHTML(x.group(0)), webpage) + # it's tempting to parse this further, but you would # have to take into account all the variations like # Video Title - Site Name diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index eb6f85836..bbe3cb283 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -11,7 +11,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P[^-/?#]+)' + _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#]+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor): 'categories': list, 'age_limit': 0, } + }, { + 'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish', + 'only_matching': True }, { 'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull', 'only_matching': True diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 2a032178d..c834cf1c4 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -34,9 +34,13 @@ class GoIE(AdobePassIE): 'watchdisneyxd': { 'brand': '009', 'resource_id': 'DisneyXD', + }, + 'disneynow': { + 'brand': '011', + 'resource_id': 'Disney', } } - _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|disneynow)\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ + _VALID_URL = r'https?://(?:(?:(?P%s)\.)?go|(?Pdisneynow))\.com/(?:(?:[^/]+/)*(?Pvdka\w+)|(?:[^/]+/)*(?P[^/?#]+))'\ % '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', @@ -78,17 +82,16 @@ class GoIE(AdobePassIE): def _extract_videos(self, brand, video_id='-1', show_id='-1'): display_id = video_id if video_id != '-1' else show_id - foo = 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id) - print("Foo is:", foo) + return self._download_json( 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id), display_id)['video'] def _real_extract(self, url): - sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() - print("sub_domain:",sub_domain) + mobj = re.match(self._VALID_URL, url) + sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2') + video_id, display_id = mobj.group('id', 'display_id') site_info = self._SITE_INFO.get(sub_domain, {}) - print("site_info:", site_info) brand = site_info.get('brand') if not video_id or not site_info: webpage = self._download_webpage(url, display_id or video_id) @@ -101,8 +104,7 @@ class GoIE(AdobePassIE): brand = self._search_regex( (r'data-brand=\s*["\']\s*(\d+)', r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand', - default='008') - print("Brand 100:", brand) + default='004') #was 8 premerge site_info = next( si for _, si in self._SITE_INFO.items() if si.get('brand') == brand) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 79d5bbb2e..f9f7c5a64 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import hashlib import hmac +import re import time import uuid @@ -126,6 +127,8 @@ class HotStarIE(HotStarBaseIE): format_url = url_or_none(playback_set.get('playbackUrl')) if not format_url: continue + format_url = re.sub( + r'(?<=//staragvod)(\d)', r'web\1', format_url) tags = str_or_none(playback_set.get('tagsCombination')) or '' if tags and 'encryption:plain' not in tags: continue @@ -133,7 +136,8 @@ class HotStarIE(HotStarBaseIE): try: if 'package:hls' in tags or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id='hls')) + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls')) elif 'package:dash' in tags or ext == 'mpd': formats.extend(self._extract_mpd_formats( format_url, video_id, mpd_id='dash')) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index ffd87b55f..b061850a1 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -22,7 +22,7 @@ from ..utils import ( class InstagramIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/p/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv)/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -92,6 +92,9 @@ class InstagramIE(InfoExtractor): }, { 'url': 'http://instagram.com/p/9o6LshA7zy/embed/', 'only_matching': True, + }, { + 'url': 'https://www.instagram.com/tv/aye83DjauH/', + 'only_matching': True, }] @staticmethod diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 647b905f1..2aabd98b5 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class JWPlatformIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' + _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P[a-zA-Z0-9]{8})' _TESTS = [{ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325', diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 639d73837..0a733424c 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -103,6 +103,11 @@ class KalturaIE(InfoExtractor): { 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', 'only_matching': True, + }, + { + # unavailable source format + 'url': 'kaltura:513551:1_66x4rg7o', + 'only_matching': True, } ] @@ -306,12 +311,17 @@ class KalturaIE(InfoExtractor): f['fileExt'] = 'mp4' video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) + format_id = '%(fileExt)s-%(bitrate)s' % f + # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o) + if f.get('isOriginal') is True and not self._is_valid_url( + video_url, entry_id, format_id): + continue # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g # -f mp4-56) vcodec = 'none' if 'videoCodecId' not in f and f.get( 'frameRate') == 0 else f.get('videoCodecId') formats.append({ - 'format_id': '%(fileExt)s-%(bitrate)s' % f, + 'format_id': format_id, 'ext': f.get('fileExt'), 'tbr': int_or_none(f['bitrate']), 'fps': int_or_none(f.get('frameRate')), diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py index 24f78d928..6ed7da4ab 100644 --- a/youtube_dl/extractor/lecturio.py +++ b/youtube_dl/extractor/lecturio.py @@ -6,8 +6,8 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, determine_ext, - extract_attributes, ExtractorError, float_or_none, int_or_none, @@ -19,6 +19,7 @@ from ..utils import ( class LecturioBaseIE(InfoExtractor): + _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' _LOGIN_URL = 'https://app.lecturio.com/en/login' _NETRC_MACHINE = 'lecturio' @@ -67,51 +68,56 @@ class LecturioIE(LecturioBaseIE): _VALID_URL = r'''(?x) https:// (?: - app\.lecturio\.com/[^/]+/(?P[^/?#&]+)\.lecture| - (?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag + app\.lecturio\.com/([^/]+/(?P[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P\d+))| + (?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag ) ''' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', - 'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870', + 'md5': '9a42cf1d8282a6311bf7211bbde26fde', 'info_dict': { 'id': '39634', 'ext': 'mp4', - 'title': 'Important Concepts and Terms – Introduction to Microbiology', + 'title': 'Important Concepts and Terms — Introduction to Microbiology', }, 'skip': 'Requires lecturio account credentials', }, { 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', 'only_matching': True, + }, { + 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', + 'only_matching': True, }] _CC_LANGS = { + 'Arabic': 'ar', + 'Bulgarian': 'bg', 'German': 'de', 'English': 'en', 'Spanish': 'es', + 'Persian': 'fa', 'French': 'fr', + 'Japanese': 'ja', 'Polish': 'pl', + 'Pashto': 'ps', 'Russian': 'ru', } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') or mobj.group('id_de') - - webpage = self._download_webpage( - 'https://app.lecturio.com/en/lecture/%s/player.html' % display_id, - display_id) - - lecture_id = self._search_regex( - r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id') - - api_url = self._search_regex( - r'lectureDataLink\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'api url', group='url') - - video = self._download_json(api_url, display_id) - + nt = mobj.group('nt') or mobj.group('nt_de') + lecture_id = mobj.group('id') + display_id = nt or lecture_id + api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json' + video = self._download_json( + self._API_BASE_URL + api_path, display_id) title = video['title'].strip() + if not lecture_id: + pid = video.get('productId') or video.get('uid') + if pid: + spid = pid.split('_') + if spid and len(spid) == 2: + lecture_id = spid[1] formats = [] for format_ in video['content']['media']: @@ -129,24 +135,30 @@ class LecturioIE(LecturioBaseIE): continue label = str_or_none(format_.get('label')) filesize = int_or_none(format_.get('fileSize')) - formats.append({ + f = { 'url': file_url, 'format_id': label, 'filesize': float_or_none(filesize, invscale=1000) - }) + } + if label: + mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label) + if mobj: + f.update({ + 'format_id': mobj.group(2), + 'height': int(mobj.group(1)), + }) + formats.append(f) self._sort_formats(formats) subtitles = {} automatic_captions = {} - cc = self._parse_json( - self._search_regex( - r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles', - default='{}'), display_id, fatal=False) - for cc_label, cc_url in cc.items(): - cc_url = url_or_none(cc_url) + captions = video.get('captions') or [] + for cc in captions: + cc_url = cc.get('url') if not cc_url: continue - lang = self._search_regex( + cc_label = cc.get('translatedCode') + lang = cc.get('languageCode') or self._search_regex( r'/([a-z]{2})_', cc_url, 'lang', default=cc_label.split()[0] if cc_label else 'en') original_lang = self._search_regex( @@ -160,7 +172,7 @@ class LecturioIE(LecturioBaseIE): }) return { - 'id': lecture_id, + 'id': lecture_id or nt, 'title': title, 'formats': formats, 'subtitles': subtitles, @@ -169,37 +181,40 @@ class LecturioIE(LecturioBaseIE): class LecturioCourseIE(LecturioBaseIE): - _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P[^/?#&]+)\.course' - _TEST = { + _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P[^/?#&]+)\.course|(?:#/)?course/c/(?P\d+))' + _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', 'info_dict': { 'id': 'microbiology-introduction', 'title': 'Microbiology: Introduction', + 'description': 'md5:13da8500c25880c6016ae1e6d78c386a', }, 'playlist_count': 45, 'skip': 'Requires lecturio account credentials', - } + }, { + 'url': 'https://app.lecturio.com/#/course/c/6434', + 'only_matching': True, + }] def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - + nt, course_id = re.match(self._VALID_URL, url).groups() + display_id = nt or course_id + api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json' + course = self._download_json( + self._API_BASE_URL + api_path, display_id) entries = [] - for mobj in re.finditer( - r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>', - webpage): - params = extract_attributes(mobj.group(0)) - lecture_url = urljoin(url, params.get('data-url')) - lecture_id = params.get('data-id') + for lecture in course.get('lectures', []): + lecture_id = str_or_none(lecture.get('id')) + lecture_url = lecture.get('url') + if lecture_url: + lecture_url = urljoin(url, lecture_url) + else: + lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id) entries.append(self.url_result( lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) - - title = self._search_regex( - r']+class=["\']content-title[^>]+>([^<]+)', webpage, - 'title', default=None) - - return self.playlist_result(entries, display_id, title) + return self.playlist_result( + entries, display_id, course.get('title'), + clean_html(course.get('description'))) class LecturioDeCourseIE(LecturioBaseIE): diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 8dd1ce0d0..7dc0ad794 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -326,7 +326,7 @@ class LetvCloudIE(InfoExtractor): elif play_json.get('code'): raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True) else: - raise ExtractorError('Letv cloud returned an unknwon error') + raise ExtractorError('Letv cloud returned an unknown error') def b64decode(s): return compat_b64decode(s).decode('utf-8') diff --git a/youtube_dl/extractor/livejournal.py b/youtube_dl/extractor/livejournal.py new file mode 100644 index 000000000..3a9f4553f --- /dev/null +++ b/youtube_dl/extractor/livejournal.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class LiveJournalIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P\d+)' + _TEST = { + 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272', + 'md5': 'adaf018388572ced8a6f301ace49d4b2', + 'info_dict': { + 'id': '1263729', + 'ext': 'mp4', + 'title': 'Истребители против БПЛА', + 'upload_date': '20190624', + 'timestamp': 1561406715, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + record = self._parse_json(self._search_regex( + r'Site\.page\s*=\s*({.+?});', webpage, + 'page data'), video_id)['video']['record'] + storage_id = compat_str(record['storageid']) + title = record.get('name') + if title: + # remove filename extension(.mp4, .mov, etc...) + title = title.rsplit('.', 1)[0] + return { + '_type': 'url_transparent', + 'id': video_id, + 'title': title, + 'thumbnail': record.get('thumbnail'), + 'timestamp': int_or_none(record.get('timecreate')), + 'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id, + 'ie_key': 'EaglePlatform', + } diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 3084c6dff..b3d8653d0 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -117,6 +117,10 @@ class LyndaIE(LyndaBaseIE): }, { 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html', 'only_matching': True, + }, { + # Status="NotFound", Message="Transcript not found" + 'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html', + 'only_matching': True, }] def _raise_unavailable(self, video_id): @@ -247,12 +251,17 @@ class LyndaIE(LyndaBaseIE): def _get_subtitles(self, video_id): url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id - subs = self._download_json(url, None, False) + subs = self._download_webpage( + url, video_id, 'Downloading subtitles JSON', fatal=False) + if not subs or 'Status="NotFound"' in subs: + return {} + subs = self._parse_json(subs, video_id, fatal=False) + if not subs: + return {} fixed_subs = self._fix_subtitles(subs) if fixed_subs: return {'en': [{'ext': 'srt', 'data': fixed_subs}]} - else: - return {} + return {} class LyndaCourseIE(LyndaBaseIE): diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 84137df50..71fc3ec56 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -79,6 +79,10 @@ class MGTVIE(InfoExtractor): 'ext': 'mp4', 'tbr': tbr, 'protocol': 'm3u8_native', + 'http_headers': { + 'Referer': url, + }, + 'format_note': stream.get('name'), }) self._sort_formats(formats) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index bcac13ec5..bf5353ef9 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -164,7 +164,7 @@ class MixcloudIE(InfoExtractor): def decrypt_url(f_url): for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'): decrypted_url = self._decrypt_xor_cipher(k, f_url) - if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): + if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url): return decrypted_url for url_key in ('url', 'hlsUrl', 'dashUrl'): diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 5e34d776b..2e8b302ac 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -85,7 +85,8 @@ class NickBrIE(MTVServicesInfoExtractor): https?:// (?: (?P(?:www\.)?nickjr|mundonick\.uol)\.com\.br| - (?:www\.)?nickjr\.[a-z]{2} + (?:www\.)?nickjr\.[a-z]{2}| + (?:www\.)?nickelodeonjunior\.fr ) /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P[^/?\#.]+) ''' @@ -101,6 +102,9 @@ class NickBrIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/', 'only_matching': True, + }, { + 'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index f32f530f7..6157dc7c1 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -45,7 +45,11 @@ class NineNowIE(InfoExtractor): webpage = self._download_webpage(url, display_id) page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, - 'page data'), display_id) + 'page data', default='{}'), display_id, fatal=False) + if not page_data: + page_data = self._parse_json(self._parse_json(self._search_regex( + r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;', + webpage, 'page data'), display_id), display_id) for kind in ('episode', 'clip'): current_key = page_data.get(kind, {}).get( diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b2918dc85..679eaf6c3 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import json import os -import random import re import subprocess import tempfile @@ -244,7 +243,13 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r''' + (?: + openload\.(?:co|io|link|pw)| + oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website|vip)| + oladblock\.(?:services|xyz|me)|openloed\.co + ) + ''' _VALID_URL = r'''(?x) https?:// (?P @@ -366,6 +371,12 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', 'only_matching': True, + }, { + 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', + 'only_matching': True, + }, { + 'url': 'https://oload.best/embed/kkz9JgVZeWc/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, @@ -378,1597 +389,20 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://openloed.co/f/b8NWEgkqNLI/', 'only_matching': True, + }, { + 'url': 'https://oload.vip/f/kUEfGclsU9o', + 'only_matching': True, }] - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '74.0.3729.129', - '76.0.3780.3', - '76.0.3780.2', - '74.0.3729.128', - '76.0.3780.1', - '76.0.3780.0', - '75.0.3770.15', - '74.0.3729.127', - '74.0.3729.126', - '76.0.3779.1', - '76.0.3779.0', - '75.0.3770.14', - '74.0.3729.125', - '76.0.3778.1', - '76.0.3778.0', - '75.0.3770.13', - '74.0.3729.124', - '74.0.3729.123', - '73.0.3683.121', - '76.0.3777.1', - '76.0.3777.0', - '75.0.3770.12', - '74.0.3729.122', - '76.0.3776.4', - '75.0.3770.11', - '74.0.3729.121', - '76.0.3776.3', - '76.0.3776.2', - '73.0.3683.120', - '74.0.3729.120', - '74.0.3729.119', - '74.0.3729.118', - '76.0.3776.1', - '76.0.3776.0', - '76.0.3775.5', - '75.0.3770.10', - '74.0.3729.117', - '76.0.3775.4', - '76.0.3775.3', - '74.0.3729.116', - '75.0.3770.9', - '76.0.3775.2', - '76.0.3775.1', - '76.0.3775.0', - '75.0.3770.8', - '74.0.3729.115', - '74.0.3729.114', - '76.0.3774.1', - '76.0.3774.0', - '75.0.3770.7', - '74.0.3729.113', - '74.0.3729.112', - '74.0.3729.111', - '76.0.3773.1', - '76.0.3773.0', - '75.0.3770.6', - '74.0.3729.110', - '74.0.3729.109', - '76.0.3772.1', - '76.0.3772.0', - '75.0.3770.5', - '74.0.3729.108', - '74.0.3729.107', - '76.0.3771.1', - '76.0.3771.0', - '75.0.3770.4', - '74.0.3729.106', - '74.0.3729.105', - '75.0.3770.3', - '74.0.3729.104', - '74.0.3729.103', - '74.0.3729.102', - '75.0.3770.2', - '74.0.3729.101', - '75.0.3770.1', - '75.0.3770.0', - '74.0.3729.100', - '75.0.3769.5', - '75.0.3769.4', - '74.0.3729.99', - '75.0.3769.3', - '75.0.3769.2', - '75.0.3768.6', - '74.0.3729.98', - '75.0.3769.1', - '75.0.3769.0', - '74.0.3729.97', - '73.0.3683.119', - '73.0.3683.118', - '74.0.3729.96', - '75.0.3768.5', - '75.0.3768.4', - '75.0.3768.3', - '75.0.3768.2', - '74.0.3729.95', - '74.0.3729.94', - '75.0.3768.1', - '75.0.3768.0', - '74.0.3729.93', - '74.0.3729.92', - '73.0.3683.117', - '74.0.3729.91', - '75.0.3766.3', - '74.0.3729.90', - '75.0.3767.2', - '75.0.3767.1', - '75.0.3767.0', - '74.0.3729.89', - '73.0.3683.116', - '75.0.3766.2', - '74.0.3729.88', - '75.0.3766.1', - '75.0.3766.0', - '74.0.3729.87', - '73.0.3683.115', - '74.0.3729.86', - '75.0.3765.1', - '75.0.3765.0', - '74.0.3729.85', - '73.0.3683.114', - '74.0.3729.84', - '75.0.3764.1', - '75.0.3764.0', - '74.0.3729.83', - '73.0.3683.113', - '75.0.3763.2', - '75.0.3761.4', - '74.0.3729.82', - '75.0.3763.1', - '75.0.3763.0', - '74.0.3729.81', - '73.0.3683.112', - '75.0.3762.1', - '75.0.3762.0', - '74.0.3729.80', - '75.0.3761.3', - '74.0.3729.79', - '73.0.3683.111', - '75.0.3761.2', - '74.0.3729.78', - '74.0.3729.77', - '75.0.3761.1', - '75.0.3761.0', - '73.0.3683.110', - '74.0.3729.76', - '74.0.3729.75', - '75.0.3760.0', - '74.0.3729.74', - '75.0.3759.8', - '75.0.3759.7', - '75.0.3759.6', - '74.0.3729.73', - '75.0.3759.5', - '74.0.3729.72', - '73.0.3683.109', - '75.0.3759.4', - '75.0.3759.3', - '74.0.3729.71', - '75.0.3759.2', - '74.0.3729.70', - '73.0.3683.108', - '74.0.3729.69', - '75.0.3759.1', - '75.0.3759.0', - '74.0.3729.68', - '73.0.3683.107', - '74.0.3729.67', - '75.0.3758.1', - '75.0.3758.0', - '74.0.3729.66', - '73.0.3683.106', - '74.0.3729.65', - '75.0.3757.1', - '75.0.3757.0', - '74.0.3729.64', - '73.0.3683.105', - '74.0.3729.63', - '75.0.3756.1', - '75.0.3756.0', - '74.0.3729.62', - '73.0.3683.104', - '75.0.3755.3', - '75.0.3755.2', - '73.0.3683.103', - '75.0.3755.1', - '75.0.3755.0', - '74.0.3729.61', - '73.0.3683.102', - '74.0.3729.60', - '75.0.3754.2', - '74.0.3729.59', - '75.0.3753.4', - '74.0.3729.58', - '75.0.3754.1', - '75.0.3754.0', - '74.0.3729.57', - '73.0.3683.101', - '75.0.3753.3', - '75.0.3752.2', - '75.0.3753.2', - '74.0.3729.56', - '75.0.3753.1', - '75.0.3753.0', - '74.0.3729.55', - '73.0.3683.100', - '74.0.3729.54', - '75.0.3752.1', - '75.0.3752.0', - '74.0.3729.53', - '73.0.3683.99', - '74.0.3729.52', - '75.0.3751.1', - '75.0.3751.0', - '74.0.3729.51', - '73.0.3683.98', - '74.0.3729.50', - '75.0.3750.0', - '74.0.3729.49', - '74.0.3729.48', - '74.0.3729.47', - '75.0.3749.3', - '74.0.3729.46', - '73.0.3683.97', - '75.0.3749.2', - '74.0.3729.45', - '75.0.3749.1', - '75.0.3749.0', - '74.0.3729.44', - '73.0.3683.96', - '74.0.3729.43', - '74.0.3729.42', - '75.0.3748.1', - '75.0.3748.0', - '74.0.3729.41', - '75.0.3747.1', - '73.0.3683.95', - '75.0.3746.4', - '74.0.3729.40', - '74.0.3729.39', - '75.0.3747.0', - '75.0.3746.3', - '75.0.3746.2', - '74.0.3729.38', - '75.0.3746.1', - '75.0.3746.0', - '74.0.3729.37', - '73.0.3683.94', - '75.0.3745.5', - '75.0.3745.4', - '75.0.3745.3', - '75.0.3745.2', - '74.0.3729.36', - '75.0.3745.1', - '75.0.3745.0', - '75.0.3744.2', - '74.0.3729.35', - '73.0.3683.93', - '74.0.3729.34', - '75.0.3744.1', - '75.0.3744.0', - '74.0.3729.33', - '73.0.3683.92', - '74.0.3729.32', - '74.0.3729.31', - '73.0.3683.91', - '75.0.3741.2', - '75.0.3740.5', - '74.0.3729.30', - '75.0.3741.1', - '75.0.3741.0', - '74.0.3729.29', - '75.0.3740.4', - '73.0.3683.90', - '74.0.3729.28', - '75.0.3740.3', - '73.0.3683.89', - '75.0.3740.2', - '74.0.3729.27', - '75.0.3740.1', - '75.0.3740.0', - '74.0.3729.26', - '73.0.3683.88', - '73.0.3683.87', - '74.0.3729.25', - '75.0.3739.1', - '75.0.3739.0', - '73.0.3683.86', - '74.0.3729.24', - '73.0.3683.85', - '75.0.3738.4', - '75.0.3738.3', - '75.0.3738.2', - '75.0.3738.1', - '75.0.3738.0', - '74.0.3729.23', - '73.0.3683.84', - '74.0.3729.22', - '74.0.3729.21', - '75.0.3737.1', - '75.0.3737.0', - '74.0.3729.20', - '73.0.3683.83', - '74.0.3729.19', - '75.0.3736.1', - '75.0.3736.0', - '74.0.3729.18', - '73.0.3683.82', - '74.0.3729.17', - '75.0.3735.1', - '75.0.3735.0', - '74.0.3729.16', - '73.0.3683.81', - '75.0.3734.1', - '75.0.3734.0', - '74.0.3729.15', - '73.0.3683.80', - '74.0.3729.14', - '75.0.3733.1', - '75.0.3733.0', - '75.0.3732.1', - '74.0.3729.13', - '74.0.3729.12', - '73.0.3683.79', - '74.0.3729.11', - '75.0.3732.0', - '74.0.3729.10', - '73.0.3683.78', - '74.0.3729.9', - '74.0.3729.8', - '74.0.3729.7', - '75.0.3731.3', - '75.0.3731.2', - '75.0.3731.0', - '74.0.3729.6', - '73.0.3683.77', - '73.0.3683.76', - '75.0.3730.5', - '75.0.3730.4', - '73.0.3683.75', - '74.0.3729.5', - '73.0.3683.74', - '75.0.3730.3', - '75.0.3730.2', - '74.0.3729.4', - '73.0.3683.73', - '73.0.3683.72', - '75.0.3730.1', - '75.0.3730.0', - '74.0.3729.3', - '73.0.3683.71', - '74.0.3729.2', - '73.0.3683.70', - '74.0.3729.1', - '74.0.3729.0', - '74.0.3726.4', - '73.0.3683.69', - '74.0.3726.3', - '74.0.3728.0', - '74.0.3726.2', - '73.0.3683.68', - '74.0.3726.1', - '74.0.3726.0', - '74.0.3725.4', - '73.0.3683.67', - '73.0.3683.66', - '74.0.3725.3', - '74.0.3725.2', - '74.0.3725.1', - '74.0.3724.8', - '74.0.3725.0', - '73.0.3683.65', - '74.0.3724.7', - '74.0.3724.6', - '74.0.3724.5', - '74.0.3724.4', - '74.0.3724.3', - '74.0.3724.2', - '74.0.3724.1', - '74.0.3724.0', - '73.0.3683.64', - '74.0.3723.1', - '74.0.3723.0', - '73.0.3683.63', - '74.0.3722.1', - '74.0.3722.0', - '73.0.3683.62', - '74.0.3718.9', - '74.0.3702.3', - '74.0.3721.3', - '74.0.3721.2', - '74.0.3721.1', - '74.0.3721.0', - '74.0.3720.6', - '73.0.3683.61', - '72.0.3626.122', - '73.0.3683.60', - '74.0.3720.5', - '72.0.3626.121', - '74.0.3718.8', - '74.0.3720.4', - '74.0.3720.3', - '74.0.3718.7', - '74.0.3720.2', - '74.0.3720.1', - '74.0.3720.0', - '74.0.3718.6', - '74.0.3719.5', - '73.0.3683.59', - '74.0.3718.5', - '74.0.3718.4', - '74.0.3719.4', - '74.0.3719.3', - '74.0.3719.2', - '74.0.3719.1', - '73.0.3683.58', - '74.0.3719.0', - '73.0.3683.57', - '73.0.3683.56', - '74.0.3718.3', - '73.0.3683.55', - '74.0.3718.2', - '74.0.3718.1', - '74.0.3718.0', - '73.0.3683.54', - '74.0.3717.2', - '73.0.3683.53', - '74.0.3717.1', - '74.0.3717.0', - '73.0.3683.52', - '74.0.3716.1', - '74.0.3716.0', - '73.0.3683.51', - '74.0.3715.1', - '74.0.3715.0', - '73.0.3683.50', - '74.0.3711.2', - '74.0.3714.2', - '74.0.3713.3', - '74.0.3714.1', - '74.0.3714.0', - '73.0.3683.49', - '74.0.3713.1', - '74.0.3713.0', - '72.0.3626.120', - '73.0.3683.48', - '74.0.3712.2', - '74.0.3712.1', - '74.0.3712.0', - '73.0.3683.47', - '72.0.3626.119', - '73.0.3683.46', - '74.0.3710.2', - '72.0.3626.118', - '74.0.3711.1', - '74.0.3711.0', - '73.0.3683.45', - '72.0.3626.117', - '74.0.3710.1', - '74.0.3710.0', - '73.0.3683.44', - '72.0.3626.116', - '74.0.3709.1', - '74.0.3709.0', - '74.0.3704.9', - '73.0.3683.43', - '72.0.3626.115', - '74.0.3704.8', - '74.0.3704.7', - '74.0.3708.0', - '74.0.3706.7', - '74.0.3704.6', - '73.0.3683.42', - '72.0.3626.114', - '74.0.3706.6', - '72.0.3626.113', - '74.0.3704.5', - '74.0.3706.5', - '74.0.3706.4', - '74.0.3706.3', - '74.0.3706.2', - '74.0.3706.1', - '74.0.3706.0', - '73.0.3683.41', - '72.0.3626.112', - '74.0.3705.1', - '74.0.3705.0', - '73.0.3683.40', - '72.0.3626.111', - '73.0.3683.39', - '74.0.3704.4', - '73.0.3683.38', - '74.0.3704.3', - '74.0.3704.2', - '74.0.3704.1', - '74.0.3704.0', - '73.0.3683.37', - '72.0.3626.110', - '72.0.3626.109', - '74.0.3703.3', - '74.0.3703.2', - '73.0.3683.36', - '74.0.3703.1', - '74.0.3703.0', - '73.0.3683.35', - '72.0.3626.108', - '74.0.3702.2', - '74.0.3699.3', - '74.0.3702.1', - '74.0.3702.0', - '73.0.3683.34', - '72.0.3626.107', - '73.0.3683.33', - '74.0.3701.1', - '74.0.3701.0', - '73.0.3683.32', - '73.0.3683.31', - '72.0.3626.105', - '74.0.3700.1', - '74.0.3700.0', - '73.0.3683.29', - '72.0.3626.103', - '74.0.3699.2', - '74.0.3699.1', - '74.0.3699.0', - '73.0.3683.28', - '72.0.3626.102', - '73.0.3683.27', - '73.0.3683.26', - '74.0.3698.0', - '74.0.3696.2', - '72.0.3626.101', - '73.0.3683.25', - '74.0.3696.1', - '74.0.3696.0', - '74.0.3694.8', - '72.0.3626.100', - '74.0.3694.7', - '74.0.3694.6', - '74.0.3694.5', - '74.0.3694.4', - '72.0.3626.99', - '72.0.3626.98', - '74.0.3694.3', - '73.0.3683.24', - '72.0.3626.97', - '72.0.3626.96', - '72.0.3626.95', - '73.0.3683.23', - '72.0.3626.94', - '73.0.3683.22', - '73.0.3683.21', - '72.0.3626.93', - '74.0.3694.2', - '72.0.3626.92', - '74.0.3694.1', - '74.0.3694.0', - '74.0.3693.6', - '73.0.3683.20', - '72.0.3626.91', - '74.0.3693.5', - '74.0.3693.4', - '74.0.3693.3', - '74.0.3693.2', - '73.0.3683.19', - '74.0.3693.1', - '74.0.3693.0', - '73.0.3683.18', - '72.0.3626.90', - '74.0.3692.1', - '74.0.3692.0', - '73.0.3683.17', - '72.0.3626.89', - '74.0.3687.3', - '74.0.3691.1', - '74.0.3691.0', - '73.0.3683.16', - '72.0.3626.88', - '72.0.3626.87', - '73.0.3683.15', - '74.0.3690.1', - '74.0.3690.0', - '73.0.3683.14', - '72.0.3626.86', - '73.0.3683.13', - '73.0.3683.12', - '74.0.3689.1', - '74.0.3689.0', - '73.0.3683.11', - '72.0.3626.85', - '73.0.3683.10', - '72.0.3626.84', - '73.0.3683.9', - '74.0.3688.1', - '74.0.3688.0', - '73.0.3683.8', - '72.0.3626.83', - '74.0.3687.2', - '74.0.3687.1', - '74.0.3687.0', - '73.0.3683.7', - '72.0.3626.82', - '74.0.3686.4', - '72.0.3626.81', - '74.0.3686.3', - '74.0.3686.2', - '74.0.3686.1', - '74.0.3686.0', - '73.0.3683.6', - '72.0.3626.80', - '74.0.3685.1', - '74.0.3685.0', - '73.0.3683.5', - '72.0.3626.79', - '74.0.3684.1', - '74.0.3684.0', - '73.0.3683.4', - '72.0.3626.78', - '72.0.3626.77', - '73.0.3683.3', - '73.0.3683.2', - '72.0.3626.76', - '73.0.3683.1', - '73.0.3683.0', - '72.0.3626.75', - '71.0.3578.141', - '73.0.3682.1', - '73.0.3682.0', - '72.0.3626.74', - '71.0.3578.140', - '73.0.3681.4', - '73.0.3681.3', - '73.0.3681.2', - '73.0.3681.1', - '73.0.3681.0', - '72.0.3626.73', - '71.0.3578.139', - '72.0.3626.72', - '72.0.3626.71', - '73.0.3680.1', - '73.0.3680.0', - '72.0.3626.70', - '71.0.3578.138', - '73.0.3678.2', - '73.0.3679.1', - '73.0.3679.0', - '72.0.3626.69', - '71.0.3578.137', - '73.0.3678.1', - '73.0.3678.0', - '71.0.3578.136', - '73.0.3677.1', - '73.0.3677.0', - '72.0.3626.68', - '72.0.3626.67', - '71.0.3578.135', - '73.0.3676.1', - '73.0.3676.0', - '73.0.3674.2', - '72.0.3626.66', - '71.0.3578.134', - '73.0.3674.1', - '73.0.3674.0', - '72.0.3626.65', - '71.0.3578.133', - '73.0.3673.2', - '73.0.3673.1', - '73.0.3673.0', - '72.0.3626.64', - '71.0.3578.132', - '72.0.3626.63', - '72.0.3626.62', - '72.0.3626.61', - '72.0.3626.60', - '73.0.3672.1', - '73.0.3672.0', - '72.0.3626.59', - '71.0.3578.131', - '73.0.3671.3', - '73.0.3671.2', - '73.0.3671.1', - '73.0.3671.0', - '72.0.3626.58', - '71.0.3578.130', - '73.0.3670.1', - '73.0.3670.0', - '72.0.3626.57', - '71.0.3578.129', - '73.0.3669.1', - '73.0.3669.0', - '72.0.3626.56', - '71.0.3578.128', - '73.0.3668.2', - '73.0.3668.1', - '73.0.3668.0', - '72.0.3626.55', - '71.0.3578.127', - '73.0.3667.2', - '73.0.3667.1', - '73.0.3667.0', - '72.0.3626.54', - '71.0.3578.126', - '73.0.3666.1', - '73.0.3666.0', - '72.0.3626.53', - '71.0.3578.125', - '73.0.3665.4', - '73.0.3665.3', - '72.0.3626.52', - '73.0.3665.2', - '73.0.3664.4', - '73.0.3665.1', - '73.0.3665.0', - '72.0.3626.51', - '71.0.3578.124', - '72.0.3626.50', - '73.0.3664.3', - '73.0.3664.2', - '73.0.3664.1', - '73.0.3664.0', - '73.0.3663.2', - '72.0.3626.49', - '71.0.3578.123', - '73.0.3663.1', - '73.0.3663.0', - '72.0.3626.48', - '71.0.3578.122', - '73.0.3662.1', - '73.0.3662.0', - '72.0.3626.47', - '71.0.3578.121', - '73.0.3661.1', - '72.0.3626.46', - '73.0.3661.0', - '72.0.3626.45', - '71.0.3578.120', - '73.0.3660.2', - '73.0.3660.1', - '73.0.3660.0', - '72.0.3626.44', - '71.0.3578.119', - '73.0.3659.1', - '73.0.3659.0', - '72.0.3626.43', - '71.0.3578.118', - '73.0.3658.1', - '73.0.3658.0', - '72.0.3626.42', - '71.0.3578.117', - '73.0.3657.1', - '73.0.3657.0', - '72.0.3626.41', - '71.0.3578.116', - '73.0.3656.1', - '73.0.3656.0', - '72.0.3626.40', - '71.0.3578.115', - '73.0.3655.1', - '73.0.3655.0', - '72.0.3626.39', - '71.0.3578.114', - '73.0.3654.1', - '73.0.3654.0', - '72.0.3626.38', - '71.0.3578.113', - '73.0.3653.1', - '73.0.3653.0', - '72.0.3626.37', - '71.0.3578.112', - '73.0.3652.1', - '73.0.3652.0', - '72.0.3626.36', - '71.0.3578.111', - '73.0.3651.1', - '73.0.3651.0', - '72.0.3626.35', - '71.0.3578.110', - '73.0.3650.1', - '73.0.3650.0', - '72.0.3626.34', - '71.0.3578.109', - '73.0.3649.1', - '73.0.3649.0', - '72.0.3626.33', - '71.0.3578.108', - '73.0.3648.2', - '73.0.3648.1', - '73.0.3648.0', - '72.0.3626.32', - '71.0.3578.107', - '73.0.3647.2', - '73.0.3647.1', - '73.0.3647.0', - '72.0.3626.31', - '71.0.3578.106', - '73.0.3635.3', - '73.0.3646.2', - '73.0.3646.1', - '73.0.3646.0', - '72.0.3626.30', - '71.0.3578.105', - '72.0.3626.29', - '73.0.3645.2', - '73.0.3645.1', - '73.0.3645.0', - '72.0.3626.28', - '71.0.3578.104', - '72.0.3626.27', - '72.0.3626.26', - '72.0.3626.25', - '72.0.3626.24', - '73.0.3644.0', - '73.0.3643.2', - '72.0.3626.23', - '71.0.3578.103', - '73.0.3643.1', - '73.0.3643.0', - '72.0.3626.22', - '71.0.3578.102', - '73.0.3642.1', - '73.0.3642.0', - '72.0.3626.21', - '71.0.3578.101', - '73.0.3641.1', - '73.0.3641.0', - '72.0.3626.20', - '71.0.3578.100', - '72.0.3626.19', - '73.0.3640.1', - '73.0.3640.0', - '72.0.3626.18', - '73.0.3639.1', - '71.0.3578.99', - '73.0.3639.0', - '72.0.3626.17', - '73.0.3638.2', - '72.0.3626.16', - '73.0.3638.1', - '73.0.3638.0', - '72.0.3626.15', - '71.0.3578.98', - '73.0.3635.2', - '71.0.3578.97', - '73.0.3637.1', - '73.0.3637.0', - '72.0.3626.14', - '71.0.3578.96', - '71.0.3578.95', - '72.0.3626.13', - '71.0.3578.94', - '73.0.3636.2', - '71.0.3578.93', - '73.0.3636.1', - '73.0.3636.0', - '72.0.3626.12', - '71.0.3578.92', - '73.0.3635.1', - '73.0.3635.0', - '72.0.3626.11', - '71.0.3578.91', - '73.0.3634.2', - '73.0.3634.1', - '73.0.3634.0', - '72.0.3626.10', - '71.0.3578.90', - '71.0.3578.89', - '73.0.3633.2', - '73.0.3633.1', - '73.0.3633.0', - '72.0.3610.4', - '72.0.3626.9', - '71.0.3578.88', - '73.0.3632.5', - '73.0.3632.4', - '73.0.3632.3', - '73.0.3632.2', - '73.0.3632.1', - '73.0.3632.0', - '72.0.3626.8', - '71.0.3578.87', - '73.0.3631.2', - '73.0.3631.1', - '73.0.3631.0', - '72.0.3626.7', - '71.0.3578.86', - '72.0.3626.6', - '73.0.3630.1', - '73.0.3630.0', - '72.0.3626.5', - '71.0.3578.85', - '72.0.3626.4', - '73.0.3628.3', - '73.0.3628.2', - '73.0.3629.1', - '73.0.3629.0', - '72.0.3626.3', - '71.0.3578.84', - '73.0.3628.1', - '73.0.3628.0', - '71.0.3578.83', - '73.0.3627.1', - '73.0.3627.0', - '72.0.3626.2', - '71.0.3578.82', - '71.0.3578.81', - '71.0.3578.80', - '72.0.3626.1', - '72.0.3626.0', - '71.0.3578.79', - '70.0.3538.124', - '71.0.3578.78', - '72.0.3623.4', - '72.0.3625.2', - '72.0.3625.1', - '72.0.3625.0', - '71.0.3578.77', - '70.0.3538.123', - '72.0.3624.4', - '72.0.3624.3', - '72.0.3624.2', - '71.0.3578.76', - '72.0.3624.1', - '72.0.3624.0', - '72.0.3623.3', - '71.0.3578.75', - '70.0.3538.122', - '71.0.3578.74', - '72.0.3623.2', - '72.0.3610.3', - '72.0.3623.1', - '72.0.3623.0', - '72.0.3622.3', - '72.0.3622.2', - '71.0.3578.73', - '70.0.3538.121', - '72.0.3622.1', - '72.0.3622.0', - '71.0.3578.72', - '70.0.3538.120', - '72.0.3621.1', - '72.0.3621.0', - '71.0.3578.71', - '70.0.3538.119', - '72.0.3620.1', - '72.0.3620.0', - '71.0.3578.70', - '70.0.3538.118', - '71.0.3578.69', - '72.0.3619.1', - '72.0.3619.0', - '71.0.3578.68', - '70.0.3538.117', - '71.0.3578.67', - '72.0.3618.1', - '72.0.3618.0', - '71.0.3578.66', - '70.0.3538.116', - '72.0.3617.1', - '72.0.3617.0', - '71.0.3578.65', - '70.0.3538.115', - '72.0.3602.3', - '71.0.3578.64', - '72.0.3616.1', - '72.0.3616.0', - '71.0.3578.63', - '70.0.3538.114', - '71.0.3578.62', - '72.0.3615.1', - '72.0.3615.0', - '71.0.3578.61', - '70.0.3538.113', - '72.0.3614.1', - '72.0.3614.0', - '71.0.3578.60', - '70.0.3538.112', - '72.0.3613.1', - '72.0.3613.0', - '71.0.3578.59', - '70.0.3538.111', - '72.0.3612.2', - '72.0.3612.1', - '72.0.3612.0', - '70.0.3538.110', - '71.0.3578.58', - '70.0.3538.109', - '72.0.3611.2', - '72.0.3611.1', - '72.0.3611.0', - '71.0.3578.57', - '70.0.3538.108', - '72.0.3610.2', - '71.0.3578.56', - '71.0.3578.55', - '72.0.3610.1', - '72.0.3610.0', - '71.0.3578.54', - '70.0.3538.107', - '71.0.3578.53', - '72.0.3609.3', - '71.0.3578.52', - '72.0.3609.2', - '71.0.3578.51', - '72.0.3608.5', - '72.0.3609.1', - '72.0.3609.0', - '71.0.3578.50', - '70.0.3538.106', - '72.0.3608.4', - '72.0.3608.3', - '72.0.3608.2', - '71.0.3578.49', - '72.0.3608.1', - '72.0.3608.0', - '70.0.3538.105', - '71.0.3578.48', - '72.0.3607.1', - '72.0.3607.0', - '71.0.3578.47', - '70.0.3538.104', - '72.0.3606.2', - '72.0.3606.1', - '72.0.3606.0', - '71.0.3578.46', - '70.0.3538.103', - '70.0.3538.102', - '72.0.3605.3', - '72.0.3605.2', - '72.0.3605.1', - '72.0.3605.0', - '71.0.3578.45', - '70.0.3538.101', - '71.0.3578.44', - '71.0.3578.43', - '70.0.3538.100', - '70.0.3538.99', - '71.0.3578.42', - '72.0.3604.1', - '72.0.3604.0', - '71.0.3578.41', - '70.0.3538.98', - '71.0.3578.40', - '72.0.3603.2', - '72.0.3603.1', - '72.0.3603.0', - '71.0.3578.39', - '70.0.3538.97', - '72.0.3602.2', - '71.0.3578.38', - '71.0.3578.37', - '72.0.3602.1', - '72.0.3602.0', - '71.0.3578.36', - '70.0.3538.96', - '72.0.3601.1', - '72.0.3601.0', - '71.0.3578.35', - '70.0.3538.95', - '72.0.3600.1', - '72.0.3600.0', - '71.0.3578.34', - '70.0.3538.94', - '72.0.3599.3', - '72.0.3599.2', - '72.0.3599.1', - '72.0.3599.0', - '71.0.3578.33', - '70.0.3538.93', - '72.0.3598.1', - '72.0.3598.0', - '71.0.3578.32', - '70.0.3538.87', - '72.0.3597.1', - '72.0.3597.0', - '72.0.3596.2', - '71.0.3578.31', - '70.0.3538.86', - '71.0.3578.30', - '71.0.3578.29', - '72.0.3596.1', - '72.0.3596.0', - '71.0.3578.28', - '70.0.3538.85', - '72.0.3595.2', - '72.0.3591.3', - '72.0.3595.1', - '72.0.3595.0', - '71.0.3578.27', - '70.0.3538.84', - '72.0.3594.1', - '72.0.3594.0', - '71.0.3578.26', - '70.0.3538.83', - '72.0.3593.2', - '72.0.3593.1', - '72.0.3593.0', - '71.0.3578.25', - '70.0.3538.82', - '72.0.3589.3', - '72.0.3592.2', - '72.0.3592.1', - '72.0.3592.0', - '71.0.3578.24', - '72.0.3589.2', - '70.0.3538.81', - '70.0.3538.80', - '72.0.3591.2', - '72.0.3591.1', - '72.0.3591.0', - '71.0.3578.23', - '70.0.3538.79', - '71.0.3578.22', - '72.0.3590.1', - '72.0.3590.0', - '71.0.3578.21', - '70.0.3538.78', - '70.0.3538.77', - '72.0.3589.1', - '72.0.3589.0', - '71.0.3578.20', - '70.0.3538.76', - '71.0.3578.19', - '70.0.3538.75', - '72.0.3588.1', - '72.0.3588.0', - '71.0.3578.18', - '70.0.3538.74', - '72.0.3586.2', - '72.0.3587.0', - '71.0.3578.17', - '70.0.3538.73', - '72.0.3586.1', - '72.0.3586.0', - '71.0.3578.16', - '70.0.3538.72', - '72.0.3585.1', - '72.0.3585.0', - '71.0.3578.15', - '70.0.3538.71', - '71.0.3578.14', - '72.0.3584.1', - '72.0.3584.0', - '71.0.3578.13', - '70.0.3538.70', - '72.0.3583.2', - '71.0.3578.12', - '72.0.3583.1', - '72.0.3583.0', - '71.0.3578.11', - '70.0.3538.69', - '71.0.3578.10', - '72.0.3582.0', - '72.0.3581.4', - '71.0.3578.9', - '70.0.3538.67', - '72.0.3581.3', - '72.0.3581.2', - '72.0.3581.1', - '72.0.3581.0', - '71.0.3578.8', - '70.0.3538.66', - '72.0.3580.1', - '72.0.3580.0', - '71.0.3578.7', - '70.0.3538.65', - '71.0.3578.6', - '72.0.3579.1', - '72.0.3579.0', - '71.0.3578.5', - '70.0.3538.64', - '71.0.3578.4', - '71.0.3578.3', - '71.0.3578.2', - '71.0.3578.1', - '71.0.3578.0', - '70.0.3538.63', - '69.0.3497.128', - '70.0.3538.62', - '70.0.3538.61', - '70.0.3538.60', - '70.0.3538.59', - '71.0.3577.1', - '71.0.3577.0', - '70.0.3538.58', - '69.0.3497.127', - '71.0.3576.2', - '71.0.3576.1', - '71.0.3576.0', - '70.0.3538.57', - '70.0.3538.56', - '71.0.3575.2', - '70.0.3538.55', - '69.0.3497.126', - '70.0.3538.54', - '71.0.3575.1', - '71.0.3575.0', - '71.0.3574.1', - '71.0.3574.0', - '70.0.3538.53', - '69.0.3497.125', - '70.0.3538.52', - '71.0.3573.1', - '71.0.3573.0', - '70.0.3538.51', - '69.0.3497.124', - '71.0.3572.1', - '71.0.3572.0', - '70.0.3538.50', - '69.0.3497.123', - '71.0.3571.2', - '70.0.3538.49', - '69.0.3497.122', - '71.0.3571.1', - '71.0.3571.0', - '70.0.3538.48', - '69.0.3497.121', - '71.0.3570.1', - '71.0.3570.0', - '70.0.3538.47', - '69.0.3497.120', - '71.0.3568.2', - '71.0.3569.1', - '71.0.3569.0', - '70.0.3538.46', - '69.0.3497.119', - '70.0.3538.45', - '71.0.3568.1', - '71.0.3568.0', - '70.0.3538.44', - '69.0.3497.118', - '70.0.3538.43', - '70.0.3538.42', - '71.0.3567.1', - '71.0.3567.0', - '70.0.3538.41', - '69.0.3497.117', - '71.0.3566.1', - '71.0.3566.0', - '70.0.3538.40', - '69.0.3497.116', - '71.0.3565.1', - '71.0.3565.0', - '70.0.3538.39', - '69.0.3497.115', - '71.0.3564.1', - '71.0.3564.0', - '70.0.3538.38', - '69.0.3497.114', - '71.0.3563.0', - '71.0.3562.2', - '70.0.3538.37', - '69.0.3497.113', - '70.0.3538.36', - '70.0.3538.35', - '71.0.3562.1', - '71.0.3562.0', - '70.0.3538.34', - '69.0.3497.112', - '70.0.3538.33', - '71.0.3561.1', - '71.0.3561.0', - '70.0.3538.32', - '69.0.3497.111', - '71.0.3559.6', - '71.0.3560.1', - '71.0.3560.0', - '71.0.3559.5', - '71.0.3559.4', - '70.0.3538.31', - '69.0.3497.110', - '71.0.3559.3', - '70.0.3538.30', - '69.0.3497.109', - '71.0.3559.2', - '71.0.3559.1', - '71.0.3559.0', - '70.0.3538.29', - '69.0.3497.108', - '71.0.3558.2', - '71.0.3558.1', - '71.0.3558.0', - '70.0.3538.28', - '69.0.3497.107', - '71.0.3557.2', - '71.0.3557.1', - '71.0.3557.0', - '70.0.3538.27', - '69.0.3497.106', - '71.0.3554.4', - '70.0.3538.26', - '71.0.3556.1', - '71.0.3556.0', - '70.0.3538.25', - '71.0.3554.3', - '69.0.3497.105', - '71.0.3554.2', - '70.0.3538.24', - '69.0.3497.104', - '71.0.3555.2', - '70.0.3538.23', - '71.0.3555.1', - '71.0.3555.0', - '70.0.3538.22', - '69.0.3497.103', - '71.0.3554.1', - '71.0.3554.0', - '70.0.3538.21', - '69.0.3497.102', - '71.0.3553.3', - '70.0.3538.20', - '69.0.3497.101', - '71.0.3553.2', - '69.0.3497.100', - '71.0.3553.1', - '71.0.3553.0', - '70.0.3538.19', - '69.0.3497.99', - '69.0.3497.98', - '69.0.3497.97', - '71.0.3552.6', - '71.0.3552.5', - '71.0.3552.4', - '71.0.3552.3', - '71.0.3552.2', - '71.0.3552.1', - '71.0.3552.0', - '70.0.3538.18', - '69.0.3497.96', - '71.0.3551.3', - '71.0.3551.2', - '71.0.3551.1', - '71.0.3551.0', - '70.0.3538.17', - '69.0.3497.95', - '71.0.3550.3', - '71.0.3550.2', - '71.0.3550.1', - '71.0.3550.0', - '70.0.3538.16', - '69.0.3497.94', - '71.0.3549.1', - '71.0.3549.0', - '70.0.3538.15', - '69.0.3497.93', - '69.0.3497.92', - '71.0.3548.1', - '71.0.3548.0', - '70.0.3538.14', - '69.0.3497.91', - '71.0.3547.1', - '71.0.3547.0', - '70.0.3538.13', - '69.0.3497.90', - '71.0.3546.2', - '69.0.3497.89', - '71.0.3546.1', - '71.0.3546.0', - '70.0.3538.12', - '69.0.3497.88', - '71.0.3545.4', - '71.0.3545.3', - '71.0.3545.2', - '71.0.3545.1', - '71.0.3545.0', - '70.0.3538.11', - '69.0.3497.87', - '71.0.3544.5', - '71.0.3544.4', - '71.0.3544.3', - '71.0.3544.2', - '71.0.3544.1', - '71.0.3544.0', - '69.0.3497.86', - '70.0.3538.10', - '69.0.3497.85', - '70.0.3538.9', - '69.0.3497.84', - '71.0.3543.4', - '70.0.3538.8', - '71.0.3543.3', - '71.0.3543.2', - '71.0.3543.1', - '71.0.3543.0', - '70.0.3538.7', - '69.0.3497.83', - '71.0.3542.2', - '71.0.3542.1', - '71.0.3542.0', - '70.0.3538.6', - '69.0.3497.82', - '69.0.3497.81', - '71.0.3541.1', - '71.0.3541.0', - '70.0.3538.5', - '69.0.3497.80', - '71.0.3540.1', - '71.0.3540.0', - '70.0.3538.4', - '69.0.3497.79', - '70.0.3538.3', - '71.0.3539.1', - '71.0.3539.0', - '69.0.3497.78', - '68.0.3440.134', - '69.0.3497.77', - '70.0.3538.2', - '70.0.3538.1', - '70.0.3538.0', - '69.0.3497.76', - '68.0.3440.133', - '69.0.3497.75', - '70.0.3537.2', - '70.0.3537.1', - '70.0.3537.0', - '69.0.3497.74', - '68.0.3440.132', - '70.0.3536.0', - '70.0.3535.5', - '70.0.3535.4', - '70.0.3535.3', - '69.0.3497.73', - '68.0.3440.131', - '70.0.3532.8', - '70.0.3532.7', - '69.0.3497.72', - '69.0.3497.71', - '70.0.3535.2', - '70.0.3535.1', - '70.0.3535.0', - '69.0.3497.70', - '68.0.3440.130', - '69.0.3497.69', - '68.0.3440.129', - '70.0.3534.4', - '70.0.3534.3', - '70.0.3534.2', - '70.0.3534.1', - '70.0.3534.0', - '69.0.3497.68', - '68.0.3440.128', - '70.0.3533.2', - '70.0.3533.1', - '70.0.3533.0', - '69.0.3497.67', - '68.0.3440.127', - '70.0.3532.6', - '70.0.3532.5', - '70.0.3532.4', - '69.0.3497.66', - '68.0.3440.126', - '70.0.3532.3', - '70.0.3532.2', - '70.0.3532.1', - '69.0.3497.60', - '69.0.3497.65', - '69.0.3497.64', - '70.0.3532.0', - '70.0.3531.0', - '70.0.3530.4', - '70.0.3530.3', - '70.0.3530.2', - '69.0.3497.58', - '68.0.3440.125', - '69.0.3497.57', - '69.0.3497.56', - '69.0.3497.55', - '69.0.3497.54', - '70.0.3530.1', - '70.0.3530.0', - '69.0.3497.53', - '68.0.3440.124', - '69.0.3497.52', - '70.0.3529.3', - '70.0.3529.2', - '70.0.3529.1', - '70.0.3529.0', - '69.0.3497.51', - '70.0.3528.4', - '68.0.3440.123', - '70.0.3528.3', - '70.0.3528.2', - '70.0.3528.1', - '70.0.3528.0', - '69.0.3497.50', - '68.0.3440.122', - '70.0.3527.1', - '70.0.3527.0', - '69.0.3497.49', - '68.0.3440.121', - '70.0.3526.1', - '70.0.3526.0', - '68.0.3440.120', - '69.0.3497.48', - '69.0.3497.47', - '68.0.3440.119', - '68.0.3440.118', - '70.0.3525.5', - '70.0.3525.4', - '70.0.3525.3', - '68.0.3440.117', - '69.0.3497.46', - '70.0.3525.2', - '70.0.3525.1', - '70.0.3525.0', - '69.0.3497.45', - '68.0.3440.116', - '70.0.3524.4', - '70.0.3524.3', - '69.0.3497.44', - '70.0.3524.2', - '70.0.3524.1', - '70.0.3524.0', - '70.0.3523.2', - '69.0.3497.43', - '68.0.3440.115', - '70.0.3505.9', - '69.0.3497.42', - '70.0.3505.8', - '70.0.3523.1', - '70.0.3523.0', - '69.0.3497.41', - '68.0.3440.114', - '70.0.3505.7', - '69.0.3497.40', - '70.0.3522.1', - '70.0.3522.0', - '70.0.3521.2', - '69.0.3497.39', - '68.0.3440.113', - '70.0.3505.6', - '70.0.3521.1', - '70.0.3521.0', - '69.0.3497.38', - '68.0.3440.112', - '70.0.3520.1', - '70.0.3520.0', - '69.0.3497.37', - '68.0.3440.111', - '70.0.3519.3', - '70.0.3519.2', - '70.0.3519.1', - '70.0.3519.0', - '69.0.3497.36', - '68.0.3440.110', - '70.0.3518.1', - '70.0.3518.0', - '69.0.3497.35', - '69.0.3497.34', - '68.0.3440.109', - '70.0.3517.1', - '70.0.3517.0', - '69.0.3497.33', - '68.0.3440.108', - '69.0.3497.32', - '70.0.3516.3', - '70.0.3516.2', - '70.0.3516.1', - '70.0.3516.0', - '69.0.3497.31', - '68.0.3440.107', - '70.0.3515.4', - '68.0.3440.106', - '70.0.3515.3', - '70.0.3515.2', - '70.0.3515.1', - '70.0.3515.0', - '69.0.3497.30', - '68.0.3440.105', - '68.0.3440.104', - '70.0.3514.2', - '70.0.3514.1', - '70.0.3514.0', - '69.0.3497.29', - '68.0.3440.103', - '70.0.3513.1', - '70.0.3513.0', - '69.0.3497.28', - ) - @classmethod def _extract_urls(cls, webpage): return re.findall( - r']+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' + r'(?x)]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' % (cls._DOMAINS, cls._EMBED_WORD), webpage) - def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + def _extract_decrypted_page(self, page_url, webpage, video_id): phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) + webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id) return webpage def _real_extract(self, url): @@ -1977,16 +411,13 @@ class OpenloadIE(InfoExtractor): video_id = mobj.group('id') url_pattern = 'https://%s/%%s/%s/' % (host, video_id) - headers = { - 'User-Agent': self._USER_AGENT_TPL % random.choice(self._CHROME_VERSIONS), - } for path in (self._EMBED_WORD, self._STREAM_WORD): page_url = url_pattern % path last = path == self._STREAM_WORD webpage = self._download_webpage( page_url, video_id, 'Downloading %s webpage' % path, - headers=headers, fatal=last) + fatal=last) if not webpage: continue if 'File not found' in webpage or 'deleted by the owner' in webpage: @@ -1995,7 +426,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True, video_id=video_id) break - webpage = self._extract_decrypted_page(page_url, webpage, video_id, headers) + webpage = self._extract_decrypted_page(page_url, webpage, video_id) for element_id in self._URL_IDS: decoded_id = get_element_by_id(element_id, webpage) if decoded_id: @@ -2026,14 +457,13 @@ class OpenloadIE(InfoExtractor): 'url': video_url, 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'), 'subtitles': subtitles, - 'http_headers': headers, } class VerystreamIE(OpenloadIE): IE_NAME = 'verystream' - _DOMAINS = r'(?:verystream\.com)' + _DOMAINS = r'(?:verystream\.com|woof\.tube)' _VALID_URL = r'''(?x) https?:// (?P @@ -2061,5 +491,5 @@ class VerystreamIE(OpenloadIE): 'only_matching': True, }] - def _extract_decrypted_page(self, page_url, webpage, video_id, headers): + def _extract_decrypted_page(self, page_url, webpage, video_id): return webpage # for Verystream, the webpage is already decrypted diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 1324137df..11ad3b3b8 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -5,26 +5,27 @@ import re from .common import InfoExtractor from ..compat import ( - compat_str, + # compat_str, compat_HTTPError, ) from ..utils import ( clean_html, ExtractorError, - remove_end, + # remove_end, + str_or_none, strip_or_none, unified_timestamp, - urljoin, + # urljoin, ) class PacktPubBaseIE(InfoExtractor): - _PACKT_BASE = 'https://www.packtpub.com' - _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE + # _PACKT_BASE = 'https://www.packtpub.com' + _STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/' class PacktPubIE(PacktPubBaseIE): - _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P\d+)/(?P\d+)/(?P\d+)' + _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P\d+)/(?P[^/]+)/(?P[^/]+)(?:/(?P[^/?&#]+))?' _TESTS = [{ 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', @@ -40,6 +41,9 @@ class PacktPubIE(PacktPubBaseIE): }, { 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro', 'only_matching': True, + }, { + 'url': 'https://subscription.packtpub.com/video/programming/9781838988906/p1/video1_1/business-card-project', + 'only_matching': True, }] _NETRC_MACHINE = 'packtpub' _TOKEN = None @@ -50,9 +54,9 @@ class PacktPubIE(PacktPubBaseIE): return try: self._TOKEN = self._download_json( - self._MAPT_REST + '/users/tokens', None, + 'https://services.packtpub.com/auth-v1/users/tokens', None, 'Downloading Authorization Token', data=json.dumps({ - 'email': username, + 'username': username, 'password': password, }).encode())['data']['access'] except ExtractorError as e: @@ -61,54 +65,40 @@ class PacktPubIE(PacktPubBaseIE): raise ExtractorError(message, expected=True) raise - def _handle_error(self, response): - if response.get('status') != 'success': - raise ExtractorError( - '% said: %s' % (self.IE_NAME, response['message']), - expected=True) - - def _download_json(self, *args, **kwargs): - response = super(PacktPubIE, self)._download_json(*args, **kwargs) - self._handle_error(response) - return response - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - course_id, chapter_id, video_id = mobj.group( - 'course_id', 'chapter_id', 'id') + course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups() headers = {} if self._TOKEN: headers['Authorization'] = 'Bearer ' + self._TOKEN - video = self._download_json( - '%s/users/me/products/%s/chapters/%s/sections/%s' - % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, - 'Downloading JSON video', headers=headers)['data'] + try: + video_url = self._download_json( + 'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id, + 'Downloading JSON video', headers=headers)['data'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + self.raise_login_required('This video is locked') + raise - content = video.get('content') - if not content: - self.raise_login_required('This video is locked') + # TODO: find a better way to avoid duplicating course requests + # metadata = self._download_json( + # '%s/products/%s/chapters/%s/sections/%s/metadata' + # % (self._MAPT_REST, course_id, chapter_id, video_id), + # video_id)['data'] - video_url = content['file'] - - metadata = self._download_json( - '%s/products/%s/chapters/%s/sections/%s/metadata' - % (self._MAPT_REST, course_id, chapter_id, video_id), - video_id)['data'] - - title = metadata['pageTitle'] - course_title = metadata.get('title') - if course_title: - title = remove_end(title, ' - %s' % course_title) - timestamp = unified_timestamp(metadata.get('publicationDate')) - thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) + # title = metadata['pageTitle'] + # course_title = metadata.get('title') + # if course_title: + # title = remove_end(title, ' - %s' % course_title) + # timestamp = unified_timestamp(metadata.get('publicationDate')) + # thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) return { 'id': video_id, 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'timestamp': timestamp, + 'title': display_id or video_id, # title, + # 'thumbnail': thumbnail, + # 'timestamp': timestamp, } @@ -119,6 +109,7 @@ class PacktPubCourseIE(PacktPubBaseIE): 'info_dict': { 'id': '9781787122215', 'title': 'Learn Nodejs by building 12 projects [Video]', + 'description': 'md5:489da8d953f416e51927b60a1c7db0aa', }, 'playlist_count': 90, }, { @@ -136,35 +127,38 @@ class PacktPubCourseIE(PacktPubBaseIE): url, course_id = mobj.group('url', 'id') course = self._download_json( - '%s/products/%s/metadata' % (self._MAPT_REST, course_id), - course_id)['data'] + self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id) + metadata = self._download_json( + self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id, + course_id, fatal=False) or {} entries = [] - for chapter_num, chapter in enumerate(course['tableOfContents'], 1): - if chapter.get('type') != 'chapter': - continue - children = chapter.get('children') - if not isinstance(children, list): + for chapter_num, chapter in enumerate(course['chapters'], 1): + chapter_id = str_or_none(chapter.get('id')) + sections = chapter.get('sections') + if not chapter_id or not isinstance(sections, list): continue chapter_info = { 'chapter': chapter.get('title'), 'chapter_number': chapter_num, - 'chapter_id': chapter.get('id'), + 'chapter_id': chapter_id, } - for section in children: - if section.get('type') != 'section': - continue - section_url = section.get('seoUrl') - if not isinstance(section_url, compat_str): + for section in sections: + section_id = str_or_none(section.get('id')) + if not section_id or section.get('contentType') != 'video': continue entry = { '_type': 'url_transparent', - 'url': urljoin(url + '/', section_url), + 'url': '/'.join([url, chapter_id, section_id]), 'title': strip_or_none(section.get('title')), 'description': clean_html(section.get('summary')), + 'thumbnail': metadata.get('coverImage'), + 'timestamp': unified_timestamp(metadata.get('publicationDate')), 'ie_key': PacktPubIE.ie_key(), } entry.update(chapter_info) entries.append(entry) - return self.playlist_result(entries, course_id, course.get('title')) + return self.playlist_result( + entries, course_id, metadata.get('title'), + clean_html(metadata.get('about'))) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index e03c3d1d3..b50543e32 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -168,7 +168,7 @@ class PeerTubeIE(InfoExtractor): @staticmethod def _extract_peertube_url(webpage, source_url): mobj = re.match( - r'https?://(?P[^/]+)/videos/watch/(?P%s)' + r'https?://(?P[^/]+)/videos/(?:watch|embed)/(?P%s)' % PeerTubeIE._UUID_RE, source_url) if mobj and any(p in webpage for p in ( 'PeerTube<', diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index f723a2b3b..03da64b11 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -14,7 +14,7 @@ class PhilharmonieDeParisIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)| + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)| pad\.philharmoniedeparis\.fr/doc/CIMU/ ) (?P<id>\d+) @@ -40,6 +40,12 @@ class PhilharmonieDeParisIE(InfoExtractor): }, { 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, }] _LIVE_URL = 'https://live.philharmoniedeparis.fr' diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index c0c276a50..401298cb8 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -18,15 +18,14 @@ class PikselIE(InfoExtractor): _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)' _TESTS = [ { - 'url': 'http://player.piksel.com/v/nv60p12f', - 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235', + 'url': 'http://player.piksel.com/v/ums2867l', + 'md5': '34e34c8d89dc2559976a6079db531e85', 'info_dict': { - 'id': 'nv60p12f', + 'id': 'ums2867l', 'ext': 'mp4', - 'title': 'فن الحياة - الحلقة 1', - 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور', - 'timestamp': 1465231790, - 'upload_date': '20160606', + 'title': 'GX-005 with Caption', + 'timestamp': 1481335659, + 'upload_date': '20161210' } }, { @@ -39,7 +38,7 @@ class PikselIE(InfoExtractor): 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.', 'timestamp': 1486171129, - 'upload_date': '20170204', + 'upload_date': '20170204' } } ] @@ -113,6 +112,13 @@ class PikselIE(InfoExtractor): }) self._sort_formats(formats) + subtitles = {} + for caption in video_data.get('captions', []): + caption_url = caption.get('url') + if caption_url: + subtitles.setdefault(caption.get('locale', 'en'), []).append({ + 'url': caption_url}) + return { 'id': video_id, 'title': title, @@ -120,4 +126,5 @@ class PikselIE(InfoExtractor): 'thumbnail': video_data.get('thumbnailUrl'), 'timestamp': parse_iso8601(video_data.get('dateadd')), 'formats': formats, + 'subtitles': subtitles, } diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py index 557b2b5ad..602207beb 100644 --- a/youtube_dl/extractor/platzi.py +++ b/youtube_dl/extractor/platzi.py @@ -18,43 +18,10 @@ from ..utils import ( ) -class PlatziIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?: - platzi\.com/clases| # es version - courses\.platzi\.com/classes # en version - )/[^/]+/(?P<id>\d+)-[^/?\#&]+ - ''' +class PlatziBaseIE(InfoExtractor): _LOGIN_URL = 'https://platzi.com/login/' _NETRC_MACHINE = 'platzi' - _TESTS = [{ - 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', - 'md5': '8f56448241005b561c10f11a595b37e3', - 'info_dict': { - 'id': '12074', - 'ext': 'mp4', - 'title': 'Creando nuestra primera página', - 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', - 'duration': 420, - }, - 'skip': 'Requires platzi account credentials', - }, { - 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', - 'info_dict': { - 'id': '13430', - 'ext': 'mp4', - 'title': 'Background', - 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', - 'duration': 360, - }, - 'skip': 'Requires platzi account credentials', - 'params': { - 'skip_download': True, - }, - }] - def _real_initialize(self): self._login() @@ -97,6 +64,42 @@ class PlatziIE(InfoExtractor): 'Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') + +class PlatziIE(PlatziBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/[^/]+/(?P<id>\d+)-[^/?\#&]+ + ''' + + _TESTS = [{ + 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + def _real_extract(self, url): lecture_id = self._match_id(url) @@ -104,7 +107,11 @@ class PlatziIE(InfoExtractor): data = self._parse_json( self._search_regex( - r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'), + # client_data may contain "};" so that we have to try more + # strict regex first + (r'client_data\s*=\s*({.+?})\s*;\s*\n', + r'client_data\s*=\s*({.+?})\s*;'), + webpage, 'client data'), lecture_id) material = data['initialState']['material'] @@ -146,7 +153,7 @@ class PlatziIE(InfoExtractor): } -class PlatziCourseIE(InfoExtractor): +class PlatziCourseIE(PlatziBaseIE): _VALID_URL = r'''(?x) https?:// (?: diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 24c3600fe..20eac647a 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -39,7 +39,12 @@ class Porn91IE(InfoExtractor): r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title') title = title.replace('\n', '') - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + video_link_url = self._search_regex( + r'<textarea[^>]+id=["\']fm-video_link[^>]+>([^<]+)</textarea>', + webpage, 'video link') + videopage = self._download_webpage(video_link_url, video_id) + + info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0] duration = parse_duration(self._search_regex( r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False)) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 857434540..8d88ee499 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -4,32 +4,34 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( ExtractorError, int_or_none, - strip_or_none, - unescapeHTML, + str_or_none, urlencode_postdata, ) class RoosterTeethIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)' _LOGIN_URL = 'https://roosterteeth.com/login' _NETRC_MACHINE = 'roosterteeth' _TESTS = [{ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'md5': 'e2bd7764732d785ef797700a2489f212', 'info_dict': { - 'id': '26576', + 'id': '9156', 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'ext': 'mp4', - 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement', - 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', + 'title': 'Million Dollars, But... The Game Announcement', + 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', 'thumbnail': r're:^https?://.*\.png$', 'series': 'Million Dollars, But...', 'episode': 'Million Dollars, But... The Game Announcement', - 'comment_count': int, }, }, { 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', @@ -47,6 +49,9 @@ class RoosterTeethIE(InfoExtractor): # only available for FIRST members 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', 'only_matching': True, + }, { + 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'only_matching': True, }] def _login(self): @@ -89,60 +94,55 @@ class RoosterTeethIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id - webpage = self._download_webpage(url, display_id) - - episode = strip_or_none(unescapeHTML(self._search_regex( - (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', - r'<title>(?P<title>[^<]+)'), webpage, 'title', - default=None, group='title'))) - - title = strip_or_none(self._og_search_title( - webpage, default=None)) or episode - - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?Phttp.+?\.m3u8.*?)\1', - webpage, 'm3u8 url', default=None, group='url') - - if not m3u8_url: - if re.search(r']+class=["\']non-sponsor', webpage): - self.raise_login_required( - '%s is only available for FIRST members' % display_id) - - if re.search(r']+class=["\']golive-gate', webpage): - self.raise_login_required('%s is not available yet' % display_id) - - raise ExtractorError('Unable to extract m3u8 URL') + try: + m3u8_url = self._download_json( + api_episode_url + '/videos', display_id, + 'Downloading video JSON metadata')['data'][0]['attributes']['url'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: + self.raise_login_required( + '%s is only available for FIRST members' % display_id) + raise formats = self._extract_m3u8_formats( - m3u8_url, display_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls') + m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') self._sort_formats(formats) - description = strip_or_none(self._og_search_description(webpage)) - thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage)) + episode = self._download_json( + api_episode_url, display_id, + 'Downloading episode JSON metadata')['data'][0] + attributes = episode['attributes'] + title = attributes.get('title') or attributes['display_title'] + video_id = compat_str(episode['id']) - series = self._search_regex( - (r'

More ([^<]+)

', r']+>See All ([^<]+) Videos<'), - webpage, 'series', fatal=False) - - comment_count = int_or_none(self._search_regex( - r'>Comments \((\d+)\)<', webpage, - 'comment count', fatal=False)) - - video_id = self._search_regex( - (r'containerId\s*=\s*["\']episode-(\d+)\1', - r'[0-9a-zA-Z]+)' - - _TEST = { - 'url': 'http://rudo.video/vod/oTzw0MGnyG', - 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', - 'info_dict': { - 'id': 'oTzw0MGnyG', - 'ext': 'mp4', - 'title': 'Comentario Tomás Mosciatti', - 'upload_date': '20160617', - }, - } - - @classmethod - def _extract_url(cls, webpage): - mobj = re.search( - r']+src=(?P[\'"])(?P(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', - webpage) - if mobj: - return mobj.group('url') - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') - - jwplayer_data = self._parse_json(self._search_regex( - r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, - transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) - - info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') - - info_dict.update({ - 'title': self._og_search_title(webpage), - 'upload_date': unified_strdate(get_element_by_class('date', webpage)), - }) - - return info_dict diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 8d4806794..bd9ee1647 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -68,9 +68,10 @@ class SafariBaseIE(InfoExtractor): raise ExtractorError( 'Unable to login: %s' % credentials, expected=True) - # oreilly serves two same groot_sessionid cookies in Set-Cookie header - # and expects first one to be actually set - self._apply_first_set_cookie_header(urlh, 'groot_sessionid') + # oreilly serves two same instances of the following cookies + # in Set-Cookie header and expects first one to be actually set + for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'): + self._apply_first_set_cookie_header(urlh, cookie) _, urlh = self._download_webpage_handle( auth.get('redirect_uri') or next_uri, None, 'Completing login',) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 277c3c7b4..05538f3d6 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -197,7 +197,7 @@ class SoundcloudIE(InfoExtractor): 'skip_download': True, }, }, - # not avaialble via api.soundcloud.com/i1/tracks/id/streams + # not available via api.soundcloud.com/i1/tracks/id/streams { 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer', 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7', @@ -221,7 +221,7 @@ class SoundcloudIE(InfoExtractor): } ] - _CLIENT_ID = 'FweeGBOOEOYJWLJN3oEyToGLKhmSz0I7' + _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' @staticmethod def _extract_urls(webpage): diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index f11d728ca..e040ada29 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + merge_dicts, orderedSet, parse_duration, parse_resolution, @@ -26,6 +27,8 @@ class SpankBangIE(InfoExtractor): 'description': 'dillion harper masturbates on a bed', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', + 'timestamp': 1422571989, + 'upload_date': '20150129', 'age_limit': 18, } }, { @@ -106,31 +109,36 @@ class SpankBangIE(InfoExtractor): for format_id, format_url in stream.items(): if format_id.startswith(STREAM_URL_PREFIX): + if format_url and isinstance(format_url, list): + format_url = format_url[0] extract_format( format_id[len(STREAM_URL_PREFIX):], format_url) self._sort_formats(formats) + info = self._search_json_ld(webpage, video_id, default={}) + title = self._html_search_regex( - r'(?s)]*>(.+?)', webpage, 'title') + r'(?s)]*>(.+?)', webpage, 'title', default=None) description = self._search_regex( r']+\bclass=["\']bottom[^>]+>\s*

[^<]*

\s*

([^<]+)', - webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) - uploader = self._search_regex( - r'class="user"[^>]*>]+>([^<]+)', + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail(webpage, default=None) + uploader = self._html_search_regex( + (r'(?s)]+class=["\']profile[^>]+>(.+?)', + r'class="user"[^>]*>]+>([^<]+)'), webpage, 'uploader', default=None) duration = parse_duration(self._search_regex( r']+\bclass=["\']right_side[^>]+>\s*([^<]+)', - webpage, 'duration', fatal=False)) + webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( - r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False)) + r'([\d,.]+)\s+plays', webpage, 'view count', default=None)) age_limit = self._rta_search(webpage) - return { + return merge_dicts({ 'id': video_id, - 'title': title, + 'title': title or video_id, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, @@ -138,7 +146,8 @@ class SpankBangIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': age_limit, - } + }, info + ) class SpankBangPlaylistIE(InfoExtractor): diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 21b93a5b3..7c11ea7aa 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -22,7 +22,7 @@ class BellatorIE(MTVServicesInfoExtractor): 'only_matching': True, }] - _FEED_URL = 'http://www.spike.com/feeds/mrss/' + _FEED_URL = 'http://www.bellator.com/feeds/mrss/' _GEO_COUNTRIES = ['US'] diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 9b60cc462..db5a4f44e 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -133,7 +133,7 @@ class TEDIE(InfoExtractor): def _extract_info(self, webpage): info_json = self._search_regex( - r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*', + r'(?s)q\(\s*"\w+.init"\s*,\s*({.+?})\)\s*', webpage, 'info json') return json.loads(info_json) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 51923e44a..a819d048c 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -72,8 +72,13 @@ class TV4IE(InfoExtractor): video_id = self._match_id(url) info = self._download_json( - 'http://www.tv4play.se/player/assets/%s.json' % video_id, - video_id, 'Downloading video info JSON') + 'https://playback-api.b17g.net/asset/%s' % video_id, + video_id, 'Downloading video info JSON', query={ + 'service': 'tv4', + 'device': 'browser', + 'protocol': 'hls,dash', + 'drm': 'widevine', + })['metadata'] title = info['title'] @@ -111,5 +116,9 @@ class TV4IE(InfoExtractor): 'timestamp': parse_iso8601(info.get('broadcast_date_time')), 'duration': int_or_none(info.get('duration')), 'thumbnail': info.get('image'), - 'is_live': info.get('is_live') is True, + 'is_live': info.get('isLive') is True, + 'series': info.get('seriesTitle'), + 'season_number': int_or_none(info.get('seasonNumber')), + 'episode': info.get('episodeTitle'), + 'episode_number': int_or_none(info.get('episodeNumber')), } diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index 3475ef4c3..180259aba 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -9,6 +9,8 @@ from ..utils import ( float_or_none, int_or_none, parse_age_limit, + try_get, + url_or_none, ) @@ -23,11 +25,10 @@ class TvigleIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.tvigle.ru/video/sokrat/', - 'md5': '36514aed3657d4f70b4b2cef8eb520cd', 'info_dict': { 'id': '1848932', 'display_id': 'sokrat', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Сократ', 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17', 'duration': 6586, @@ -37,7 +38,6 @@ class TvigleIE(InfoExtractor): }, { 'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/', - 'md5': 'e7efe5350dd5011d0de6550b53c3ba7b', 'info_dict': { 'id': '5142516', 'ext': 'flv', @@ -62,7 +62,7 @@ class TvigleIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._html_search_regex( (r']+class=["\']player["\'][^>]+id=["\'](\d+)', - r'var\s+cloudId\s*=\s*["\'](\d+)', + r'cloudId\s*=\s*["\'](\d+)', r'class="video-preview current_playing" id="(\d+)"'), webpage, 'video id') @@ -90,21 +90,40 @@ class TvigleIE(InfoExtractor): age_limit = parse_age_limit(item.get('ageRestrictions')) formats = [] - for vcodec, fmts in item['videos'].items(): + for vcodec, url_or_fmts in item['videos'].items(): if vcodec == 'hls': - continue - for format_id, video_url in fmts.items(): - if format_id == 'm3u8': + m3u8_url = url_or_none(url_or_fmts) + if not m3u8_url: continue - height = self._search_regex( - r'^(\d+)[pP]$', format_id, 'height', default=None) - formats.append({ - 'url': video_url, - 'format_id': '%s-%s' % (vcodec, format_id), - 'vcodec': vcodec, - 'height': int_or_none(height), - 'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)), - }) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif vcodec == 'dash': + mpd_url = url_or_none(url_or_fmts) + if not mpd_url: + continue + formats.extend(self._extract_mpd_formats( + mpd_url, video_id, mpd_id='dash', fatal=False)) + else: + if not isinstance(url_or_fmts, dict): + continue + for format_id, video_url in url_or_fmts.items(): + if format_id == 'm3u8': + continue + video_url = url_or_none(video_url) + if not video_url: + continue + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) + filesize = int_or_none(try_get( + item, lambda x: x['video_files_size'][vcodec][format_id])) + formats.append({ + 'url': video_url, + 'format_id': '%s-%s' % (vcodec, format_id), + 'vcodec': vcodec, + 'height': int_or_none(height), + 'filesize': filesize, + }) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py index 957cf1ea2..791144128 100644 --- a/youtube_dl/extractor/tvland.py +++ b/youtube_dl/extractor/tvland.py @@ -1,32 +1,35 @@ # coding: utf-8 from __future__ import unicode_literals -from .mtv import MTVServicesInfoExtractor +from .spike import ParamountNetworkIE -class TVLandIE(MTVServicesInfoExtractor): +class TVLandIE(ParamountNetworkIE): IE_NAME = 'tvland.com' _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P[^/?#.]+)' _FEED_URL = 'http://www.tvland.com/feeds/mrss/' _TESTS = [{ # Geo-restricted. Without a proxy metadata are still there. With a # proxy it redirects to http://m.tvland.com/app/ - 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', + 'url': 'https://www.tvland.com/episodes/s04pzf/everybody-loves-raymond-the-dog-season-1-ep-19', 'info_dict': { - 'description': 'md5:80973e81b916a324e05c14a3fb506d29', - 'title': 'The Invasion', + 'description': 'md5:84928e7a8ad6649371fbf5da5e1ad75a', + 'title': 'The Dog', }, - 'playlist': [], + 'playlist_mincount': 5, }, { - 'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies', + 'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6', 'md5': 'e2c6389401cf485df26c79c247b08713', 'info_dict': { - 'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88', + 'id': '891f7d3c-5b5b-4753-b879-b7ba1a601757', 'ext': 'mp4', - 'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies', - 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269', - 'upload_date': '20151228', - 'timestamp': 1451289600, + 'title': 'Younger|April 30, 2019|6|NO-EPISODE#|A First Look at Younger Season 6', + 'description': 'md5:595ea74578d3a888ae878dfd1c7d4ab2', + 'upload_date': '20190430', + 'timestamp': 1556658000, + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301', diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 6590e1fd0..de0fb5063 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + NO_DEFAULT, unescapeHTML, ) @@ -17,9 +18,21 @@ class TVN24IE(InfoExtractor): 'id': '1584444', 'ext': 'mp4', 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', - 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', + 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.', 'thumbnail': 're:https?://.*[.]jpeg', } + }, { + # different layout + 'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html', + 'info_dict': { + 'id': '1771763', + 'ext': 'mp4', + 'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)', + 'thumbnail': 're:https?://.*', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html', 'only_matching': True, @@ -35,18 +48,21 @@ class TVN24IE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) - title = self._og_search_title(webpage) + title = self._og_search_title( + webpage, default=None) or self._search_regex( + r']+class=["\']magazineItemHeader[^>]+>(.+?)(?!\1).+?)\1' % attr, webpage, - name, group='json', fatal=fatal) or '{}', - video_id, transform_source=unescapeHTML, fatal=fatal) + name, group='json', default=default, fatal=fatal) or '{}', + display_id, transform_source=unescapeHTML, fatal=fatal) quality_data = extract_json('data-quality', 'formats') @@ -59,16 +75,24 @@ class TVN24IE(InfoExtractor): }) self._sort_formats(formats) - description = self._og_search_description(webpage) + description = self._og_search_description(webpage, default=None) thumbnail = self._og_search_thumbnail( webpage, default=None) or self._html_search_regex( r'\bdata-poster=(["\'])(?P(?!\1).+?)\1', webpage, 'thumbnail', group='url') + video_id = None + share_params = extract_json( - 'data-share-params', 'share params', fatal=False) + 'data-share-params', 'share params', default=None) if isinstance(share_params, dict): - video_id = share_params.get('id') or video_id + video_id = share_params.get('id') + + if not video_id: + video_id = self._search_regex( + r'data-vid-id=["\'](\d+)', webpage, 'video id', + default=None) or self._search_regex( + r',(\d+)\.html', url, 'video id', default=display_id) return { 'id': video_id, diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index dc5ff29c3..0500e33a6 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -317,7 +317,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'Downloading %s access token' % self._ITEM_TYPE) formats = self._extract_m3u8_formats( - '%s/vod/%s?%s' % ( + '%s/vod/%s.m3u8?%s' % ( self._USHER_BASE, item_id, compat_urllib_parse_urlencode({ 'allow_source': 'true', diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 41d0b6be8..cebb6238c 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -428,11 +428,22 @@ class TwitterIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, { + 'url': 'https://twitter.com/foobar/status/1087791357756956680', + 'info_dict': { + 'id': '1087791357756956680', + 'ext': 'mp4', + 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'md5:66d493500c013e3e2d434195746a7f78', + 'uploader': 'Twitter', + 'uploader_id': 'Twitter', + 'duration': 61.567, + }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - user_id = mobj.group('user_id') twid = mobj.group('id') webpage, urlh = self._download_webpage_handle( @@ -441,8 +452,13 @@ class TwitterIE(InfoExtractor): if 'twitter.com/account/suspended' in urlh.geturl(): raise ExtractorError('Account suspended by Twitter.', expected=True) - if user_id is None: - mobj = re.match(self._VALID_URL, urlh.geturl()) + user_id = None + + redirect_mobj = re.match(self._VALID_URL, urlh.geturl()) + if redirect_mobj: + user_id = redirect_mobj.group('user_id') + + if not user_id: user_id = mobj.group('user_id') username = remove_end(self._og_search_title(webpage), ' on Twitter') diff --git a/youtube_dl/extractor/usanetwork.py b/youtube_dl/extractor/usanetwork.py index 823340776..54c7495cc 100644 --- a/youtube_dl/extractor/usanetwork.py +++ b/youtube_dl/extractor/usanetwork.py @@ -1,11 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .adobepass import AdobePassIE from ..utils import ( - extract_attributes, + NO_DEFAULT, smuggle_url, update_url_query, ) @@ -31,22 +29,22 @@ class USANetworkIE(AdobePassIE): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - player_params = extract_attributes(self._search_regex( - r'(]+data-usa-tve-player-container[^>]*>)', webpage, 'player params')) - video_id = player_params['data-mpx-guid'] - title = player_params['data-episode-title'] + def _x(name, default=NO_DEFAULT): + return self._search_regex( + r'data-%s\s*=\s*(["\'])(?P(?:(?!\1).)+)\1' % name, + webpage, name, default=default, group='value') - account_pid, path = re.search( - r'data-src="(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/(media/guid/\d+/\d+)', - webpage).groups() + video_id = _x('mpx-guid') + title = _x('episode-title') + mpx_account_id = _x('mpx-account-id', '2304992029') query = { 'mbr': 'true', } - if player_params.get('data-is-full-episode') == '1': + if _x('is-full-episode', None) == '1': query['manifest'] = 'm3u' - if player_params.get('data-entitlement') == 'auth': + if _x('is-entitlement', None) == '1': adobe_pass = {} drupal_settings = self._search_regex( r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', @@ -57,7 +55,7 @@ class USANetworkIE(AdobePassIE): adobe_pass = drupal_settings.get('adobePass', {}) resource = self._get_mvpd_resource( adobe_pass.get('adobePassResourceId', 'usa'), - title, video_id, player_params.get('data-episode-rating', 'TV-14')) + title, video_id, _x('episode-rating', 'TV-14')) query['auth'] = self._extract_mvpd_auth( url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource) @@ -65,11 +63,11 @@ class USANetworkIE(AdobePassIE): info.update({ '_type': 'url_transparent', 'url': smuggle_url(update_url_query( - 'http://link.theplatform.com/s/%s/%s' % (account_pid, path), + 'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id), query), {'force_smil_url': True}), 'id': video_id, 'title': title, - 'series': player_params.get('data-show-title'), + 'series': _x('show-title', None), 'episode': title, 'ie_key': 'ThePlatform', }) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 232e05816..4ea9f1b4b 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -34,6 +34,7 @@ class VevoIE(VevoBaseIE): (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| + https?://embed\.vevo\.com/.*?[?&]isrc=| vevo:) (?P[^&?#]+)''' @@ -144,6 +145,9 @@ class VevoIE(VevoBaseIE): # Geo-restricted to Netherlands/Germany 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', 'only_matching': True, + }, { + 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=', + 'only_matching': True, }] _VERSIONS = { 0: 'youtube', # only in AuthenticateVideo videoVersions diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b5b44a79a..ddf375c6c 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -2,12 +2,14 @@ from __future__ import unicode_literals import base64 +import functools import json import re import itertools from .common import InfoExtractor from ..compat import ( + compat_kwargs, compat_HTTPError, compat_str, compat_urlparse, @@ -19,6 +21,7 @@ from ..utils import ( int_or_none, merge_dicts, NO_DEFAULT, + OnDemandPagedList, parse_filesize, qualities, RegexNotFoundError, @@ -98,6 +101,13 @@ class VimeoBaseInfoExtractor(InfoExtractor): webpage, 'vuid', group='vuid') return xsrft, vuid + def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): + vimeo_config = self._search_regex( + r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', + webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) + if vimeo_config: + return self._parse_json(vimeo_config, video_id) + def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) @@ -253,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor): \. )? vimeo(?Ppro)?\.com/ - (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) + (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:.*?/)? (?: (?: @@ -580,11 +590,9 @@ class VimeoIE(VimeoBaseInfoExtractor): # and latter we extract those that are Vimeo specific. self.report_extraction(video_id) - vimeo_config = self._search_regex( - r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage, - 'vimeo config', default=None) + vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) if vimeo_config: - seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {}) + seed_status = vimeo_config.get('seed_status', {}) if seed_status.get('state') == 'failed': raise ExtractorError( '%s said: %s' % (self.IE_NAME, seed_status['title']), @@ -905,7 +913,7 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'https://vimeo\.com/album/(?P\d+)(?:$|[?#]|/(?!video))' + _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P\d+)(?:$|[?#]|/(?!video))' _TITLE_RE = r'