1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-24 01:14:32 +01:00

Merge branch 'ytdl-org-master'

This commit is contained in:
Thomas Tsiakalakis 2019-11-01 18:57:10 +01:00
commit 1e19182ace
60 changed files with 2002 additions and 2640 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.09.28 [debug] youtube-dl version 2019.10.29
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.09.28 [debug] youtube-dl version 2019.10.29
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2019.09.28** - [ ] I've verified that I'm running youtube-dl version **2019.10.29**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

107
ChangeLog
View File

@ -1,3 +1,110 @@
version 2019.10.29
Core
* [utils] Actualize major IPv4 address blocks per country
Extractors
+ [go] Add support for abc.com and freeform.com (#22823, #22864)
+ [mtv] Add support for mtvjapan.com
* [mtv] Fix extraction for mtv.de (#22113)
* [videodetective] Fix extraction
* [internetvideoarchive] Fix extraction
* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923)
- [hark] Remove extractor
- [tutv] Remove extractor
- [learnr] Remove extractor
- [macgamestore] Remove extractor
* [la7] Update Kaltura service URL (#22358)
* [thesun] Fix extraction (#16966)
- [makertv] Remove extractor
+ [tenplay] Add support for 10play.com.au (#21446)
* [soundcloud] Improve extraction
* Improve format extraction (#22123)
+ Extract uploader_id and uploader_url (#21916)
+ Extract all known thumbnails (#19071, #20659)
* Fix extration for private playlists (#20976)
+ Add support for playlist embeds (#20976)
* Skip preview formats (#22806)
* [dplay] Improve extraction
+ Add support for dplay.fi, dplay.jp and es.dplay.com (#16969)
* Fix it.dplay.com extraction (#22826)
+ Extract creator, tags and thumbnails
* Handle playback API call errors
+ [discoverynetworks] Add support for dplay.co.uk
* [vk] Improve extraction
+ Add support for Odnoklassniki embeds
+ Extract more videos from user lists (#4470)
+ Fix wall post audio extraction (#18332)
* Improve error detection (#22568)
+ [odnoklassniki] Add support for embeds
* [puhutv] Improve extraction
* Fix subtitles extraction
* Transform HLS URLs to HTTP URLs
* Improve metadata extraction
* [ceskatelevize] Skip DRM media
+ [facebook] Extract subtitles (#22777)
* [globo] Handle alternative hash signing method
version 2019.10.22
Core
* [utils] Improve subtitles_filename (#22753)
Extractors
* [facebook] Bypass download rate limits (#21018)
+ [contv] Add support for contv.com
- [viewster] Remove extractor
* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239)
* Update the list of domains
+ Add support for aa-encoded video data
* Improve jwplayer format extraction
+ Add support for Clappr sources
* [mangomolo] Fix video format extraction and add support for player URLs
* [audioboom] Improve metadata extraction
* [twitch] Update VOD URL matching (#22395, #22727)
- [mit] Remove support for video.mit.edu (#22403)
- [servingsys] Remove extractor (#22639)
* [dumpert] Fix extraction (#22428, #22564)
* [atresplayer] Fix extraction (#16277, #16716)
version 2019.10.16
Core
* [extractor/common] Make _is_valid_url more relaxed
Extractors
* [vimeo] Improve album videos id extraction (#22599)
+ [globo] Extract subtitles (#22713)
* [bokecc] Improve player params extraction (#22638)
* [nexx] Handle result list (#22666)
* [vimeo] Fix VHX embed extraction
* [nbc] Switch to graphql API (#18581, #22693, #22701)
- [vessel] Remove extractor
- [promptfile] Remove extractor (#6239)
* [kaltura] Fix service URL extraction (#22658)
* [kaltura] Fix embed info strip (#22658)
* [globo] Fix format extraction (#20319)
* [redtube] Improve metadata extraction (#22492, #22615)
* [pornhub:uservideos:upload] Fix extraction (#22619)
+ [telequebec:squat] Add support for squat.telequebec.tv (#18503)
- [wimp] Remove extractor (#22088, #22091)
+ [gfycat] Extend URL regular expression (#22225)
+ [chaturbate] Extend URL regular expression (#22309)
* [peertube] Update instances (#22414)
+ [telequebec] Add support for coucou.telequebec.tv (#22482)
+ [xvideos] Extend URL regular expression (#22471)
- [youtube] Remove support for invidious.enkirton.net (#22543)
+ [openload] Add support for oload.monster (#22592)
* [nrktv:seriebase] Fix extraction (#22596)
+ [youtube] Add support for yt.lelux.fi (#22597)
* [orf:tvthek] Make manifest requests non fatal (#22578)
* [teachable] Skip login when already logged in (#22572)
* [viewlift] Improve extraction (#22545)
* [nonktube] Fix extraction (#22544)
version 2019.09.28 version 2019.09.28
Core Core

View File

@ -183,6 +183,7 @@
- **ComedyCentralShortname** - **ComedyCentralShortname**
- **ComedyCentralTV** - **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **CONtv**
- **Corus** - **Corus**
- **Coub** - **Coub**
- **Cracked** - **Cracked**
@ -231,7 +232,6 @@
- **DouyuShow** - **DouyuShow**
- **DouyuTV**: 斗鱼 - **DouyuTV**: 斗鱼
- **DPlay** - **DPlay**
- **DPlayIt**
- **DRBonanza** - **DRBonanza**
- **Dropbox** - **Dropbox**
- **DrTuber** - **DrTuber**
@ -338,7 +338,6 @@
- **Goshgay** - **Goshgay**
- **GPUTechConf** - **GPUTechConf**
- **Groupon** - **Groupon**
- **Hark**
- **hbo** - **hbo**
- **HearThisAt** - **HearThisAt**
- **Heise** - **Heise**
@ -431,7 +430,6 @@
- **Lcp** - **Lcp**
- **LcpPlay** - **LcpPlay**
- **Le**: 乐视网 - **Le**: 乐视网
- **Learnr**
- **Lecture2Go** - **Lecture2Go**
- **Lecturio** - **Lecturio**
- **LecturioCourse** - **LecturioCourse**
@ -465,11 +463,9 @@
- **lynda**: lynda.com videos - **lynda**: lynda.com videos
- **lynda:course**: lynda.com online courses - **lynda:course**: lynda.com online courses
- **m6** - **m6**
- **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru - **mailru**: Видео@Mail.Ru
- **mailru:music**: Музыка@Mail.Ru - **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru - **mailru:music:search**: Музыка@Mail.Ru
- **MakerTV**
- **MallTV** - **MallTV**
- **mangomolo:live** - **mangomolo:live**
- **mangomolo:video** - **mangomolo:video**
@ -525,8 +521,8 @@
- **mtg**: MTG services - **mtg**: MTG services
- **mtv** - **mtv**
- **mtv.de** - **mtv.de**
- **mtv81**
- **mtv:video** - **mtv:video**
- **mtvjapan**
- **mtvservices:embedded** - **mtvservices:embedded**
- **MuenchenTV**: münchen.tv - **MuenchenTV**: münchen.tv
- **MusicPlayOn** - **MusicPlayOn**
@ -694,7 +690,6 @@
- **PornoXO** - **PornoXO**
- **PornTube** - **PornTube**
- **PressTV** - **PressTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital - **prosiebensat1**: ProSiebenSat.1 Digital
- **puhutv** - **puhutv**
- **puhutv:serie** - **puhutv:serie**
@ -785,7 +780,6 @@
- **Seeker** - **Seeker**
- **SenateISVP** - **SenateISVP**
- **SendtoNews** - **SendtoNews**
- **ServingSys**
- **Servus** - **Servus**
- **Sexu** - **Sexu**
- **SeznamZpravy** - **SeznamZpravy**
@ -816,6 +810,7 @@
- **soundcloud:set** - **soundcloud:set**
- **soundcloud:trackstation** - **soundcloud:trackstation**
- **soundcloud:user** - **soundcloud:user**
- **SoundcloudEmbed**
- **soundgasm** - **soundgasm**
- **soundgasm:profile** - **soundgasm:profile**
- **southpark.cc.com** - **southpark.cc.com**
@ -884,9 +879,11 @@
- **TeleQuebec** - **TeleQuebec**
- **TeleQuebecEmission** - **TeleQuebecEmission**
- **TeleQuebecLive** - **TeleQuebecLive**
- **TeleQuebecSquat**
- **TeleTask** - **TeleTask**
- **Telewebion** - **Telewebion**
- **TennisTV** - **TennisTV**
- **TenPlay**
- **TF1** - **TF1**
- **TFO** - **TFO**
- **TheIntercept** - **TheIntercept**
@ -925,7 +922,6 @@
- **tunein:topic** - **tunein:topic**
- **TunePk** - **TunePk**
- **Turbo** - **Turbo**
- **Tutv**
- **tv.dfb.de** - **tv.dfb.de**
- **TV2** - **TV2**
- **tv2.hu** - **tv2.hu**
@ -991,7 +987,6 @@
- **VeeHD** - **VeeHD**
- **Veoh** - **Veoh**
- **verystream** - **verystream**
- **Vessel**
- **Vesti**: Вести.Ru - **Vesti**: Вести.Ru
- **Vevo** - **Vevo**
- **VevoPlaylist** - **VevoPlaylist**
@ -1006,7 +1001,6 @@
- **Viddler** - **Viddler**
- **Videa** - **Videa**
- **video.google:search**: Google Video search - **video.google:search**: Google Video search
- **video.mit.edu**
- **VideoDetective** - **VideoDetective**
- **videofy.me** - **videofy.me**
- **videomore** - **videomore**
@ -1024,7 +1018,6 @@
- **vier:videos** - **vier:videos**
- **ViewLift** - **ViewLift**
- **ViewLiftEmbed** - **ViewLiftEmbed**
- **Viewster**
- **Viidea** - **Viidea**
- **viki** - **viki**
- **viki:channel** - **viki:channel**
@ -1090,7 +1083,6 @@
- **Weibo** - **Weibo**
- **WeiboMobile** - **WeiboMobile**
- **WeiqiTV**: WQTV - **WeiqiTV**: WQTV
- **Wimp**
- **Wistia** - **Wistia**
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **WorldStarHipHop** - **WorldStarHipHop**
@ -1099,7 +1091,7 @@
- **WWE** - **WWE**
- **XBef** - **XBef**
- **XboxClips** - **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me - **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
- **XHamster** - **XHamster**
- **XHamsterEmbed** - **XHamsterEmbed**
- **XHamsterUser** - **XHamsterUser**

View File

@ -123,12 +123,6 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs']) self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs']) self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
def test_yahoo_https(self):
# https://github.com/ytdl-org/youtube-dl/issues/2701
self.assertMatch(
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo'])
def test_no_duplicated_ie_names(self): def test_no_duplicated_ie_names(self):
name_accu = collections.defaultdict(list) name_accu = collections.defaultdict(list)
for ie in self.ies: for ie in self.ies:

View File

@ -74,6 +74,7 @@ from youtube_dl.utils import (
str_to_int, str_to_int,
strip_jsonp, strip_jsonp,
strip_or_none, strip_or_none,
subtitles_filename,
timeconvert, timeconvert,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
@ -261,6 +262,11 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
def test_subtitles_filename(self):
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
def test_remove_start(self): def test_remove_start(self):
self.assertEqual(remove_start(None, 'A - '), None) self.assertEqual(remove_start(None, 'A - '), None)
self.assertEqual(remove_start('A - B', 'A - '), 'B') self.assertEqual(remove_start('A - B', 'A - '), 'B')

View File

@ -1814,7 +1814,7 @@ class YoutubeDL(object):
ie = self.get_info_extractor(info_dict['extractor_key']) ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items(): for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext'] sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format) sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else: else:

View File

@ -1,202 +1,118 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import time
import hmac
import hashlib
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none,
int_or_none, int_or_none,
sanitized_Request,
urlencode_postdata, urlencode_postdata,
xpath_text,
) )
class AtresPlayerIE(InfoExtractor): class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html' _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
_NETRC_MACHINE = 'atresplayer' _NETRC_MACHINE = 'atresplayer'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', 'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
'md5': 'efd56753cda1bb64df52a3074f62e38a',
'info_dict': { 'info_dict': {
'id': 'capitulo-10-especial-solidario-nochebuena', 'id': '5d4aa2c57ed1a88fc715a615',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Especial Solidario de Nochebuena', 'title': 'Capítulo 7: Asuntos pendientes',
'description': 'md5:e2d52ff12214fa937107d21064075bf1', 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
'duration': 5527.6, 'duration': 3413,
'thumbnail': r're:^https?://.*\.jpg$', },
'params': {
'format': 'bestvideo',
}, },
'skip': 'This video is only available for registered users' 'skip': 'This video is only available for registered users'
}, },
{ {
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html', 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
'md5': '6e52cbb513c405e403dbacb7aacf8747', 'only_matching': True,
'info_dict': {
'id': 'capitulo-112-david-bustamante',
'ext': 'flv',
'title': 'David Bustamante',
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
'duration': 1439.0,
'thumbnail': r're:^https?://.*\.jpg$',
},
}, },
{ {
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html', 'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
'only_matching': True, 'only_matching': True,
}, },
] ]
_API_BASE = 'https://api.atresplayer.com/'
_USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
_MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
_TIMESTAMP_SHIFT = 30000
_TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
_URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
_PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
_EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
_ERRORS = {
'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
'DELETED': 'This video has expired and is no longer available for online streaming.',
'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
# 'PREMIUM': 'PREMIUM',
}
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
def _handle_error(self, e, code):
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
error = self._parse_json(e.cause.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)
raise
def _login(self): def _login(self):
username, password = self._get_login_info() username, password = self._get_login_info()
if username is None: if username is None:
return return
login_form = { self._request_webpage(
'j_username': username, self._API_BASE + 'login', None, 'Downloading login page')
'j_password': password,
}
request = sanitized_Request( try:
self._LOGIN_URL, urlencode_postdata(login_form)) target_url = self._download_json(
request.add_header('Content-Type', 'application/x-www-form-urlencoded') 'https://account.atresmedia.com/api/login', None,
response = self._download_webpage( 'Logging in', headers={
request, None, 'Logging in') 'Content-Type': 'application/x-www-form-urlencoded'
}, data=urlencode_postdata({
'username': username,
'password': password,
}))['targetUrl']
except ExtractorError as e:
self._handle_error(e, 400)
error = self._html_search_regex( self._request_webpage(target_url, None, 'Following Target URL')
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
response, 'error', default=None)
if error:
raise ExtractorError(
'Unable to login: %s' % error, expected=True)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id) try:
episode = self._download_json(
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
except ExtractorError as e:
self._handle_error(e, 403)
episode_id = self._search_regex( title = episode['titulo']
r'episode="([^"]+)"', webpage, 'episode id')
request = sanitized_Request(
self._PLAYER_URL_TEMPLATE % episode_id,
headers={'User-Agent': self._USER_AGENT})
player = self._download_json(request, episode_id, 'Downloading player JSON')
episode_type = player.get('typeOfEpisode')
error_message = self._ERRORS.get(episode_type)
if error_message:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
formats = [] formats = []
video_url = player.get('urlVideo') for source in episode.get('sources', []):
if video_url: src = source.get('src')
format_info = { if not src:
'url': video_url,
'format_id': 'http',
}
mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
if mobj:
format_info.update({
'width': int_or_none(mobj.group('width')),
'height': int_or_none(mobj.group('height')),
'tbr': int_or_none(mobj.group('bitrate')),
})
formats.append(format_info)
timestamp = int_or_none(self._download_webpage(
self._TIME_API_URL,
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
token = hmac.new(
self._MAGIC.encode('ascii'),
(episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
).hexdigest()
request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
headers={'User-Agent': self._USER_AGENT})
fmt_json = self._download_json(
request, video_id, 'Downloading windows video JSON')
result = fmt_json.get('resultDes')
if result.lower() != 'ok':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
for format_id, video_url in fmt_json['resultObject'].items():
if format_id == 'token' or not video_url.startswith('http'):
continue continue
if 'geodeswowsmpra3player' in video_url: src_type = source.get('type')
# f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] if src_type == 'application/vnd.apple.mpegurl':
# f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) formats.extend(self._extract_m3u8_formats(
# this videos are protected by DRM, the f4m downloader doesn't support them src, video_id, 'mp4', 'm3u8_native',
continue m3u8_id='hls', fatal=False))
video_url_hd = video_url.replace('free_es', 'es') elif src_type == 'application/dash+xml':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_mpd_formats(
video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds', src, video_id, mpd_id='dash', fatal=False))
fatal=False))
formats.extend(self._extract_mpd_formats(
video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
path_data = player.get('pathData') heartbeat = episode.get('heartbeat') or {}
omniture = episode.get('omniture') or {}
episode = self._download_xml( get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
self._EPISODE_URL_TEMPLATE % path_data, video_id,
'Downloading episode XML')
duration = float_or_none(xpath_text(
episode, './media/asset/info/technical/contentDuration', 'duration'))
art = episode.find('./media/asset/info/art')
title = xpath_text(art, './name', 'title')
description = xpath_text(art, './description', 'description')
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
subtitles = {}
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
if subtitle_url:
subtitles['es'] = [{
'ext': 'srt',
'url': subtitle_url,
}]
return { return {
'display_id': display_id,
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': episode.get('descripcion'),
'thumbnail': thumbnail, 'thumbnail': episode.get('imgPoster'),
'duration': duration, 'duration': int_or_none(episode.get('duration')),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'channel': get_meta('channel'),
'season': get_meta('season'),
'episode_number': int_or_none(get_meta('episodeNumber')),
} }

View File

@ -2,22 +2,25 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import float_or_none from ..utils import (
clean_html,
float_or_none,
)
class AudioBoomIE(InfoExtractor): class AudioBoomIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0', 'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
'md5': '63a8d73a055c6ed0f1e51921a10a5a76', 'md5': '7b00192e593ff227e6a315486979a42d',
'info_dict': { 'info_dict': {
'id': '4279833', 'id': '7398103',
'ext': 'mp3', 'ext': 'mp3',
'title': '3/09/2016 Czaban Hour 3', 'title': 'Asim Chaudhry',
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans', 'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
'duration': 2245.72, 'duration': 4000.99,
'uploader': 'SB Nation A.M.', 'uploader': 'Sue Perkins: An hour or so with...',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
} }
}, { }, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
@ -32,8 +35,8 @@ class AudioBoomIE(InfoExtractor):
clip = None clip = None
clip_store = self._parse_json( clip_store = self._parse_json(
self._search_regex( self._html_search_regex(
r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id, r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
webpage, 'clip store', default='{}', group='json'), webpage, 'clip store', default='{}', group='json'),
video_id, fatal=False) video_id, fatal=False)
if clip_store: if clip_store:
@ -47,14 +50,15 @@ class AudioBoomIE(InfoExtractor):
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property( audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
'audio', webpage, 'audio url') 'audio', webpage, 'audio url')
title = from_clip('title') or self._og_search_title(webpage) title = from_clip('title') or self._html_search_meta(
description = from_clip('description') or self._og_search_description(webpage) ['og:title', 'og:audio:title', 'audio_title'], webpage)
description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)
duration = float_or_none(from_clip('duration') or self._html_search_meta( duration = float_or_none(from_clip('duration') or self._html_search_meta(
'weibo:audio:duration', webpage)) 'weibo:audio:duration', webpage))
uploader = from_clip('author') or self._og_search_property( uploader = from_clip('author') or self._html_search_meta(
'audio:artist', webpage, 'uploader', fatal=False) ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
uploader_url = from_clip('author_url') or self._html_search_meta( uploader_url = from_clip('author_url') or self._html_search_meta(
'audioboo:channel', webpage, 'uploader url') 'audioboo:channel', webpage, 'uploader url')

View File

@ -11,8 +11,8 @@ from ..utils import ExtractorError
class BokeCCBaseIE(InfoExtractor): class BokeCCBaseIE(InfoExtractor):
def _extract_bokecc_formats(self, webpage, video_id, format_id=None): def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
player_params_str = self._html_search_regex( player_params_str = self._html_search_regex(
r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)', r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
webpage, 'player params') webpage, 'player params', group='query')
player_params = compat_parse_qs(player_params_str) player_params = compat_parse_qs(player_params_str)
@ -36,9 +36,9 @@ class BokeCCIE(BokeCCBaseIE):
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{
'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B', 'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
'info_dict': { 'info_dict': {
'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30', 'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
'ext': 'flv', 'ext': 'flv',
'title': 'BokeCC Video', 'title': 'BokeCC Video',
}, },

View File

@ -147,6 +147,8 @@ class CeskaTelevizeIE(InfoExtractor):
is_live = item.get('type') == 'LIVE' is_live = item.get('type') == 'LIVE'
formats = [] formats = []
for format_id, stream_url in item.get('streamUrls', {}).items(): for format_id, stream_url in item.get('streamUrls', {}).items():
if 'drmOnly=true' in stream_url:
continue
if 'playerType=flash' in stream_url: if 'playerType=flash' in stream_url:
stream_formats = self._extract_m3u8_formats( stream_formats = self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4', 'm3u8_native', stream_url, playlist_id, 'mp4', 'm3u8_native',

View File

@ -0,0 +1,118 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
)
class CONtvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
'info_dict': {
'id': 'CEG10022949',
'ext': 'mp4',
'title': 'Days Of Thrills & Laughter',
'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
'upload_date': '20180703',
'timestamp': 1530634789.61,
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
'info_dict': {
'id': 'CLIP-show_fotld_bts',
'title': 'Fight of the Living Dead: Behind the Scenes Bites',
},
'playlist_mincount': 7,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
details = self._download_json(
'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
video_id, query={'device': 'web'})
if details.get('type') == 'episodic':
seasons = self._download_json(
'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
video_id)
entries = []
for season in seasons:
for episode in season.get('episodes', []):
episode_id = episode.get('id')
if not episode_id:
continue
entries.append(self.url_result(
'https://www.contv.com/details-movie/' + episode_id,
CONtvIE.ie_key(), episode_id))
return self.playlist_result(entries, video_id, details.get('title'))
m_details = details['details']
title = details['title']
formats = []
media_hls_url = m_details.get('media_hls_url')
if media_hls_url:
formats.extend(self._extract_m3u8_formats(
media_hls_url, video_id, 'mp4',
m3u8_id='hls', fatal=False))
media_mp4_url = m_details.get('media_mp4_url')
if media_mp4_url:
formats.append({
'format_id': 'http',
'url': media_mp4_url,
})
self._sort_formats(formats)
subtitles = {}
captions = m_details.get('captions') or {}
for caption_url in captions.values():
subtitles.setdefault('en', []).append({
'url': caption_url
})
thumbnails = []
for image in m_details.get('images', []):
image_url = image.get('url')
if not image_url:
continue
thumbnails.append({
'url': image_url,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
})
description = None
for p in ('large_', 'medium_', 'small_', ''):
d = m_details.get(p + 'description')
if d:
description = d
break
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': description,
'timestamp': float_or_none(details.get('metax_added_on'), 1000),
'subtitles': subtitles,
'duration': float_or_none(m_details.get('duration'), 1000),
'view_count': int_or_none(details.get('num_watched')),
'like_count': int_or_none(details.get('num_fav')),
'categories': details.get('category'),
'tags': details.get('tags'),
'season_number': int_or_none(details.get('season')),
'episode_number': int_or_none(details.get('episode')),
'release_year': int_or_none(details.get('pub_year')),
}

View File

@ -2,25 +2,21 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import itertools import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_parse_qs, compat_parse_qs,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_urlencode,
compat_urlparse, compat_urlparse,
) )
from ..utils import (
int_or_none,
str_to_int,
xpath_text,
unescapeHTML,
)
class DaumIE(InfoExtractor): class DaumBaseIE(InfoExtractor):
_KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/'
class DaumIE(DaumBaseIE):
_VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)' _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
IE_NAME = 'daum.net' IE_NAME = 'daum.net'
@ -36,6 +32,9 @@ class DaumIE(InfoExtractor):
'duration': 2117, 'duration': 2117,
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'uploader_id': 186139,
'uploader': '콘간지',
'timestamp': 1387310323,
}, },
}, { }, {
'url': 'http://m.tvpot.daum.net/v/65139429', 'url': 'http://m.tvpot.daum.net/v/65139429',
@ -44,11 +43,14 @@ class DaumIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118', 'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
'description': 'md5:79794514261164ff27e36a21ad229fc5', 'description': 'md5:79794514261164ff27e36a21ad229fc5',
'upload_date': '20150604', 'upload_date': '20150118',
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 154, 'duration': 154,
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'uploader': 'MBC 예능',
'uploader_id': 132251,
'timestamp': 1421604228,
}, },
}, { }, {
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
@ -59,12 +61,15 @@ class DaumIE(InfoExtractor):
'id': 'vwIpVpCQsT8$', 'id': 'vwIpVpCQsT8$',
'ext': 'flv', 'ext': 'flv',
'title': '01-Korean War ( Trouble on the horizon )', 'title': '01-Korean War ( Trouble on the horizon )',
'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름', 'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름',
'upload_date': '20080223', 'upload_date': '20080223',
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 249, 'duration': 249,
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'uploader': '까칠한 墮落始祖 황비홍님의',
'uploader_id': 560824,
'timestamp': 1203770745,
}, },
}, { }, {
# Requires dte_type=WEB (#9972) # Requires dte_type=WEB (#9972)
@ -73,60 +78,24 @@ class DaumIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 's3794Uf1NZeZ1qMpGpeqeRU', 'id': 's3794Uf1NZeZ1qMpGpeqeRU',
'ext': 'mp4', 'ext': 'mp4',
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611', 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회', 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
'upload_date': '20160611', 'upload_date': '20170129',
'uploader': '쇼! 음악중심',
'uploader_id': 2653210,
'timestamp': 1485684628,
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = compat_urllib_parse_unquote(self._match_id(url)) video_id = compat_urllib_parse_unquote(self._match_id(url))
movie_data = self._download_json( if not video_id.isdigit():
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json', video_id += '@my'
video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'}) return self.url_result(
self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id,
'Downloading video info', query={'vid': video_id})
formats = []
for format_el in movie_data['output_list']['output_list']:
profile = format_el['profile']
format_query = compat_urllib_parse_urlencode({
'vid': video_id,
'profile': profile,
})
url_doc = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
video_id, note='Downloading video data for %s format' % profile)
format_url = url_doc.find('result/url').text
formats.append({
'url': format_url,
'format_id': profile,
'width': int_or_none(format_el.get('width')),
'height': int_or_none(format_el.get('height')),
'filesize': int_or_none(format_el.get('filesize')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': info.find('TITLE').text,
'formats': formats,
'thumbnail': xpath_text(info, 'THUMB_URL'),
'description': xpath_text(info, 'CONTENTS'),
'duration': int_or_none(xpath_text(info, 'DURATION')),
'upload_date': info.find('REGDTTM').text[:8],
'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')),
'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')),
}
class DaumClipIE(InfoExtractor): class DaumClipIE(DaumBaseIE):
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)' _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
IE_NAME = 'daum.net:clip' IE_NAME = 'daum.net:clip'
_URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s' _URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
@ -142,6 +111,9 @@ class DaumClipIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 3868, 'duration': 3868,
'view_count': int, 'view_count': int,
'uploader': 'GOMeXP',
'uploader_id': 6667,
'timestamp': 1377911092,
}, },
}, { }, {
'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425', 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
@ -154,22 +126,8 @@ class DaumClipIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
clip_info = self._download_json( return self.url_result(
'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id, self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
video_id, 'Downloading clip info')['clip_bean']
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
'title': unescapeHTML(clip_info['title']),
'thumbnail': clip_info.get('thumb_url'),
'description': clip_info.get('contents'),
'duration': int_or_none(clip_info.get('duration')),
'upload_date': clip_info.get('up_date')[:8],
'view_count': int_or_none(clip_info.get('play_count')),
'ie_key': 'Daum',
}
class DaumListIE(InfoExtractor): class DaumListIE(InfoExtractor):

View File

@ -3,63 +3,38 @@ from __future__ import unicode_literals
import re import re
from .brightcove import BrightcoveLegacyIE
from .dplay import DPlayIE from .dplay import DPlayIE
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
from ..utils import smuggle_url
class DiscoveryNetworksDeIE(DPlayIE): class DiscoveryNetworksDeIE(DPlayIE):
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
(?:
.*\#(?P<id>\d+)|
(?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
)'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
'info_dict': { 'info_dict': {
'id': '3235167922001', 'id': '78867',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Breaking Amish: Die Welt da draußen', 'title': 'Die Welt da draußen',
'description': ( 'description': 'md5:61033c12b73286e409d99a41742ef608',
'Vier Amische und eine Mennonitin wagen in New York' 'timestamp': 1554069600,
' den Sprung in ein komplett anderes Leben. Begleitet sie auf' 'upload_date': '20190331',
' ihrem spannenden Weg.'), },
'timestamp': 1396598084, 'params': {
'upload_date': '20140404', 'format': 'bestvideo',
'uploader_id': '1659832546', 'skip_download': True,
}, },
}, { }, {
'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/', 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.discovery.de/#5332316765001', 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
'only_matching': True, 'only_matching': True,
}] }]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) domain, programme, alternate_id = re.match(self._VALID_URL, url).groups()
alternate_id = mobj.group('alternate_id') country = 'GB' if domain == 'dplay.co.uk' else 'DE'
if alternate_id: realm = 'questuk' if country == 'GB' else domain.replace('.', '')
self._initialize_geo_bypass({ return self._get_disco_api_info(
'countries': ['DE'], url, '%s/%s' % (programme, alternate_id),
}) 'sonic-eu1-prod.disco-api.com', realm, country)
return self._get_disco_api_info(
url, '%s/%s' % (mobj.group('programme'), alternate_id),
'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
brightcove_id = mobj.group('id')
if not brightcove_id:
title = mobj.group('title')
webpage = self._download_webpage(url, title)
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
brightcove_legacy_url).query)['@videoPlayer'][0]
return self.url_result(smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}),
'BrightcoveNew', brightcove_id)

View File

@ -1,74 +1,68 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_HTTPError
compat_HTTPError,
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
remove_end,
try_get,
unified_strdate,
unified_timestamp, unified_timestamp,
update_url_query,
urljoin,
USER_AGENTS,
) )
class DPlayIE(InfoExtractor): class DPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)' _VALID_URL = r'''(?x)https?://
(?P<domain>
(?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
(?P<subdomain_country>es|it)\.dplay\.com
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
_TESTS = [{ _TESTS = [{
# non geo restricted, via secure api, unsigned download hls URL # non geo restricted, via secure api, unsigned download hls URL
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
'info_dict': { 'info_dict': {
'id': '3172', 'id': '13628',
'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet', 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Svensken lär sig njuta av livet', 'title': 'Svensken lär sig njuta av livet',
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
'duration': 2650, 'duration': 2649.856,
'timestamp': 1365454320, 'timestamp': 1365453720,
'upload_date': '20130408', 'upload_date': '20130408',
'creator': 'Kanal 5 (Home)', 'creator': 'Kanal 5',
'series': 'Nugammalt - 77 händelser som format Sverige', 'series': 'Nugammalt - 77 händelser som format Sverige',
'season_number': 1, 'season_number': 1,
'episode_number': 1, 'episode_number': 1,
'age_limit': 0, },
'params': {
'format': 'bestvideo',
'skip_download': True,
}, },
}, { }, {
# geo restricted, via secure api, unsigned download hls URL # geo restricted, via secure api, unsigned download hls URL
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
'info_dict': { 'info_dict': {
'id': '70816', 'id': '104465',
'display_id': 'mig-og-min-mor/season-6-episode-12', 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Episode 12', 'title': 'Ted Bundy: Mind Of A Monster',
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', 'description': 'md5:8b780f6f18de4dae631668b8a9637995',
'duration': 2563, 'duration': 5290.027,
'timestamp': 1429696800, 'timestamp': 1570694400,
'upload_date': '20150422', 'upload_date': '20191010',
'creator': 'Kanal 4 (Home)', 'creator': 'ID - Investigation Discovery',
'series': 'Mig og min mor', 'series': 'Ted Bundy: Mind Of A Monster',
'season_number': 6, 'season_number': 1,
'episode_number': 12, 'episode_number': 1,
'age_limit': 0, },
'params': {
'format': 'bestvideo',
'skip_download': True,
}, },
}, {
# geo restricted, via direct unsigned hls URL
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
'only_matching': True,
}, { }, {
# disco-api # disco-api
'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
@ -89,19 +83,59 @@ class DPlayIE(InfoExtractor):
'format': 'bestvideo', 'format': 'bestvideo',
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Available for Premium users',
}, { }, {
'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3', 'md5': '2b808ffb00fc47b884a172ca5d13053c',
'info_dict': {
'id': '6918',
'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
'ext': 'mp4',
'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
'thumbnail': r're:^https?://.*\.jpe?g',
'upload_date': '20160524',
'timestamp': 1464076800,
'series': 'Biografie imbarazzanti',
'season_number': 1,
'episode': 'Episode 1',
'episode_number': 1,
},
}, {
'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
'info_dict': {
'id': '21652',
'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
'ext': 'mp4',
'title': 'Episodio 1',
'description': 'md5:b9dcff2071086e003737485210675f69',
'thumbnail': r're:^https?://.*\.png',
'upload_date': '20180709',
'timestamp': 1531173540,
'series': 'La fiebre del oro',
'season_number': 8,
'episode': 'Episode 1',
'episode_number': 1,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001', 'url': 'https://www.dplay.jp/video/gold-rush/24086',
'only_matching': True, 'only_matching': True,
}] }]
def _get_disco_api_info(self, url, display_id, disco_host, realm): def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
disco_base = 'https://' + disco_host geo_countries = [country.upper()]
self._initialize_geo_bypass({
'countries': geo_countries,
})
disco_base = 'https://%s/' % disco_host
token = self._download_json( token = self._download_json(
'%s/token' % disco_base, display_id, 'Downloading token', disco_base + 'token', display_id, 'Downloading token',
query={ query={
'realm': realm, 'realm': realm,
})['data']['attributes']['token'] })['data']['attributes']['token']
@ -110,17 +144,30 @@ class DPlayIE(InfoExtractor):
'Authorization': 'Bearer ' + token, 'Authorization': 'Bearer ' + token,
} }
video = self._download_json( video = self._download_json(
'%s/content/videos/%s' % (disco_base, display_id), display_id, disco_base + 'content/videos/' + display_id, display_id,
headers=headers, query={ headers=headers, query={
'include': 'show' 'include': 'images,primaryChannel,show,tags'
}) })
video_id = video['data']['id'] video_id = video['data']['id']
info = video['data']['attributes'] info = video['data']['attributes']
title = info['name'] title = info['name'].strip()
formats = [] formats = []
for format_id, format_dict in self._download_json( try:
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id), streaming = self._download_json(
display_id, headers=headers)['data']['attributes']['streaming'].items(): disco_base + 'playback/videoPlaybackInfo/' + video_id,
display_id, headers=headers)['data']['attributes']['streaming']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
error = info['errors'][0]
error_code = error.get('code')
if error_code == 'access.denied.geoblocked':
self.raise_geo_restricted(countries=geo_countries)
elif error_code == 'access.denied.missingpackage':
self.raise_login_required()
raise ExtractorError(info['errors'][0]['detail'], expected=True)
raise
for format_id, format_dict in streaming.items():
if not isinstance(format_dict, dict): if not isinstance(format_dict, dict):
continue continue
format_url = format_dict.get('url') format_url = format_dict.get('url')
@ -142,235 +189,55 @@ class DPlayIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
series = None creator = series = None
try: tags = []
included = video.get('included') thumbnails = []
if isinstance(included, list): included = video.get('included') or []
show = next(e for e in included if e.get('type') == 'show') if isinstance(included, list):
series = try_get( for e in included:
show, lambda x: x['attributes']['name'], compat_str) attributes = e.get('attributes')
except StopIteration: if not attributes:
pass continue
e_type = e.get('type')
if e_type == 'channel':
creator = attributes.get('name')
elif e_type == 'image':
src = attributes.get('src')
if src:
thumbnails.append({
'url': src,
'width': int_or_none(attributes.get('width')),
'height': int_or_none(attributes.get('height')),
})
if e_type == 'show':
series = attributes.get('name')
elif e_type == 'tag':
name = attributes.get('name')
if name:
tags.append(name)
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': info.get('description'), 'description': info.get('description'),
'duration': float_or_none( 'duration': float_or_none(info.get('videoDuration'), 1000),
info.get('videoDuration'), scale=1000),
'timestamp': unified_timestamp(info.get('publishStart')), 'timestamp': unified_timestamp(info.get('publishStart')),
'series': series, 'series': series,
'season_number': int_or_none(info.get('seasonNumber')), 'season_number': int_or_none(info.get('seasonNumber')),
'episode_number': int_or_none(info.get('episodeNumber')), 'episode_number': int_or_none(info.get('episodeNumber')),
'age_limit': int_or_none(info.get('minimum_age')), 'age_limit': int_or_none(info.get('minimum_age')),
'creator': creator,
'tags': tags,
'thumbnails': thumbnails,
'formats': formats, 'formats': formats,
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id') display_id = mobj.group('id')
domain = mobj.group('domain') domain = mobj.group('domain').lstrip('www.')
country = mobj.group('country') or mobj.group('subdomain_country')
self._initialize_geo_bypass({ host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
'countries': [mobj.group('country').upper()], return self._get_disco_api_info(
}) url, display_id, host, 'dplay' + country, country)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
if not video_id:
host = mobj.group('host')
return self._get_disco_api_info(
url, display_id, 'disco-api.' + host, host.replace('.', ''))
info = self._download_json(
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
video_id)['data'][0]
title = info['title']
PROTOCOLS = ('hls', 'hds')
formats = []
def extract_formats(protocol, manifest_url):
if protocol == 'hls':
m3u8_formats = self._extract_m3u8_formats(
manifest_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
# ourselves. Also fragments' URLs are only served signed for
# Safari user agent.
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
for m3u8_format in m3u8_formats:
m3u8_format.update({
'url': update_url_query(m3u8_format['url'], query),
'http_headers': {
'User-Agent': USER_AGENTS['Safari'],
},
})
formats.extend(m3u8_formats)
elif protocol == 'hds':
formats.extend(self._extract_f4m_formats(
manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
video_id, f4m_id=protocol, fatal=False))
domain_tld = domain.split('.')[-1]
if domain_tld in ('se', 'dk', 'no'):
for protocol in PROTOCOLS:
# Providing dsc-geo allows to bypass geo restriction in some cases
self._set_cookie(
'secure.dplay.%s' % domain_tld, 'dsc-geo',
json.dumps({
'countryCode': domain_tld.upper(),
'expiry': (time.time() + 20 * 60) * 1000,
}))
stream = self._download_json(
'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
% (domain_tld, video_id, protocol), video_id,
'Downloading %s stream JSON' % protocol, fatal=False)
if stream and stream.get(protocol):
extract_formats(protocol, stream[protocol])
# The last resort is to try direct unsigned hls/hds URLs from info dictionary.
# Sometimes this does work even when secure API with dsc-geo has failed (e.g.
# http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
if not formats:
for protocol in PROTOCOLS:
if info.get(protocol):
extract_formats(protocol, info[protocol])
self._sort_formats(formats)
subtitles = {}
for lang in ('se', 'sv', 'da', 'nl', 'no'):
for format_id in ('web_vtt', 'vtt', 'srt'):
subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
if subtitle_url:
subtitles.setdefault(lang, []).append({'url': subtitle_url})
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': info.get('video_metadata_longDescription'),
'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
'timestamp': int_or_none(info.get('video_publish_date')),
'creator': info.get('video_metadata_homeChannel'),
'series': info.get('video_metadata_show'),
'season_number': int_or_none(info.get('season')),
'episode_number': int_or_none(info.get('episode')),
'age_limit': int_or_none(info.get('minimum_age')),
'formats': formats,
'subtitles': subtitles,
}
class DPlayItIE(InfoExtractor):
_VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['IT']
_TEST = {
'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
'md5': '2b808ffb00fc47b884a172ca5d13053c',
'info_dict': {
'id': '6918',
'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
'ext': 'mp4',
'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
'thumbnail': r're:^https?://.*\.jpe?g',
'upload_date': '20160524',
'series': 'Biografie imbarazzanti',
'season_number': 1,
'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
'episode_number': 1,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = remove_end(self._og_search_title(webpage), ' | Dplay')
video_id = None
info = self._search_regex(
r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
webpage, 'playback JSON', default=None)
if info:
for _ in range(2):
info = self._parse_json(info, display_id, fatal=False)
if not info:
break
else:
video_id = try_get(info, lambda x: x['data']['id'])
if not info:
info_url = self._search_regex(
(r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
webpage, 'info url', group='url')
info_url = urljoin(url, info_url)
video_id = info_url.rpartition('/')[-1]
try:
info = self._download_json(
info_url, display_id, headers={
'Authorization': 'Bearer %s' % self._get_cookies(url).get(
'dplayit_token').value,
'Referer': url,
})
if isinstance(info, compat_str):
info = self._parse_json(info, display_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
error = info['errors'][0]
if error.get('code') == 'access.denied.geoblocked':
self.raise_geo_restricted(
msg=error.get('detail'), countries=self._GEO_COUNTRIES)
raise ExtractorError(info['errors'][0]['detail'], expected=True)
raise
hls_url = info['data']['attributes']['streaming']['hls']['url']
formats = self._extract_m3u8_formats(
hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)
series = self._html_search_regex(
r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
webpage, 'series', fatal=False)
episode = self._search_regex(
r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)',
webpage, 'episode', fatal=False)
mobj = re.search(
r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})',
webpage)
if mobj:
season_number = int(mobj.group('season_number'))
episode_number = int(mobj.group('episode_number'))
upload_date = unified_strdate(mobj.group('upload_date'))
else:
season_number = episode_number = upload_date = None
return {
'id': compat_str(video_id or display_id),
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'series': series,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'upload_date': upload_date,
'formats': formats,
}

View File

@ -1,20 +1,17 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import ( from ..utils import (
int_or_none,
qualities, qualities,
sanitized_Request,
) )
class DumpertIE(InfoExtractor): class DumpertIE(InfoExtractor):
_VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)' _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/', 'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
'md5': '1b9318d7d5054e7dcb9dc7654f21d643', 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
'info_dict': { 'info_dict': {
'id': '6646981/951bc60f', 'id': '6646981/951bc60f',
@ -24,46 +21,60 @@ class DumpertIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
} }
}, { }, {
'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/', 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
'only_matching': True,
}, {
'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f',
'only_matching': True,
}, {
'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url).replace('_', '/')
video_id = mobj.group('id') item = self._download_json(
protocol = mobj.group('protocol') 'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'),
video_id)['items'][0]
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id) title = item['title']
req = sanitized_Request(url) media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
req.add_header('Cookie', 'nsfw=1; cpc=10')
webpage = self._download_webpage(req, video_id)
files_base64 = self._search_regex(
r'data-files="([^"]+)"', webpage, 'data files')
files = self._parse_json(
compat_b64decode(files_base64).decode('utf-8'),
video_id)
quality = qualities(['flv', 'mobile', 'tablet', '720p']) quality = qualities(['flv', 'mobile', 'tablet', '720p'])
formats = []
formats = [{ for variant in media.get('variants', []):
'url': video_url, uri = variant.get('uri')
'format_id': format_id, if not uri:
'quality': quality(format_id), continue
} for format_id, video_url in files.items() if format_id != 'still'] version = variant.get('version')
formats.append({
'url': uri,
'format_id': version,
'quality': quality(version),
})
self._sort_formats(formats) self._sort_formats(formats)
title = self._html_search_meta( thumbnails = []
'title', webpage) or self._og_search_title(webpage) stills = item.get('stills') or {}
description = self._html_search_meta( for t in ('thumb', 'still'):
'description', webpage) or self._og_search_description(webpage) for s in ('', '-medium', '-large'):
thumbnail = files.get('still') or self._og_search_thumbnail(webpage) still_id = t + s
still_url = stills.get(still_id)
if not still_url:
continue
thumbnails.append({
'id': still_id,
'url': still_url,
})
stats = item.get('stats') or {}
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': item.get('description'),
'thumbnail': thumbnail, 'thumbnails': thumbnails,
'formats': formats 'formats': formats,
'duration': int_or_none(media.get('duration')),
'like_count': int_or_none(stats.get('kudos_total')),
'view_count': int_or_none(stats.get('views_total')),
} }

View File

@ -231,6 +231,7 @@ from .commonprotocols import (
RtmpIE, RtmpIE,
) )
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .contv import CONtvIE
from .corus import CorusIE from .corus import CorusIE
from .cracked import CrackedIE from .cracked import CrackedIE
from .crackle import CrackleIE from .crackle import CrackleIE
@ -276,10 +277,7 @@ from .douyutv import (
DouyuShowIE, DouyuShowIE,
DouyuTVIE, DouyuTVIE,
) )
from .dplay import ( from .dplay import DPlayIE
DPlayIE,
DPlayItIE,
)
from .dreisat import DreiSatIE from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE from .drtuber import DrTuberIE
@ -373,7 +371,10 @@ from .fourtube import (
FuxIE, FuxIE,
) )
from .fox import FOXIE from .fox import FOXIE
from .fox9 import FOX9IE from .fox9 import (
FOX9IE,
FOX9NewsIE,
)
from .foxgay import FoxgayIE from .foxgay import FoxgayIE
from .foxnews import ( from .foxnews import (
FoxNewsIE, FoxNewsIE,
@ -425,7 +426,6 @@ from .globo import (
GloboArticleIE, GloboArticleIE,
) )
from .go import GoIE from .go import GoIE
from .go90 import Go90IE
from .godtube import GodTubeIE from .godtube import GodTubeIE
from .golem import GolemIE from .golem import GolemIE
from .googledrive import GoogleDriveIE from .googledrive import GoogleDriveIE
@ -434,7 +434,6 @@ from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE from .groupon import GrouponIE
from .hark import HarkIE
from .hbo import HBOIE from .hbo import HBOIE
from .hearthisat import HearThisAtIE from .hearthisat import HearThisAtIE
from .heise import HeiseIE from .heise import HeiseIE
@ -552,7 +551,6 @@ from .lcp import (
LcpPlayIE, LcpPlayIE,
LcpIE, LcpIE,
) )
from .learnr import LearnrIE
from .lecture2go import Lecture2GoIE from .lecture2go import Lecture2GoIE
from .lecturio import ( from .lecturio import (
LecturioIE, LecturioIE,
@ -604,13 +602,11 @@ from .lynda import (
LyndaCourseIE LyndaCourseIE
) )
from .m6 import M6IE from .m6 import M6IE
from .macgamestore import MacGameStoreIE
from .mailru import ( from .mailru import (
MailRuIE, MailRuIE,
MailRuMusicIE, MailRuMusicIE,
MailRuMusicSearchIE, MailRuMusicSearchIE,
) )
from .makertv import MakerTVIE
from .malltv import MallTVIE from .malltv import MallTVIE
from .mangomolo import ( from .mangomolo import (
MangomoloVideoIE, MangomoloVideoIE,
@ -648,7 +644,7 @@ from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE from .minoto import MinotoIE
from .miomio import MioMioIE from .miomio import MioMioIE
from .mit import TechTVMITIE, MITIE, OCWMITIE from .mit import TechTVMITIE, OCWMITIE
from .mitele import MiTeleIE from .mitele import MiTeleIE
from .mixcloud import ( from .mixcloud import (
MixcloudIE, MixcloudIE,
@ -676,7 +672,7 @@ from .mtv import (
MTVVideoIE, MTVVideoIE,
MTVServicesEmbeddedIE, MTVServicesEmbeddedIE,
MTVDEIE, MTVDEIE,
MTV81IE, MTVJapanIE,
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE from .musicplayon import MusicPlayOnIE
@ -999,7 +995,6 @@ from .scrippsnetworks import ScrippsNetworksWatchIE
from .seeker import SeekerIE from .seeker import SeekerIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE
from .servus import ServusIE from .servus import ServusIE
from .sevenplus import SevenPlusIE from .sevenplus import SevenPlusIE
from .sexu import SexuIE from .sexu import SexuIE
@ -1040,6 +1035,7 @@ from .snotr import SnotrIE
from .sohu import SohuIE from .sohu import SohuIE
from .sonyliv import SonyLIVIE from .sonyliv import SonyLIVIE
from .soundcloud import ( from .soundcloud import (
SoundcloudEmbedIE,
SoundcloudIE, SoundcloudIE,
SoundcloudSetIE, SoundcloudSetIE,
SoundcloudUserIE, SoundcloudUserIE,
@ -1139,6 +1135,7 @@ from .telequebec import (
from .teletask import TeleTaskIE from .teletask import TeleTaskIE
from .telewebion import TelewebionIE from .telewebion import TelewebionIE
from .tennistv import TennisTVIE from .tennistv import TennisTVIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE from .testurl import TestURLIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .tfo import TFOIE from .tfo import TFOIE
@ -1191,11 +1188,11 @@ from .tunein import (
) )
from .tunepk import TunePkIE from .tunepk import TunePkIE
from .turbo import TurboIE from .turbo import TurboIE
from .tutv import TutvIE
from .tv2 import ( from .tv2 import (
TV2IE, TV2IE,
TV2ArticleIE, TV2ArticleIE,
) )
from .tv2dk import TV2DKIE
from .tv2hu import TV2HuIE from .tv2hu import TV2HuIE
from .tv4 import TV4IE from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE from .tv5mondeplus import TV5MondePlusIE
@ -1327,7 +1324,6 @@ from .viewlift import (
ViewLiftIE, ViewLiftIE,
ViewLiftEmbedIE, ViewLiftEmbedIE,
) )
from .viewster import ViewsterIE
from .viidea import ViideaIE from .viidea import ViideaIE
from .vimeo import ( from .vimeo import (
VimeoIE, VimeoIE,

View File

@ -379,6 +379,7 @@ class FacebookIE(InfoExtractor):
if not video_data: if not video_data:
raise ExtractorError('Cannot parse data') raise ExtractorError('Cannot parse data')
subtitles = {}
formats = [] formats = []
for f in video_data: for f in video_data:
format_id = f['stream_type'] format_id = f['stream_type']
@ -402,9 +403,17 @@ class FacebookIE(InfoExtractor):
if dash_manifest: if dash_manifest:
formats.extend(self._parse_mpd_formats( formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)))) compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
subtitles_src = f[0].get('subtitles_src')
if subtitles_src:
subtitles.setdefault('en', []).append({'url': subtitles_src})
if not formats: if not formats:
raise ExtractorError('Cannot find video formats') raise ExtractorError('Cannot find video formats')
# Downloads with browser's User-Agent are rate limited. Working around
# with non-browser User-Agent.
for f in formats:
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
self._sort_formats(formats) self._sort_formats(formats)
video_title = self._html_search_regex( video_title = self._html_search_regex(
@ -442,6 +451,7 @@ class FacebookIE(InfoExtractor):
'timestamp': timestamp, 'timestamp': timestamp,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'view_count': view_count, 'view_count': view_count,
'subtitles': subtitles,
} }
return webpage, info_dict return webpage, info_dict

View File

@ -1,13 +1,23 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .anvato import AnvatoIE from .common import InfoExtractor
class FOX9IE(AnvatoIE): class FOX9IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story' _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.fox9.com/news/215123287-story', def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
'Anvato', video_id)
class FOX9NewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
_TEST = {
'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
'md5': 'd6e1b2572c3bab8a849c9103615dd243', 'md5': 'd6e1b2572c3bab8a849c9103615dd243',
'info_dict': { 'info_dict': {
'id': '314473', 'id': '314473',
@ -21,22 +31,11 @@ class FOX9IE(AnvatoIE):
'categories': ['News', 'Sports'], 'categories': ['News', 'Sports'],
'tags': ['news', 'video'], 'tags': ['news', 'video'],
}, },
}, { }
'url': 'http://www.fox9.com/news/investigators/214070684-story',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, video_id) anvato_id = self._search_regex(
r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
video_id = self._parse_json( return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')
self._search_regex(
r"this\.videosJson\s*=\s*'(\[.+?\])';",
webpage, 'anvato playlist'),
video_id)[0]['video']
return self._get_anvato_videos(
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
video_id)

View File

@ -80,7 +80,7 @@ from .theplatform import ThePlatformIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .soundcloud import SoundcloudIE from .soundcloud import SoundcloudEmbedIE
from .tunein import TuneInBaseIE from .tunein import TuneInBaseIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .dbtv import DBTVIE from .dbtv import DBTVIE
@ -118,6 +118,7 @@ from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE from .viqeo import ViqeoIE
from .expressen import ExpressenIE from .expressen import ExpressenIE
from .zype import ZypeIE from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2627,9 +2628,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'VK') return self.url_result(mobj.group('url'), 'VK')
# Look for embedded Odnoklassniki player # Look for embedded Odnoklassniki player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage) odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
if mobj is not None: if odnoklassniki_url:
return self.url_result(mobj.group('url'), 'Odnoklassniki') return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
# Look for embedded ivi player # Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
@ -2748,9 +2749,9 @@ class GenericIE(InfoExtractor):
return self.url_result(myvi_url) return self.url_result(myvi_url)
# Look for embedded soundcloud player # Look for embedded soundcloud player
soundcloud_urls = SoundcloudIE._extract_urls(webpage) soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
if soundcloud_urls: if soundcloud_urls:
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
# Look for tunein player # Look for tunein player
tunein_urls = TuneInBaseIE._extract_urls(webpage) tunein_urls = TuneInBaseIE._extract_urls(webpage)
@ -2962,10 +2963,14 @@ class GenericIE(InfoExtractor):
# Look for Mangomolo embeds # Look for Mangomolo embeds
mobj = re.search( mobj = re.search(
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/ r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
(?:
admin\.mangomolo\.com/analytics/index\.php/customers/embed|
player\.mangomolo\.com/v1
)/
(?: (?:
video\?.*?\bid=(?P<video_id>\d+)| video\?.*?\bid=(?P<video_id>\d+)|
index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
).+?)\1''', webpage) ).+?)\1''', webpage)
if mobj is not None: if mobj is not None:
info = { info = {

View File

@ -102,10 +102,18 @@ class GloboIE(InfoExtractor):
title = video['title'] title = video['title']
formats = [] formats = []
subtitles = {}
for resource in video['resources']: for resource in video['resources']:
resource_id = resource.get('_id') resource_id = resource.get('_id')
resource_url = resource.get('url') resource_url = resource.get('url')
if not resource_id or not resource_url: resource_type = resource.get('type')
if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
continue
if resource_type == 'subtitle':
subtitles.setdefault(resource.get('language') or 'por', []).append({
'url': resource_url,
})
continue continue
security = self._download_json( security = self._download_json(
@ -124,18 +132,24 @@ class GloboIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, message), expected=True) '%s returned error: %s' % (self.IE_NAME, message), expected=True)
continue continue
assert security_hash[:2] in ('04', '14') hash_code = security_hash[:2]
received_time = security_hash[3:13]
received_md5 = security_hash[24:]
sign_time = compat_str(int(received_time) + 86400)
padding = '%010d' % random.randint(1, 10000000000) padding = '%010d' % random.randint(1, 10000000000)
if hash_code in ('04', '14'):
received_time = security_hash[3:13]
received_md5 = security_hash[24:]
hash_prefix = security_hash[:23]
elif hash_code in ('02', '12', '03', '13'):
received_time = security_hash[2:12]
received_md5 = security_hash[22:]
padding += '1'
hash_prefix = '05' + security_hash[:22]
md5_data = (received_md5 + sign_time + padding + '0xAC10FD').encode() padded_sign_time = compat_str(int(received_time) + 86400) + padding
md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
signed_hash = security_hash[:23] + sign_time + padding + signed_md5 signed_hash = hash_prefix + padded_sign_time + signed_md5
signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '') signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
@ -165,7 +179,8 @@ class GloboIE(InfoExtractor):
'duration': duration, 'duration': duration,
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'formats': formats 'formats': formats,
'subtitles': subtitles,
} }

View File

@ -40,8 +40,17 @@ class GoIE(AdobePassIE):
'resource_id': 'Disney', 'resource_id': 'Disney',
} }
} }
_VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|(?P<sub_domain_2>disneynow))\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ _VALID_URL = r'''(?x)
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow']) https?://
(?:
(?:(?P<sub_domain>%s)\.)?go|
(?P<sub_domain_2>abc|freeform|disneynow)
)\.com/
(?:
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
(?:[^/]+/)*(?P<display_id>[^/?\#]+)
)
''' % '|'.join(list(_SITE_INFO.keys()))
_TESTS = [{ _TESTS = [{
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
'info_dict': { 'info_dict': {
@ -54,6 +63,7 @@ class GoIE(AdobePassIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'This content is no longer available.',
}, { }, {
'url': 'http://watchdisneyxd.go.com/doraemon', 'url': 'http://watchdisneyxd.go.com/doraemon',
'info_dict': { 'info_dict': {
@ -61,6 +71,34 @@ class GoIE(AdobePassIE):
'id': 'SH55574025', 'id': 'SH55574025',
}, },
'playlist_mincount': 51, 'playlist_mincount': 51,
}, {
'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
'info_dict': {
'id': 'VDKA3609139',
'ext': 'mp4',
'title': 'This Guilty Blood',
'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
'age_limit': 14,
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
# m3u8 download
'skip_download': True,
},
}, {
'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
'info_dict': {
'id': 'VDKA13435179',
'ext': 'mp4',
'title': 'The Bet',
'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
'age_limit': 14,
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
# m3u8 download
'skip_download': True,
},
}, { }, {
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
'only_matching': True, 'only_matching': True,
@ -95,10 +133,13 @@ class GoIE(AdobePassIE):
if not video_id or not site_info: if not video_id or not site_info:
webpage = self._download_webpage(url, display_id or video_id) webpage = self._download_webpage(url, display_id or video_id)
video_id = self._search_regex( video_id = self._search_regex(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'" (
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
default=video_id) r'data-video-id=["\']*(VDKA\w+)',
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
), webpage, 'video id', default=video_id)
if not site_info: if not site_info:
brand = self._search_regex( brand = self._search_regex(
(r'data-brand=\s*["\']\s*(\d+)', (r'data-brand=\s*["\']\s*(\d+)',

View File

@ -1,149 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
parse_age_limit,
parse_iso8601,
)
class Go90IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://www.go90.com/videos/84BUqjLpf9D',
'md5': 'efa7670dbbbf21a7b07b360652b24a32',
'info_dict': {
'id': '84BUqjLpf9D',
'ext': 'mp4',
'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention',
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
'timestamp': 1491868800,
'upload_date': '20170411',
'age_limit': 14,
}
}, {
'url': 'https://www.go90.com/embed/261MflWkD3N',
'only_matching': True,
}]
_GEO_BYPASS = False
def _real_extract(self, url):
video_id = self._match_id(url)
try:
headers = self.geo_verification_headers()
headers.update({
'Content-Type': 'application/json; charset=utf-8',
})
video_data = self._download_json(
'https://www.go90.com/api/view/items/' + video_id, video_id,
headers=headers, data=b'{"client":"web","device_type":"pc"}')
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
message = self._parse_json(e.cause.read().decode(), None)['error']['message']
if 'region unavailable' in message:
self.raise_geo_restricted(countries=['US'])
raise ExtractorError(message, expected=True)
raise
if video_data.get('requires_drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
main_video_asset = video_data['main_video_asset']
episode_number = int_or_none(video_data.get('episode_number'))
series = None
season = None
season_id = None
season_number = None
for metadata in video_data.get('__children', {}).get('Item', {}).values():
if metadata.get('type') == 'show':
series = metadata.get('title')
elif metadata.get('type') == 'season':
season = metadata.get('title')
season_id = metadata.get('id')
season_number = int_or_none(metadata.get('season_number'))
title = episode = video_data.get('title') or series
if series and series != title:
title = '%s - %s' % (series, title)
thumbnails = []
formats = []
subtitles = {}
for asset in video_data.get('assets'):
if asset.get('id') == main_video_asset:
for source in asset.get('sources', []):
source_location = source.get('location')
if not source_location:
continue
source_type = source.get('type')
if source_type == 'hls':
m3u8_formats = self._extract_m3u8_formats(
source_location, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
for f in m3u8_formats:
mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url'])
if mobj:
height, tbr = mobj.groups()
height = int_or_none(height)
f.update({
'height': f.get('height') or height,
'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None),
'tbr': f.get('tbr') or int_or_none(tbr),
})
formats.extend(m3u8_formats)
elif source_type == 'dash':
formats.extend(self._extract_mpd_formats(
source_location, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'format_id': source.get('name'),
'url': source_location,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
'tbr': int_or_none(source.get('bitrate')),
})
for caption in asset.get('caption_metadata', []):
caption_url = caption.get('source_url')
if not caption_url:
continue
subtitles.setdefault(caption.get('language', 'en'), []).append({
'url': caption_url,
'ext': determine_ext(caption_url, 'vtt'),
})
elif asset.get('type') == 'image':
asset_location = asset.get('location')
if not asset_location:
continue
thumbnails.append({
'url': asset_location,
'width': int_or_none(asset.get('width')),
'height': int_or_none(asset.get('height')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': video_data.get('short_description'),
'like_count': int_or_none(video_data.get('like_count')),
'timestamp': parse_iso8601(video_data.get('released_at')),
'series': series,
'episode': episode,
'season': season,
'season_id': season_id,
'season_number': season_number,
'episode_number': episode_number,
'subtitles': subtitles,
'age_limit': parse_age_limit(video_data.get('rating')),
}

View File

@ -1,33 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class HarkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+'
_TEST = {
'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
'md5': '6783a58491b47b92c7c1af5a77d4cbee',
'info_dict': {
'id': 'mmbzyhkgny',
'ext': 'mp3',
'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013',
'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
'duration': 11,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://www.hark.com/clips/%s.json' % video_id, video_id)
return {
'id': video_id,
'url': data['url'],
'title': data['name'],
'description': data.get('description'),
'thumbnail': data.get('image_original'),
'duration': data.get('duration'),
}

View File

@ -1,15 +1,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_parse_qs, compat_parse_qs,
compat_urlparse, compat_urlparse,
) )
from ..utils import (
determine_ext,
int_or_none,
xpath_text,
)
class InternetVideoArchiveIE(InfoExtractor): class InternetVideoArchiveIE(InfoExtractor):
@ -20,7 +18,7 @@ class InternetVideoArchiveIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '194487', 'id': '194487',
'ext': 'mp4', 'ext': 'mp4',
'title': 'KICK-ASS 2', 'title': 'Kick-Ass 2',
'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
}, },
'params': { 'params': {
@ -33,68 +31,34 @@ class InternetVideoArchiveIE(InfoExtractor):
def _build_json_url(query): def _build_json_url(query):
return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
@staticmethod
def _build_xml_url(query):
return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
def _real_extract(self, url): def _real_extract(self, url):
query = compat_urlparse.urlparse(url).query query = compat_parse_qs(compat_urlparse.urlparse(url).query)
query_dic = compat_parse_qs(query) video_id = query['publishedid'][0]
video_id = query_dic['publishedid'][0] data = self._download_json(
'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
if '/player/' in url: video_id, data=json.dumps({
configuration = self._download_json(url, video_id) 'customerid': query['customerid'][0],
'publishedid': video_id,
# There are multiple videos in the playlist whlie only the first one }).encode())
# matches the video played in browsers title = data['Title']
video_info = configuration['playlist'][0] formats = self._extract_m3u8_formats(
title = video_info['title'] data['VideoUrl'], video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
formats = [] file_url = formats[0]['url']
for source in video_info['sources']: if '.ism/' in file_url:
file_url = source['file'] replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url)
if determine_ext(file_url) == 'm3u8': formats.extend(self._extract_f4m_formats(
m3u8_formats = self._extract_m3u8_formats( replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) formats.extend(self._extract_mpd_formats(
if m3u8_formats: replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
formats.extend(m3u8_formats) formats.extend(self._extract_ism_formats(
file_url = m3u8_formats[0]['url'] replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
formats.extend(self._extract_f4m_formats( self._sort_formats(formats)
file_url.replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
formats.extend(self._extract_mpd_formats(
file_url.replace('.m3u8', '.mpd'),
video_id, mpd_id='dash', fatal=False))
else:
a_format = {
'url': file_url,
}
if source.get('label') and source['label'][-4:] == ' kbs':
tbr = int_or_none(source['label'][:-4])
a_format.update({
'tbr': tbr,
'format_id': 'http-%d' % tbr,
})
formats.append(a_format)
self._sort_formats(formats)
description = video_info.get('description')
thumbnail = video_info.get('image')
else:
configuration = self._download_xml(url, video_id)
formats = [{
'url': xpath_text(configuration, './file', 'file URL', fatal=True),
}]
thumbnail = xpath_text(configuration, './image', 'thumbnail')
title = 'InternetVideoArchive video %s' % video_id
description = None
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': data.get('PosterUrl'),
'description': description, 'description': data.get('Description'),
} }

View File

@ -6,14 +6,15 @@ from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
strip_or_none,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
) )
class KakaoIE(InfoExtractor): class KakaoIE(InfoExtractor):
_VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)' _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
_API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks' _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'
_TESTS = [{ _TESTS = [{
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
@ -36,7 +37,7 @@ class KakaoIE(InfoExtractor):
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
'uploader_id': 2653210, 'uploader_id': 2653210,
'uploader': ' 음악중심', 'uploader': '! 음악중심',
'timestamp': 1485684628, 'timestamp': 1485684628,
'upload_date': '20170129', 'upload_date': '20170129',
} }
@ -44,6 +45,8 @@ class KakaoIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
display_id = video_id.rstrip('@my')
api_base = self._API_BASE_TMPL % video_id
player_header = { player_header = {
'Referer': update_url_query( 'Referer': update_url_query(
@ -55,20 +58,23 @@ class KakaoIE(InfoExtractor):
}) })
} }
QUERY_COMMON = { query = {
'player': 'monet_html5', 'player': 'monet_html5',
'referer': url, 'referer': url,
'uuid': '', 'uuid': '',
'service': 'kakao_tv', 'service': 'kakao_tv',
'section': '', 'section': '',
'dteType': 'PC', 'dteType': 'PC',
'fields': ','.join([
'-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
'description', 'channelId', 'createTime', 'duration', 'playCount',
'likeCount', 'commentCount', 'tagList', 'channel', 'name',
'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
} }
query = QUERY_COMMON.copy()
query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
impress = self._download_json( impress = self._download_json(
'%s/%s/impress' % (self._API_BASE, video_id), api_base + 'impress', display_id, 'Downloading video info',
video_id, 'Downloading video info',
query=query, headers=player_header) query=query, headers=player_header)
clip_link = impress['clipLink'] clip_link = impress['clipLink']
@ -76,32 +82,22 @@ class KakaoIE(InfoExtractor):
title = clip.get('title') or clip_link.get('displayTitle') title = clip.get('title') or clip_link.get('displayTitle')
tid = impress.get('tid', '') query['tid'] = impress.get('tid', '')
query = QUERY_COMMON.copy()
query.update({
'tid': tid,
'profile': 'HIGH',
})
raw = self._download_json(
'%s/%s/raw' % (self._API_BASE, video_id),
video_id, 'Downloading video formats info',
query=query, headers=player_header)
formats = [] formats = []
for fmt in raw.get('outputList', []): for fmt in clip.get('videoOutputList', []):
try: try:
profile_name = fmt['profile'] profile_name = fmt['profile']
if profile_name == 'AUDIO':
continue
query.update({
'profile': profile_name,
'fields': '-*,url',
})
fmt_url_json = self._download_json( fmt_url_json = self._download_json(
'%s/%s/raw/videolocation' % (self._API_BASE, video_id), api_base + 'raw/videolocation', display_id,
video_id,
'Downloading video URL for profile %s' % profile_name, 'Downloading video URL for profile %s' % profile_name,
query={ query=query, headers=player_header, fatal=False)
'service': 'kakao_tv',
'section': '',
'tid': tid,
'profile': profile_name
}, headers=player_header, fatal=False)
if fmt_url_json is None: if fmt_url_json is None:
continue continue
@ -113,7 +109,8 @@ class KakaoIE(InfoExtractor):
'width': int_or_none(fmt.get('width')), 'width': int_or_none(fmt.get('width')),
'height': int_or_none(fmt.get('height')), 'height': int_or_none(fmt.get('height')),
'format_note': fmt.get('label'), 'format_note': fmt.get('label'),
'filesize': int_or_none(fmt.get('filesize')) 'filesize': int_or_none(fmt.get('filesize')),
'tbr': int_or_none(fmt.get('kbps')),
}) })
except KeyError: except KeyError:
pass pass
@ -134,9 +131,9 @@ class KakaoIE(InfoExtractor):
}) })
return { return {
'id': video_id, 'id': display_id,
'title': title, 'title': title,
'description': clip.get('description'), 'description': strip_or_none(clip.get('description')),
'uploader': clip_link.get('channel', {}).get('name'), 'uploader': clip_link.get('channel', {}).get('name'),
'uploader_id': clip_link.get('channelId'), 'uploader_id': clip_link.get('channelId'),
'thumbnails': thumbs, 'thumbnails': thumbs,
@ -146,4 +143,5 @@ class KakaoIE(InfoExtractor):
'like_count': int_or_none(clip.get('likeCount')), 'like_count': int_or_none(clip.get('likeCount')),
'comment_count': int_or_none(clip.get('commentCount')), 'comment_count': int_or_none(clip.get('commentCount')),
'formats': formats, 'formats': formats,
'tags': clip.get('tagList'),
} }

View File

@ -20,7 +20,7 @@ class LA7IE(InfoExtractor):
'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
'md5': '8b613ffc0c4bf9b9e377169fc19c214c', 'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
'info_dict': { 'info_dict': {
'id': 'inccool8-02-10-2015-163722', 'id': '0_42j6wd36',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Inc.Cool8', 'title': 'Inc.Cool8',
'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico', 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
@ -57,7 +57,7 @@ class LA7IE(InfoExtractor):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': smuggle_url('kaltura:103:%s' % player_data['vid'], { 'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
'service_url': 'http://kdam.iltrovatore.it', 'service_url': 'http://nkdam.iltrovatore.it',
}), }),
'id': video_id, 'id': video_id,
'title': player_data['title'], 'title': player_data['title'],

View File

@ -1,33 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class LearnrIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
'md5': '3719fdf0a68397f49899e82c308a89de',
'info_dict': {
'id': '51624',
'ext': 'mp4',
'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
'description': 'md5:b36dbfa92350176cdf12b4d388485503',
'uploader': 'LearnCode.academy',
'uploader_id': 'learncodeacademy',
'upload_date': '20131021',
},
'add_ie': ['Youtube'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return {
'_type': 'url_transparent',
'url': self._search_regex(
r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'),
'id': video_id,
}

View File

@ -1,42 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class MacGameStoreIE(InfoExtractor):
IE_NAME = 'macgamestore'
IE_DESC = 'MacGameStore trailers'
_VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'
_TEST = {
'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
'md5': '8649b8ea684b6666b4c5be736ecddc61',
'info_dict': {
'id': '2450',
'ext': 'm4v',
'title': 'Crow',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id, 'Downloading trailer page')
if '>Missing Media<' in webpage:
raise ExtractorError(
'Trailer %s does not exist' % video_id, expected=True)
video_title = self._html_search_regex(
r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
video_url = self._html_search_regex(
r'(?s)<div\s+id="video-player".*?href="([^"]+)"\s*>',
webpage, 'video URL')
return {
'id': video_id,
'url': video_url,
'title': video_title
}

View File

@ -1,32 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class MakerTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
'info_dict': {
'id': 'Fh3QgymL9gsc',
'ext': 'mp4',
'title': 'Maze Runner: The Scorch Trials Official Movie Review',
'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
'upload_date': '20150918',
'timestamp': 1442549540,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'jwplatform:%s' % jwplatform_id,
'ie_key': 'JWPlatform',
}

View File

@ -10,18 +10,21 @@ from ..utils import int_or_none
class MangomoloBaseIE(InfoExtractor): class MangomoloBaseIE(InfoExtractor):
_BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
def _get_real_id(self, page_id): def _get_real_id(self, page_id):
return page_id return page_id
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._get_real_id(self._match_id(url)) page_id = self._get_real_id(self._match_id(url))
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(
'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id)
hidden_inputs = self._hidden_inputs(webpage) hidden_inputs = self._hidden_inputs(webpage)
m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native' m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
format_url = self._html_search_regex( format_url = self._html_search_regex(
[ [
r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
r'<a[^>]+href="(rtsp://[^"]+)"' r'<a[^>]+href="(rtsp://[^"]+)"'
], webpage, 'format url') ], webpage, 'format url')
formats = self._extract_wowza_formats( formats = self._extract_wowza_formats(
@ -39,14 +42,16 @@ class MangomoloBaseIE(InfoExtractor):
class MangomoloVideoIE(MangomoloBaseIE): class MangomoloVideoIE(MangomoloBaseIE):
IE_NAME = 'mangomolo:video' _TYPE = 'video'
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' IE_NAME = 'mangomolo:' + _TYPE
_VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)'
_IS_LIVE = False _IS_LIVE = False
class MangomoloLiveIE(MangomoloBaseIE): class MangomoloLiveIE(MangomoloBaseIE):
IE_NAME = 'mangomolo:live' _TYPE = 'live'
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' IE_NAME = 'mangomolo:' + _TYPE
_VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
_IS_LIVE = True _IS_LIVE = True
def _get_real_id(self, page_id): def _get_real_id(self, page_id):

View File

@ -65,30 +65,6 @@ class TechTVMITIE(InfoExtractor):
} }
class MITIE(TechTVMITIE):
IE_NAME = 'video.mit.edu'
_VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
_TEST = {
'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
'md5': '7db01d5ccc1895fc5010e9c9e13648da',
'info_dict': {
'id': '21783',
'ext': 'mp4',
'title': 'The Government is Profiling You',
'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
embed_url = self._search_regex(
r'<iframe .*?src="(.+?)"', webpage, 'embed url')
return self.url_result(embed_url)
class OCWMITIE(InfoExtractor): class OCWMITIE(InfoExtractor):
IE_NAME = 'ocw.mit.edu' IE_NAME = 'ocw.mit.edu'
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)' _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'

View File

@ -86,9 +86,10 @@ class MixcloudIE(InfoExtractor):
r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>', r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
webpage, 'play info'), 'play info') webpage, 'play info'), 'play info')
for item in full_info_json: for item in full_info_json:
item_data = try_get( item_data = try_get(item, [
item, lambda x: x['cloudcast']['data']['cloudcastLookup'], lambda x: x['cloudcast']['data']['cloudcastLookup'],
dict) lambda x: x['cloudcastLookup']['data']['cloudcastLookup'],
], dict)
if try_get(item_data, lambda x: x['streamInfo']['url']): if try_get(item_data, lambda x: x['streamInfo']['url']):
info_json = item_data info_json = item_data
break break

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@ -349,33 +350,29 @@ class MTVIE(MTVServicesInfoExtractor):
}] }]
class MTV81IE(InfoExtractor): class MTVJapanIE(MTVServicesInfoExtractor):
IE_NAME = 'mtv81' IE_NAME = 'mtvjapan'
_VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P<id>[^/?#.]+)' _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P<id>[0-9a-z]+)'
_TEST = { _TEST = {
'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/', 'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade',
'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
'info_dict': { 'info_dict': {
'id': '5e14040d-18a4-47c4-a582-43ff602de88e', 'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', 'title': '【Fresh Info】Cadillac ESCALADE Sport Edition',
'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', },
'timestamp': 1468846800, 'params': {
'upload_date': '20160718', 'skip_download': True,
}, },
} }
_GEO_COUNTRIES = ['JP']
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
def _extract_mgid(self, webpage): def _get_feed_query(self, uri):
return self._search_regex( return {
r'getTheVideo\((["\'])(?P<id>mgid:.+?)\1', webpage, 'arcEp': 'mtvjapan.com',
'mgid', group='id') 'mgid': uri,
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
mgid = self._extract_mgid(webpage)
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
class MTVVideoIE(MTVServicesInfoExtractor): class MTVVideoIE(MTVServicesInfoExtractor):
@ -425,14 +422,14 @@ class MTVVideoIE(MTVServicesInfoExtractor):
class MTVDEIE(MTVServicesInfoExtractor): class MTVDEIE(MTVServicesInfoExtractor):
IE_NAME = 'mtv.de' IE_NAME = 'mtv.de'
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$' _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', 'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum',
'info_dict': { 'info_dict': {
'id': 'music_video-a50bc5f0b3aa4b3190aa', 'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5',
'ext': 'flv', 'ext': 'mp4',
'title': 'MusicVideo_cro-traum', 'title': 'Traum',
'description': 'Cro - Traum', 'description': 'Traum',
}, },
'params': { 'params': {
# rtmp download # rtmp download
@ -441,11 +438,12 @@ class MTVDEIE(MTVServicesInfoExtractor):
'skip': 'Blocked at Travis CI', 'skip': 'Blocked at Travis CI',
}, { }, {
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97) # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen', 'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1',
'info_dict': { 'info_dict': {
'id': 'local_playlist-f5ae778b9832cc837189', 'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285',
'ext': 'flv', 'ext': 'mp4',
'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1', 'title': 'Teen Mom 2',
'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7',
}, },
'params': { 'params': {
# rtmp download # rtmp download
@ -453,7 +451,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
}, },
'skip': 'Blocked at Travis CI', 'skip': 'Blocked at Travis CI',
}, { }, {
'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3', 'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3',
'info_dict': { 'info_dict': {
'id': 'local_playlist-4e760566473c4c8c5344', 'id': 'local_playlist-4e760566473c4c8c5344',
'ext': 'mp4', 'ext': 'mp4',
@ -466,25 +464,11 @@ class MTVDEIE(MTVServicesInfoExtractor):
}, },
'skip': 'Das Video kann zur Zeit nicht abgespielt werden.', 'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
}] }]
_GEO_COUNTRIES = ['DE']
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
def _real_extract(self, url): def _get_feed_query(self, uri):
video_id = self._match_id(url) return {
'arcEp': 'mtv.de',
webpage = self._download_webpage(url, video_id) 'mgid': uri,
}
playlist = self._parse_json(
self._search_regex(
r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
video_id)
def _mrss_url(item):
return item['mrss'] + item.get('mrssvars', '')
# news pages contain single video in playlist with different id
if len(playlist) == 1:
return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id)
for item in playlist:
item_id = item.get('id')
if item_id and compat_str(item_id) == video_id:
return self._get_videos_info_from_url(_mrss_url(item), video_id)

View File

@ -9,10 +9,13 @@ from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote from ..compat import compat_urllib_parse_unquote
from ..utils import ( from ..utils import (
int_or_none,
js_to_json,
parse_duration,
smuggle_url, smuggle_url,
try_get, try_get,
unified_timestamp,
update_url_query, update_url_query,
int_or_none,
) )
@ -85,27 +88,41 @@ class NBCIE(AdobePassIE):
permalink, video_id = re.match(self._VALID_URL, url).groups() permalink, video_id = re.match(self._VALID_URL, url).groups()
permalink = 'http' + compat_urllib_parse_unquote(permalink) permalink = 'http' + compat_urllib_parse_unquote(permalink)
response = self._download_json( response = self._download_json(
'https://api.nbc.com/v3/videos', video_id, query={ 'https://friendship.nbc.co/v2/graphql', video_id, query={
'filter[permalink]': permalink, 'query': '''{
'fields[videos]': 'description,entitlement,episodeNumber,guid,keywords,seasonNumber,title,vChipRating', page(name: "%s", platform: web, type: VIDEO, userId: "0") {
'fields[shows]': 'shortTitle', data {
'include': 'show.shortTitle', ... on VideoPageData {
description
episodeNumber
keywords
locked
mpxAccountId
mpxGuid
rating
seasonNumber
secondaryTitle
seriesShortTitle
}
}
}
}''' % permalink,
}) })
video_data = response['data'][0]['attributes'] video_data = response['data']['page']['data']
query = { query = {
'mbr': 'true', 'mbr': 'true',
'manifest': 'm3u', 'manifest': 'm3u',
} }
video_id = video_data['guid'] video_id = video_data['mpxGuid']
title = video_data['title'] title = video_data['secondaryTitle']
if video_data.get('entitlement') == 'auth': if video_data.get('locked'):
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
'nbcentertainment', title, video_id, 'nbcentertainment', title, video_id,
video_data.get('vChipRating')) video_data.get('rating'))
query['auth'] = self._extract_mvpd_auth( query['auth'] = self._extract_mvpd_auth(
url, video_id, 'nbcentertainment', resource) url, video_id, 'nbcentertainment', resource)
theplatform_url = smuggle_url(update_url_query( theplatform_url = smuggle_url(update_url_query(
'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id, 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
query), {'force_smil_url': True}) query), {'force_smil_url': True})
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
@ -117,7 +134,7 @@ class NBCIE(AdobePassIE):
'season_number': int_or_none(video_data.get('seasonNumber')), 'season_number': int_or_none(video_data.get('seasonNumber')),
'episode_number': int_or_none(video_data.get('episodeNumber')), 'episode_number': int_or_none(video_data.get('episodeNumber')),
'episode': title, 'episode': title,
'series': try_get(response, lambda x: x['included'][0]['attributes']['shortTitle']), 'series': video_data.get('seriesShortTitle'),
'ie_key': 'ThePlatform', 'ie_key': 'ThePlatform',
} }
@ -272,13 +289,12 @@ class NBCNewsIE(ThePlatformIE):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
'md5': 'af1adfa51312291a017720403826bb64', 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf',
'info_dict': { 'info_dict': {
'id': '269389891880', 'id': '269389891880',
'ext': 'mp4', 'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview', 'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
'uploader': 'NBCU-NEWS',
'timestamp': 1401363060, 'timestamp': 1401363060,
'upload_date': '20140529', 'upload_date': '20140529',
}, },
@ -296,28 +312,26 @@ class NBCNewsIE(ThePlatformIE):
}, },
{ {
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
'md5': '73135a2e0ef819107bbb55a5a9b2a802', 'md5': '8eb831eca25bfa7d25ddd83e85946548',
'info_dict': { 'info_dict': {
'id': '394064451844', 'id': '394064451844',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
'timestamp': 1423104900, 'timestamp': 1423104900,
'uploader': 'NBCU-NEWS',
'upload_date': '20150205', 'upload_date': '20150205',
}, },
}, },
{ {
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
'md5': 'a49e173825e5fcd15c13fc297fced39d', 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0',
'info_dict': { 'info_dict': {
'id': '529953347624', 'id': 'n431456',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up', 'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'",
'description': 'md5:c8be487b2d80ff0594c005add88d8351', 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
'upload_date': '20150922', 'upload_date': '20150922',
'timestamp': 1442917800, 'timestamp': 1442917800,
'uploader': 'NBCU-NEWS',
}, },
}, },
{ {
@ -330,7 +344,6 @@ class NBCNewsIE(ThePlatformIE):
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
'upload_date': '20160420', 'upload_date': '20160420',
'timestamp': 1461152093, 'timestamp': 1461152093,
'uploader': 'NBCU-NEWS',
}, },
}, },
{ {
@ -344,7 +357,6 @@ class NBCNewsIE(ThePlatformIE):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1406937606, 'timestamp': 1406937606,
'upload_date': '20140802', 'upload_date': '20140802',
'uploader': 'NBCU-NEWS',
}, },
}, },
{ {
@ -360,20 +372,61 @@ class NBCNewsIE(ThePlatformIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
if not video_id.isdigit(): webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, video_id)
data = self._parse_json(self._search_regex( data = self._parse_json(self._search_regex(
r'window\.__data\s*=\s*({.+});', webpage, r'window\.__data\s*=\s*({.+});', webpage,
'bootstrap json'), video_id) 'bootstrap json'), video_id, js_to_json)
video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id'] video_data = try_get(data, lambda x: x['video']['current'], dict)
if not video_data:
video_data = data['article']['content'][0]['primaryMedia']['video']
title = video_data['headline']['primary']
formats = []
for va in video_data.get('videoAssets', []):
public_url = va.get('publicUrl')
if not public_url:
continue
if '://link.theplatform.com/' in public_url:
public_url = update_url_query(public_url, {'format': 'redirect'})
format_id = va.get('format')
if format_id == 'M3U':
formats.extend(self._extract_m3u8_formats(
public_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False))
continue
tbr = int_or_none(va.get('bitrate'), 1000)
if tbr:
format_id += '-%d' % tbr
formats.append({
'format_id': format_id,
'url': public_url,
'width': int_or_none(va.get('width')),
'height': int_or_none(va.get('height')),
'tbr': tbr,
'ext': 'mp4',
})
self._sort_formats(formats)
subtitles = {}
closed_captioning = video_data.get('closedCaptioning')
if closed_captioning:
for cc_url in closed_captioning.values():
if not cc_url:
continue
subtitles.setdefault('en', []).append({
'url': cc_url,
})
return { return {
'_type': 'url_transparent',
'id': video_id, 'id': video_id,
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works 'title': title,
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}), 'description': try_get(video_data, lambda x: x['description']['primary']),
'ie_key': 'ThePlatformFeed', 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
'duration': parse_duration(video_data.get('duration')),
'timestamp': unified_timestamp(video_data.get('datePublished')),
'formats': formats,
'subtitles': subtitles,
} }

View File

@ -295,13 +295,23 @@ class NexxIE(InfoExtractor):
video = None video = None
def find_video(result):
if isinstance(result, dict):
return result
elif isinstance(result, list):
vid = int(video_id)
for v in result:
if try_get(v, lambda x: x['general']['ID'], int) == vid:
return v
return None
response = self._download_json( response = self._download_json(
'https://arc.nexx.cloud/api/video/%s.json' % video_id, 'https://arc.nexx.cloud/api/video/%s.json' % video_id,
video_id, fatal=False) video_id, fatal=False)
if response and isinstance(response, dict): if response and isinstance(response, dict):
result = response.get('result') result = response.get('result')
if result and isinstance(result, dict): if result:
video = result video = find_video(result)
# not all videos work via arc, e.g. nexx:741:1269984 # not all videos work via arc, e.g. nexx:741:1269984
if not video: if not video:
@ -348,7 +358,7 @@ class NexxIE(InfoExtractor):
request_token = hashlib.md5( request_token = hashlib.md5(
''.join((op, domain_id, secret)).encode('utf-8')).hexdigest() ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
video = self._call_api( result = self._call_api(
domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={ domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description', 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
'addInteractionOptions': '1', 'addInteractionOptions': '1',
@ -363,6 +373,7 @@ class NexxIE(InfoExtractor):
'X-Request-CID': cid, 'X-Request-CID': cid,
'X-Request-Token': request_token, 'X-Request-Token': request_token,
}) })
video = find_video(result)
general = video['general'] general = video['general']
title = general['title'] title = general['title']

View File

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
@ -121,6 +123,13 @@ class OdnoklassnikiIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url): def _real_extract(self, url):
start_time = int_or_none(compat_parse_qs( start_time = int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

View File

@ -20,6 +20,8 @@ from ..utils import (
class OnetBaseIE(InfoExtractor): class OnetBaseIE(InfoExtractor):
_URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'
def _search_mvp_id(self, webpage): def _search_mvp_id(self, webpage):
return self._search_regex( return self._search_regex(
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id') r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
@ -45,7 +47,7 @@ class OnetBaseIE(InfoExtractor):
video = response['result'].get('0') video = response['result'].get('0')
formats = [] formats = []
for _, formats_dict in video['formats'].items(): for format_type, formats_dict in video['formats'].items():
if not isinstance(formats_dict, dict): if not isinstance(formats_dict, dict):
continue continue
for format_id, format_list in formats_dict.items(): for format_id, format_list in formats_dict.items():
@ -56,21 +58,31 @@ class OnetBaseIE(InfoExtractor):
if not video_url: if not video_url:
continue continue
ext = determine_ext(video_url) ext = determine_ext(video_url)
if format_id == 'ism': if format_id.startswith('ism'):
formats.extend(self._extract_ism_formats( formats.extend(self._extract_ism_formats(
video_url, video_id, 'mss', fatal=False)) video_url, video_id, 'mss', fatal=False))
elif ext == 'mpd': elif ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False)) video_url, video_id, mpd_id='dash', fatal=False))
elif format_id.startswith('hls'):
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else: else:
formats.append({ http_f = {
'url': video_url, 'url': video_url,
'format_id': format_id, 'format_id': format_id,
'height': int_or_none(f.get('vertical_resolution')),
'width': int_or_none(f.get('horizontal_resolution')),
'abr': float_or_none(f.get('audio_bitrate')), 'abr': float_or_none(f.get('audio_bitrate')),
'vbr': float_or_none(f.get('video_bitrate')), }
}) if format_type == 'audio':
http_f['vcodec'] = 'none'
else:
http_f.update({
'height': int_or_none(f.get('vertical_resolution')),
'width': int_or_none(f.get('horizontal_resolution')),
'vbr': float_or_none(f.get('video_bitrate')),
})
formats.append(http_f)
self._sort_formats(formats) self._sort_formats(formats)
meta = video.get('meta', {}) meta = video.get('meta', {})
@ -105,12 +117,12 @@ class OnetMVPIE(OnetBaseIE):
class OnetIE(OnetBaseIE): class OnetIE(OnetBaseIE):
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)' _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
IE_NAME = 'onet.tv' IE_NAME = 'onet.tv'
_TEST = { _TESTS = [{
'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc', 'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
'md5': 'e3ffbf47590032ac3f27249204173d50', 'md5': '436102770fb095c75b8bb0392d3da9ff',
'info_dict': { 'info_dict': {
'id': 'qbpyqc', 'id': 'qbpyqc',
'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd', 'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
@ -120,7 +132,10 @@ class OnetIE(OnetBaseIE):
'upload_date': '20160705', 'upload_date': '20160705',
'timestamp': 1467721580, 'timestamp': 1467721580,
}, },
} }, {
'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -140,18 +155,21 @@ class OnetIE(OnetBaseIE):
class OnetChannelIE(OnetBaseIE): class OnetChannelIE(OnetBaseIE):
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P<id>[a-z]+)(?:[?#]|$)' _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P<id>[a-z]+)(?:[?#]|$)'
IE_NAME = 'onet.tv:channel' IE_NAME = 'onet.tv:channel'
_TEST = { _TESTS = [{
'url': 'http://onet.tv/k/openerfestival', 'url': 'http://onet.tv/k/openerfestival',
'info_dict': { 'info_dict': {
'id': 'openerfestival', 'id': 'openerfestival',
'title': 'Open\'er Festival Live', 'title': "Open'er Festival",
'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.', 'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.",
}, },
'playlist_mincount': 46, 'playlist_mincount': 35,
} }, {
'url': 'https://onet100.vod.pl/k/openerfestival',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url) channel_id = self._match_id(url)
@ -173,7 +191,7 @@ class OnetChannelIE(OnetBaseIE):
'Downloading channel %s - add --no-playlist to just download video %s' % ( 'Downloading channel %s - add --no-playlist to just download video %s' % (
channel_id, video_name)) channel_id, video_name))
matches = re.findall( matches = re.findall(
r'<a[^>]+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)', r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE,
webpage) webpage)
entries = [ entries = [
self.url_result(video_link, OnetIE.ie_key()) self.url_result(video_link, OnetIE.ie_key())

View File

@ -25,21 +25,21 @@ class PuhuTVIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# film # film
'url': 'https://puhutv.com/sut-kardesler-izle', 'url': 'https://puhutv.com/sut-kardesler-izle',
'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7', 'md5': 'a347470371d56e1585d1b2c8dab01c96',
'info_dict': { 'info_dict': {
'id': '5085', 'id': '5085',
'display_id': 'sut-kardesler', 'display_id': 'sut-kardesler',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Süt Kardeşler', 'title': 'Süt Kardeşler',
'description': 'md5:405fd024df916ca16731114eb18e511a', 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 4832.44, 'duration': 4832.44,
'creator': 'Arzu Film', 'creator': 'Arzu Film',
'timestamp': 1469778212, 'timestamp': 1561062602,
'upload_date': '20160729', 'upload_date': '20190620',
'release_year': 1976, 'release_year': 1976,
'view_count': int, 'view_count': int,
'tags': ['Aile', 'Komedi', 'Klasikler'], 'tags': list,
}, },
}, { }, {
# episode, geo restricted, bypassable with --geo-verification-proxy # episode, geo restricted, bypassable with --geo-verification-proxy
@ -64,9 +64,10 @@ class PuhuTVIE(InfoExtractor):
display_id)['data'] display_id)['data']
video_id = compat_str(info['id']) video_id = compat_str(info['id'])
title = info.get('name') or info['title']['name'] show = info.get('title') or {}
title = info.get('name') or show['name']
if info.get('display_name'): if info.get('display_name'):
title = '%s %s' % (title, info.get('display_name')) title = '%s %s' % (title, info['display_name'])
try: try:
videos = self._download_json( videos = self._download_json(
@ -78,17 +79,36 @@ class PuhuTVIE(InfoExtractor):
self.raise_geo_restricted() self.raise_geo_restricted()
raise raise
urls = []
formats = [] formats = []
def add_http_from_hls(m3u8_f):
http_url = m3u8_f['url'].replace('/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4')
if http_url != m3u8_f['url']:
f = m3u8_f.copy()
f.update({
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
'url': http_url,
})
formats.append(f)
for video in videos['data']['videos']: for video in videos['data']['videos']:
media_url = url_or_none(video.get('url')) media_url = url_or_none(video.get('url'))
if not media_url: if not media_url or media_url in urls:
continue continue
urls.append(media_url)
playlist = video.get('is_playlist') playlist = video.get('is_playlist')
if video.get('stream_type') == 'hls' and playlist is True: if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
formats.extend(self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
media_url, video_id, 'mp4', entry_protocol='m3u8_native', media_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False)
for m3u8_f in m3u8_formats:
formats.append(m3u8_f)
add_http_from_hls(m3u8_f)
continue continue
quality = int_or_none(video.get('quality')) quality = int_or_none(video.get('quality'))
f = { f = {
'url': media_url, 'url': media_url,
@ -96,34 +116,29 @@ class PuhuTVIE(InfoExtractor):
'height': quality 'height': quality
} }
video_format = video.get('video_format') video_format = video.get('video_format')
if video_format == 'hls' and playlist is False: is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
if is_hls:
format_id = 'hls' format_id = 'hls'
f['protocol'] = 'm3u8_native' f['protocol'] = 'm3u8_native'
elif video_format == 'mp4': elif video_format == 'mp4':
format_id = 'http' format_id = 'http'
else: else:
continue continue
if quality: if quality:
format_id += '-%sp' % quality format_id += '-%sp' % quality
f['format_id'] = format_id f['format_id'] = format_id
formats.append(f) formats.append(f)
if is_hls:
add_http_from_hls(f)
self._sort_formats(formats) self._sort_formats(formats)
description = try_get(
info, lambda x: x['title']['description'],
compat_str) or info.get('description')
timestamp = unified_timestamp(info.get('created_at'))
creator = try_get( creator = try_get(
info, lambda x: x['title']['producer']['name'], compat_str) show, lambda x: x['producer']['name'], compat_str)
duration = float_or_none( content = info.get('content') or {}
try_get(info, lambda x: x['content']['duration_in_ms'], int),
scale=1000)
view_count = try_get(info, lambda x: x['content']['watch_count'], int)
images = try_get( images = try_get(
info, lambda x: x['content']['images']['wide'], dict) or {} content, lambda x: x['images']['wide'], dict) or {}
thumbnails = [] thumbnails = []
for image_id, image_url in images.items(): for image_id, image_url in images.items():
if not isinstance(image_url, compat_str): if not isinstance(image_url, compat_str):
@ -137,14 +152,8 @@ class PuhuTVIE(InfoExtractor):
}) })
thumbnails.append(t) thumbnails.append(t)
release_year = try_get(info, lambda x: x['title']['released_at'], int)
season_number = int_or_none(info.get('season_number'))
season_id = str_or_none(info.get('season_id'))
episode_number = int_or_none(info.get('episode_number'))
tags = [] tags = []
for genre in try_get(info, lambda x: x['title']['genres'], list) or []: for genre in show.get('genres') or []:
if not isinstance(genre, dict): if not isinstance(genre, dict):
continue continue
genre_name = genre.get('name') genre_name = genre.get('name')
@ -152,12 +161,11 @@ class PuhuTVIE(InfoExtractor):
tags.append(genre_name) tags.append(genre_name)
subtitles = {} subtitles = {}
for subtitle in try_get( for subtitle in content.get('subtitles') or []:
info, lambda x: x['content']['subtitles'], list) or []:
if not isinstance(subtitle, dict): if not isinstance(subtitle, dict):
continue continue
lang = subtitle.get('language') lang = subtitle.get('language')
sub_url = url_or_none(subtitle.get('url')) sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
if not lang or not isinstance(lang, compat_str) or not sub_url: if not lang or not isinstance(lang, compat_str) or not sub_url:
continue continue
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
@ -168,15 +176,15 @@ class PuhuTVIE(InfoExtractor):
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': description, 'description': info.get('description') or show.get('description'),
'season_id': season_id, 'season_id': str_or_none(info.get('season_id')),
'season_number': season_number, 'season_number': int_or_none(info.get('season_number')),
'episode_number': episode_number, 'episode_number': int_or_none(info.get('episode_number')),
'release_year': release_year, 'release_year': int_or_none(show.get('released_at')),
'timestamp': timestamp, 'timestamp': unified_timestamp(info.get('created_at')),
'creator': creator, 'creator': creator,
'view_count': view_count, 'view_count': int_or_none(content.get('watch_count')),
'duration': duration, 'duration': float_or_none(content.get('duration_in_ms'), 1000),
'tags': tags, 'tags': tags,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnails': thumbnails, 'thumbnails': thumbnails,

View File

@ -1,72 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
)
class ServingSysIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
'info_dict': {
'id': '5349193',
'title': 'AdAPPter_Hyundai_demo',
},
'playlist': [{
'md5': 'baed851342df6846eb8677a60a011a0f',
'info_dict': {
'id': '29955898',
'ext': 'flv',
'title': 'AdAPPter_Hyundai_demo (1)',
'duration': 74,
'tbr': 1378,
'width': 640,
'height': 400,
},
}, {
'md5': '979b4da2655c4bc2d81aeb915a8c5014',
'info_dict': {
'id': '29907998',
'ext': 'flv',
'title': 'AdAPPter_Hyundai_demo (2)',
'duration': 34,
'width': 854,
'height': 480,
'tbr': 516,
},
}],
'params': {
'playlistend': 2,
},
'_skip': 'Blocked in the US [sic]',
}
def _real_extract(self, url):
pl_id = self._match_id(url)
vast_doc = self._download_xml(url, pl_id)
title = vast_doc.find('.//AdTitle').text
media = vast_doc.find('.//MediaFile').text
info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
doc = self._download_xml(info_url, pl_id, 'Downloading video info')
entries = [{
'_type': 'video',
'id': a.attrib['id'],
'title': '%s (%s)' % (title, a.attrib['assetID']),
'url': a.attrib['URL'],
'duration': int_or_none(a.attrib.get('length')),
'tbr': int_or_none(a.attrib.get('bitrate')),
'height': int_or_none(a.attrib.get('height')),
'width': int_or_none(a.attrib.get('width')),
} for a in doc.findall('.//AdditionalAssets/asset')]
return {
'_type': 'playlist',
'id': pl_id,
'title': title,
'entries': entries,
}

View File

@ -11,14 +11,13 @@ from .common import (
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urlparse, compat_urlparse,
compat_urllib_parse_urlencode,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
HEADRequest,
int_or_none, int_or_none,
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
merge_dicts,
mimetype2ext, mimetype2ext,
str_or_none, str_or_none,
try_get, try_get,
@ -28,6 +27,20 @@ from ..utils import (
) )
class SoundcloudEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?url=(?P<id>.*)'
@staticmethod
def _extract_urls(webpage):
return [m.group('url') for m in re.finditer(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
webpage)]
def _real_extract(self, url):
return self.url_result(compat_urlparse.parse_qs(
compat_urlparse.urlparse(url).query)['url'][0])
class SoundcloudIE(InfoExtractor): class SoundcloudIE(InfoExtractor):
"""Information extractor for soundcloud.com """Information extractor for soundcloud.com
To access the media, the uid of the song and a stream token To access the media, the uid of the song and a stream token
@ -44,9 +57,8 @@ class SoundcloudIE(InfoExtractor):
(?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/? (?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$) (?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
(?:/?\?secret_token=(?P<secret_token>[^&]+))?) (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
) )
''' '''
IE_NAME = 'soundcloud' IE_NAME = 'soundcloud'
@ -60,6 +72,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music', 'uploader': 'E.T. ExTerrestrial Music',
'uploader_id': '1571244',
'timestamp': 1349920598, 'timestamp': 1349920598,
'upload_date': '20121011', 'upload_date': '20121011',
'duration': 143.216, 'duration': 143.216,
@ -79,6 +92,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Goldrushed', 'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept', 'uploader': 'The Royal Concept',
'uploader_id': '9615865',
'timestamp': 1337635207, 'timestamp': 1337635207,
'upload_date': '20120521', 'upload_date': '20120521',
'duration': 30, 'duration': 30,
@ -92,6 +106,7 @@ class SoundcloudIE(InfoExtractor):
# rtmp # rtmp
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Preview',
}, },
# private link # private link
{ {
@ -103,6 +118,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'description': 'test chars: \"\'/\\ä↭', 'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF', 'uploader': 'jaimeMF',
'uploader_id': '69767071',
'timestamp': 1386604920, 'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9.927, 'duration': 9.927,
@ -123,6 +139,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'description': 'test chars: \"\'/\\ä↭', 'description': 'test chars: \"\'/\\ä↭',
'uploader': 'jaimeMF', 'uploader': 'jaimeMF',
'uploader_id': '69767071',
'timestamp': 1386604920, 'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9.927, 'duration': 9.927,
@ -143,6 +160,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Bus Brakes', 'title': 'Bus Brakes',
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
'uploader': 'oddsamples', 'uploader': 'oddsamples',
'uploader_id': '73680509',
'timestamp': 1389232924, 'timestamp': 1389232924,
'upload_date': '20140109', 'upload_date': '20140109',
'duration': 17.346, 'duration': 17.346,
@ -163,6 +181,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
'uploader': 'Ori Uplift Music', 'uploader': 'Ori Uplift Music',
'uploader_id': '12563093',
'timestamp': 1504206263, 'timestamp': 1504206263,
'upload_date': '20170831', 'upload_date': '20170831',
'duration': 7449.096, 'duration': 7449.096,
@ -183,6 +202,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Sideways (Prod. Mad Real)', 'title': 'Sideways (Prod. Mad Real)',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'uploader': 'garyvee', 'uploader': 'garyvee',
'uploader_id': '2366352',
'timestamp': 1488152409, 'timestamp': 1488152409,
'upload_date': '20170226', 'upload_date': '20170226',
'duration': 207.012, 'duration': 207.012,
@ -207,6 +227,7 @@ class SoundcloudIE(InfoExtractor):
'title': 'Mezzo Valzer', 'title': 'Mezzo Valzer',
'description': 'md5:4138d582f81866a530317bae316e8b61', 'description': 'md5:4138d582f81866a530317bae316e8b61',
'uploader': 'Giovanni Sarani', 'uploader': 'Giovanni Sarani',
'uploader_id': '3352531',
'timestamp': 1551394171, 'timestamp': 1551394171,
'upload_date': '20190228', 'upload_date': '20190228',
'duration': 180.157, 'duration': 180.157,
@ -221,114 +242,81 @@ class SoundcloudIE(InfoExtractor):
} }
] ]
_API_BASE = 'https://api.soundcloud.com/'
_API_V2_BASE = 'https://api-v2.soundcloud.com/'
_BASE_URL = 'https://soundcloud.com/'
_CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI'
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
@staticmethod _ARTWORK_MAP = {
def _extract_urls(webpage): 'mini': 16,
return [m.group('url') for m in re.finditer( 'tiny': 20,
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', 'small': 32,
webpage)] 'badge': 47,
't67x67': 67,
'large': 100,
't300x300': 300,
'crop': 400,
't500x500': 500,
'original': 0,
}
@classmethod @classmethod
def _resolv_url(cls, url): def _resolv_url(cls, url):
return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2):
track_id = compat_str(info['id']) track_id = compat_str(info['id'])
title = info['title'] title = info['title']
name = full_title or track_id track_base_url = self._API_BASE + 'tracks/%s' % track_id
if quiet:
self.report_extraction(name)
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500')
username = try_get(info, lambda x: x['user']['username'], compat_str)
def extract_count(key):
return int_or_none(info.get('%s_count' % key))
like_count = extract_count('favoritings')
if like_count is None:
like_count = extract_count('likes')
result = {
'id': track_id,
'uploader': username,
'timestamp': unified_timestamp(info.get('created_at')),
'title': title,
'description': info.get('description'),
'thumbnail': thumbnail,
'duration': float_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'),
'license': info.get('license'),
'view_count': extract_count('playback'),
'like_count': like_count,
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
'genre': info.get('genre'),
}
format_urls = set() format_urls = set()
formats = [] formats = []
query = {'client_id': self._CLIENT_ID} query = {'client_id': self._CLIENT_ID}
if secret_token is not None: if secret_token:
query['secret_token'] = secret_token query['secret_token'] = secret_token
if info.get('downloadable', False):
# We can build a direct link to the song if info.get('downloadable'):
format_url = update_url_query( format_url = update_url_query(
'https://api.soundcloud.com/tracks/%s/download' % track_id, query) info.get('download_url') or track_base_url + '/download', query)
format_urls.add(format_url) format_urls.add(format_url)
if version == 2:
v1_info = self._download_json(
track_base_url, track_id, query=query, fatal=False) or {}
else:
v1_info = info
formats.append({ formats.append({
'format_id': 'download', 'format_id': 'download',
'ext': info.get('original_format', 'mp3'), 'ext': v1_info.get('original_format') or 'mp3',
'filesize': int_or_none(v1_info.get('original_content_size')),
'url': format_url, 'url': format_url,
'vcodec': 'none',
'preference': 10, 'preference': 10,
}) })
# Old API, does not work for some tracks (e.g. def invalid_url(url):
# https://soundcloud.com/giovannisarani/mezzo-valzer) return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url)
format_dict = self._download_json(
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
track_id, 'Downloading track url', query=query, fatal=False)
if format_dict: def add_format(f, protocol):
for key, stream_url in format_dict.items(): mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
if stream_url in format_urls: if mobj:
continue for k, v in mobj.groupdict().items():
format_urls.add(stream_url) if not f.get(k):
ext, abr = 'mp3', None f[k] = v
mobj = re.search(r'_([^_]+)_(\d+)_url', key) format_id_list = []
if mobj: if protocol:
ext, abr = mobj.groups() format_id_list.append(protocol)
abr = int(abr) for k in ('ext', 'abr'):
if key.startswith('http'): v = f.get(k)
stream_formats = [{ if v:
'format_id': key, format_id_list.append(v)
'ext': ext, abr = f.get('abr')
'url': stream_url, if abr:
}] f['abr'] = int(abr)
elif key.startswith('rtmp'): f.update({
# The url doesn't have an rtmp app, we have to extract the playpath 'format_id': '_'.join(format_id_list),
url, path = stream_url.split('mp3:', 1) 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
stream_formats = [{ })
'format_id': key, formats.append(f)
'url': url,
'play_path': 'mp3:' + path,
'ext': 'flv',
}]
elif key.startswith('hls'):
stream_formats = self._extract_m3u8_formats(
stream_url, track_id, ext, entry_protocol='m3u8_native',
m3u8_id=key, fatal=False)
else:
continue
if abr:
for f in stream_formats:
f['abr'] = abr
formats.extend(stream_formats)
# New API # New API
transcodings = try_get( transcodings = try_get(
@ -337,129 +325,165 @@ class SoundcloudIE(InfoExtractor):
if not isinstance(t, dict): if not isinstance(t, dict):
continue continue
format_url = url_or_none(t.get('url')) format_url = url_or_none(t.get('url'))
if not format_url: if not format_url or t.get('snipped') or '/preview/' in format_url:
continue continue
stream = self._download_json( stream = self._download_json(
update_url_query(format_url, query), track_id, fatal=False) format_url, track_id, query=query, fatal=False)
if not isinstance(stream, dict): if not isinstance(stream, dict):
continue continue
stream_url = url_or_none(stream.get('url')) stream_url = url_or_none(stream.get('url'))
if not stream_url: if invalid_url(stream_url):
continue
if stream_url in format_urls:
continue continue
format_urls.add(stream_url) format_urls.add(stream_url)
protocol = try_get(t, lambda x: x['format']['protocol'], compat_str) stream_format = t.get('format') or {}
protocol = stream_format.get('protocol')
if protocol != 'hls' and '/hls' in format_url: if protocol != 'hls' and '/hls' in format_url:
protocol = 'hls' protocol = 'hls'
ext = None ext = None
preset = str_or_none(t.get('preset')) preset = str_or_none(t.get('preset'))
if preset: if preset:
ext = preset.split('_')[0] ext = preset.split('_')[0]
if ext not in KNOWN_EXTENSIONS: if ext not in KNOWN_EXTENSIONS:
mimetype = try_get( ext = mimetype2ext(stream_format.get('mime_type'))
t, lambda x: x['format']['mime_type'], compat_str) add_format({
ext = mimetype2ext(mimetype) or 'mp3'
format_id_list = []
if protocol:
format_id_list.append(protocol)
format_id_list.append(ext)
format_id = '_'.join(format_id_list)
formats.append({
'url': stream_url, 'url': stream_url,
'format_id': format_id,
'ext': ext, 'ext': ext,
'protocol': 'm3u8_native' if protocol == 'hls' else 'http', }, 'http' if protocol == 'progressive' else protocol)
})
if not formats:
# Old API, does not work for some tracks (e.g.
# https://soundcloud.com/giovannisarani/mezzo-valzer)
# and might serve preview URLs (e.g.
# http://www.soundcloud.com/snbrn/ele)
format_dict = self._download_json(
track_base_url + '/streams', track_id,
'Downloading track url', query=query, fatal=False) or {}
for key, stream_url in format_dict.items():
if invalid_url(stream_url):
continue
format_urls.add(stream_url)
mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key)
if mobj:
protocol, ext, abr = mobj.groups()
add_format({
'abr': abr,
'ext': ext,
'url': stream_url,
}, protocol)
if not formats: if not formats:
# We fallback to the stream_url in the original info, this # We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error # cannot be always used, sometimes it can give an HTTP 404 error
formats.append({ urlh = self._request_webpage(
'format_id': 'fallback', HEADRequest(info.get('stream_url') or track_base_url + '/stream'),
'url': update_url_query(info['stream_url'], query), track_id, query=query, fatal=False)
'ext': 'mp3', if urlh:
}) stream_url = urlh.geturl()
self._check_formats(formats, track_id) if not invalid_url(stream_url):
add_format({'url': stream_url}, 'http')
for f in formats: for f in formats:
f['vcodec'] = 'none' f['vcodec'] = 'none'
self._sort_formats(formats) self._sort_formats(formats)
result['formats'] = formats
return result user = info.get('user') or {}
thumbnails = []
artwork_url = info.get('artwork_url')
thumbnail = artwork_url or user.get('avatar_url')
if isinstance(thumbnail, compat_str):
if re.search(self._IMAGE_REPL_RE, thumbnail):
for image_id, size in self._ARTWORK_MAP.items():
i = {
'id': image_id,
'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
}
if image_id == 'tiny' and not artwork_url:
size = 18
elif image_id == 'original':
i['preference'] = 10
if size:
i.update({
'width': size,
'height': size,
})
thumbnails.append(i)
else:
thumbnails = [{'url': thumbnail}]
def extract_count(key):
return int_or_none(info.get('%s_count' % key))
return {
'id': track_id,
'uploader': user.get('username'),
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
'uploader_url': user.get('permalink_url'),
'timestamp': unified_timestamp(info.get('created_at')),
'title': title,
'description': info.get('description'),
'thumbnails': thumbnails,
'duration': float_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'),
'license': info.get('license'),
'view_count': extract_count('playback'),
'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
'genre': info.get('genre'),
'formats': formats
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
track_id = mobj.group('track_id') track_id = mobj.group('track_id')
new_info = {}
if track_id is not None: query = {
info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID 'client_id': self._CLIENT_ID,
}
if track_id:
info_json_url = self._API_V2_BASE + 'tracks/' + track_id
full_title = track_id full_title = track_id
token = mobj.group('secret_token') token = mobj.group('secret_token')
if token: if token:
info_json_url += '&secret_token=' + token query['secret_token'] = token
elif mobj.group('player'):
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
real_url = query['url'][0]
# If the token is in the query of the original url we have to
# manually add it
if 'secret_token' in query:
real_url += '?secret_token=' + query['secret_token'][0]
return self.url_result(real_url)
else: else:
# extract uploader (which is in the url) full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
uploader = mobj.group('uploader')
# extract simple title (uploader + slug of song title)
slug_title = mobj.group('title')
token = mobj.group('token') token = mobj.group('token')
full_title = resolve_title = '%s/%s' % (uploader, slug_title)
if token: if token:
resolve_title += '/%s' % token resolve_title += '/%s' % token
info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
webpage = self._download_webpage(url, full_title, fatal=False) version = 2
if webpage:
entries = self._parse_json(
self._search_regex(
r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage,
'data', default='[]'), full_title, fatal=False)
if entries:
for e in entries:
if not isinstance(e, dict):
continue
if e.get('id') != 67:
continue
data = try_get(e, lambda x: x['data'][0], dict)
if data:
new_info = data
break
info_json_url = self._resolv_url(
'https://soundcloud.com/%s' % resolve_title)
# Contains some additional info missing from new_info
info = self._download_json( info = self._download_json(
info_json_url, full_title, 'Downloading info JSON') info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False)
if not info:
info = self._download_json(
info_json_url.replace(self._API_V2_BASE, self._API_BASE),
full_title, 'Downloading info JSON', query=query)
version = 1
return self._extract_info_dict( return self._extract_info_dict(info, full_title, token, version)
merge_dicts(info, new_info), full_title, secret_token=token)
class SoundcloudPlaylistBaseIE(SoundcloudIE): class SoundcloudPlaylistBaseIE(SoundcloudIE):
@staticmethod def _extract_track_entries(self, tracks, token=None):
def _extract_id(e): entries = []
return compat_str(e['id']) if e.get('id') else None for track in tracks:
track_id = str_or_none(track.get('id'))
def _extract_track_entries(self, tracks): url = track.get('permalink_url')
return [ if not url:
self.url_result( if not track_id:
track['permalink_url'], SoundcloudIE.ie_key(), continue
video_id=self._extract_id(track)) url = self._API_V2_BASE + 'tracks/' + track_id
for track in tracks if track.get('permalink_url')] if token:
url += '?secret_token=' + token
entries.append(self.url_result(
url, SoundcloudIE.ie_key(), track_id))
return entries
class SoundcloudSetIE(SoundcloudPlaylistBaseIE): class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
@ -480,41 +504,28 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
# extract uploader (which is in the url) full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
uploader = mobj.group('uploader')
# extract simple title (uploader + slug of song title)
slug_title = mobj.group('slug_title')
full_title = '%s/sets/%s' % (uploader, slug_title)
url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
token = mobj.group('token') token = mobj.group('token')
if token: if token:
full_title += '/' + token full_title += '/' + token
url += '/' + token
resolv_url = self._resolv_url(url) info = self._download_json(self._resolv_url(
info = self._download_json(resolv_url, full_title) self._BASE_URL + full_title), full_title)
if 'errors' in info: if 'errors' in info:
msgs = (compat_str(err['error_message']) for err in info['errors']) msgs = (compat_str(err['error_message']) for err in info['errors'])
raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
entries = self._extract_track_entries(info['tracks']) entries = self._extract_track_entries(info['tracks'], token)
return { return self.playlist_result(
'_type': 'playlist', entries, str_or_none(info.get('id')), info.get('title'))
'entries': entries,
'id': '%s' % info['id'],
'title': info['title'],
}
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
_API_V2_BASE = 'https://api-v2.soundcloud.com'
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
COMMON_QUERY = { COMMON_QUERY = {
'limit': 50, 'limit': 2000000000,
'client_id': self._CLIENT_ID, 'client_id': self._CLIENT_ID,
'linked_partitioning': '1', 'linked_partitioning': '1',
} }
@ -522,12 +533,13 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
query = COMMON_QUERY.copy() query = COMMON_QUERY.copy()
query['offset'] = 0 query['offset'] = 0
next_href = base_url + '?' + compat_urllib_parse_urlencode(query) next_href = base_url
entries = [] entries = []
for i in itertools.count(): for i in itertools.count():
response = self._download_json( response = self._download_json(
next_href, playlist_id, 'Downloading track page %s' % (i + 1)) next_href, playlist_id,
'Downloading track page %s' % (i + 1), query=query)
collection = response['collection'] collection = response['collection']
@ -546,9 +558,8 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
continue continue
return self.url_result( return self.url_result(
permalink_url, permalink_url,
ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
video_id=self._extract_id(cand), str_or_none(cand.get('id')), cand.get('title'))
video_title=cand.get('title'))
for e in collection: for e in collection:
entry = resolve_entry((e, e.get('track'), e.get('playlist'))) entry = resolve_entry((e, e.get('track'), e.get('playlist')))
@ -559,11 +570,10 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
if not next_href: if not next_href:
break break
parsed_next_href = compat_urlparse.urlparse(response['next_href']) next_href = response['next_href']
qs = compat_urlparse.parse_qs(parsed_next_href.query) parsed_next_href = compat_urlparse.urlparse(next_href)
qs.update(COMMON_QUERY) query = compat_urlparse.parse_qs(parsed_next_href.query)
next_href = compat_urlparse.urlunparse( query.update(COMMON_QUERY)
parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
return { return {
'_type': 'playlist', '_type': 'playlist',
@ -609,7 +619,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
'url': 'https://soundcloud.com/jcv246/sets', 'url': 'https://soundcloud.com/jcv246/sets',
'info_dict': { 'info_dict': {
'id': '12982173', 'id': '12982173',
'title': 'Jordi / cv (Playlists)', 'title': 'Jordi / cv (Sets)',
}, },
'playlist_mincount': 2, 'playlist_mincount': 2,
}, { }, {
@ -636,39 +646,29 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
}] }]
_BASE_URL_MAP = { _BASE_URL_MAP = {
'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'all': 'stream/users/%s',
'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'tracks': 'users/%s/tracks',
'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'albums': 'users/%s/albums',
'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'sets': 'users/%s/playlists',
'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'reposts': 'stream/users/%s/reposts',
'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'likes': 'users/%s/likes',
'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'spotlight': 'users/%s/spotlight',
}
_TITLE_MAP = {
'all': 'All',
'tracks': 'Tracks',
'albums': 'Albums',
'sets': 'Playlists',
'reposts': 'Reposts',
'likes': 'Likes',
'spotlight': 'Spotlight',
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
uploader = mobj.group('user') uploader = mobj.group('user')
url = 'https://soundcloud.com/%s/' % uploader
resolv_url = self._resolv_url(url)
user = self._download_json( user = self._download_json(
resolv_url, uploader, 'Downloading user info') self._resolv_url(self._BASE_URL + uploader),
uploader, 'Downloading user info')
resource = mobj.group('rsrc') or 'all' resource = mobj.group('rsrc') or 'all'
return self._extract_playlist( return self._extract_playlist(
self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']), self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'],
'%s (%s)' % (user['username'], self._TITLE_MAP[resource])) str_or_none(user.get('id')),
'%s (%s)' % (user['username'], resource.capitalize()))
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
@ -678,7 +678,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
'info_dict': { 'info_dict': {
'id': '286017854', 'id': '286017854',
'title': 'Track station: your-text', 'title': 'Track station: your text',
}, },
'playlist_mincount': 47, 'playlist_mincount': 47,
}] }]
@ -686,19 +686,17 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
track_name = self._match_id(url) track_name = self._match_id(url)
webpage = self._download_webpage(url, track_name) track = self._download_json(self._resolv_url(url), track_name)
track_id = self._search_regex( track_id = self._search_regex(
r'soundcloud:track-stations:(\d+)', webpage, 'track id') r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
return self._extract_playlist( return self._extract_playlist(
'%s/stations/soundcloud:track-stations:%s/tracks' self._API_V2_BASE + 'stations/%s/tracks' % track['id'],
% (self._API_V2_BASE, track_id), track_id, 'Track station: %s' % track['title'])
track_id, 'Track station: %s' % track_name)
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
IE_NAME = 'soundcloud:playlist' IE_NAME = 'soundcloud:playlist'
_TESTS = [{ _TESTS = [{
'url': 'https://api.soundcloud.com/playlists/4110309', 'url': 'https://api.soundcloud.com/playlists/4110309',
@ -713,29 +711,22 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
data_dict = { query = {
'client_id': self._CLIENT_ID, 'client_id': self._CLIENT_ID,
} }
token = mobj.group('token') token = mobj.group('token')
if token: if token:
data_dict['secret_token'] = token query['secret_token'] = token
data = compat_urllib_parse_urlencode(data_dict)
data = self._download_json( data = self._download_json(
base_url + data, playlist_id, 'Downloading playlist') self._API_V2_BASE + 'playlists/' + playlist_id,
playlist_id, 'Downloading playlist', query=query)
entries = self._extract_track_entries(data['tracks']) entries = self._extract_track_entries(data['tracks'], token)
return { return self.playlist_result(
'_type': 'playlist', entries, playlist_id, data.get('title'), data.get('description'))
'id': playlist_id,
'title': data.get('title'),
'description': data.get('description'),
'entries': entries,
}
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
@ -753,18 +744,18 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
_SEARCH_KEY = 'scsearch' _SEARCH_KEY = 'scsearch'
_MAX_RESULTS_PER_PAGE = 200 _MAX_RESULTS_PER_PAGE = 200
_DEFAULT_RESULTS_PER_PAGE = 50 _DEFAULT_RESULTS_PER_PAGE = 50
_API_V2_BASE = 'https://api-v2.soundcloud.com'
def _get_collection(self, endpoint, collection_id, **query): def _get_collection(self, endpoint, collection_id, **query):
limit = min( limit = min(
query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
self._MAX_RESULTS_PER_PAGE) self._MAX_RESULTS_PER_PAGE)
query['limit'] = limit query.update({
query['client_id'] = self._CLIENT_ID 'limit': limit,
query['linked_partitioning'] = '1' 'client_id': self._CLIENT_ID,
query['offset'] = 0 'linked_partitioning': 1,
data = compat_urllib_parse_urlencode(query) 'offset': 0,
next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) })
next_url = update_url_query(self._API_V2_BASE + endpoint, query)
collected_results = 0 collected_results = 0
@ -791,5 +782,5 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
break break
def _get_n_results(self, query, n): def _get_n_results(self, query, n):
tracks = self._get_collection('/search/tracks', query, limit=n, q=query) tracks = self._get_collection('search/tracks', query, limit=n, q=query)
return self.playlist_result(tracks, playlist_title=query) return self.playlist_result(tracks, playlist_title=query)

View File

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_age_limit,
parse_iso8601,
smuggle_url,
)
class TenPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/[^/]+/episodes/[^/]+/[^/]+/(?P<id>tpv\d{6}[a-z]{5})'
_TEST = {
'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga',
'info_dict': {
'id': '6060533435001',
'ext': 'mp4',
'title': 'MasterChef - S1 Ep. 1',
'description': 'md5:4fe7b78e28af8f2d900cd20d900ef95c',
'age_limit': 10,
'timestamp': 1240828200,
'upload_date': '20090427',
'uploader_id': '2199827728001',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
}
}
BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
def _real_extract(self, url):
content_id = self._match_id(url)
data = self._download_json(
'https://10play.com.au/api/video/' + content_id, content_id)
video = data.get('video') or {}
metadata = data.get('metaData') or {}
brightcove_id = video.get('videoId') or metadata['showContentVideoId']
brightcove_url = smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
{'geo_countries': ['AU']})
return {
'_type': 'url_transparent',
'url': brightcove_url,
'id': content_id,
'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'),
'description': video.get('description'),
'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
'series': metadata.get('showName'),
'season': metadata.get('showContentSeason'),
'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
'ie_key': 'BrightcoveNew',
}

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .ooyala import OoyalaIE from ..utils import extract_attributes
class TheSunIE(InfoExtractor): class TheSunIE(InfoExtractor):
@ -16,6 +16,7 @@ class TheSunIE(InfoExtractor):
}, },
'playlist_count': 2, 'playlist_count': 2,
} }
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
def _real_extract(self, url): def _real_extract(self, url):
article_id = self._match_id(url) article_id = self._match_id(url)
@ -23,10 +24,15 @@ class TheSunIE(InfoExtractor):
webpage = self._download_webpage(url, article_id) webpage = self._download_webpage(url, article_id)
entries = [] entries = []
for ooyala_id in re.findall( for video in re.findall(
r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)', r'<video[^>]+data-video-id-pending=[^>]+>',
webpage): webpage):
entries.append(OoyalaIE._build_url_result(ooyala_id)) attrs = extract_attributes(video)
video_id = attrs['data-video-id-pending']
account_id = attrs.get('data-account', '5067014667001')
entries.append(self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id),
'BrightcoveNew', video_id))
return self.playlist_result( return self.playlist_result(
entries, article_id, self._og_search_title(webpage, fatal=False)) entries, article_id, self._og_search_title(webpage, fatal=False))

View File

@ -1,36 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_parse_qs,
)
class TutvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
_TEST = {
'url': 'http://tu.tv/videos/robots-futbolistas',
'md5': '0cd9e28ad270488911b0d2a72323395d',
'info_dict': {
'id': '2973058',
'ext': 'mp4',
'title': 'Robots futbolistas',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
data_content = self._download_webpage(
'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
return {
'id': internal_id,
'url': video_url,
'title': self._og_search_title(webpage),
}

View File

@ -11,6 +11,7 @@ from ..utils import (
js_to_json, js_to_json,
parse_iso8601, parse_iso8601,
remove_end, remove_end,
try_get,
) )
@ -44,7 +45,14 @@ class TV2IE(InfoExtractor):
data = self._download_json( data = self._download_json(
'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol), 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
video_id, 'Downloading play JSON')['playback'] video_id, 'Downloading play JSON')['playback']
for item in data['items']['item']: items = try_get(data, lambda x: x['items']['item'])
if not items:
continue
if not isinstance(items, list):
items = [items]
for item in items:
if not isinstance(item, dict):
continue
video_url = item.get('url') video_url = item.get('url')
if not video_url or video_url in format_urls: if not video_url or video_url in format_urls:
continue continue

View File

@ -0,0 +1,82 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import extract_attributes
class TV2DKIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
tvsyd|
tv2ostjylland|
tvmidtvest|
tv2fyn|
tv2east|
tv2lorry|
tv2nord
)\.dk/
(:[^/]+/)*
(?P<id>[^/?\#&]+)
'''
_TESTS = [{
'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
'info_dict': {
'id': '0_52jmwa0p',
'ext': 'mp4',
'title': '19:30 - 28. okt. 2019',
'timestamp': 1572290248,
'upload_date': '20191028',
'uploader_id': 'tvsyd',
'duration': 1347,
'view_count': int,
},
'params': {
'skip_download': True,
},
'add_ie': ['Kaltura'],
}, {
'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
'only_matching': True,
}, {
'url': 'https://www.tv2ostjylland.dk/nyheder/28-10-2019/22/2200-nyhederne-mandag-d-28-oktober-2019?autoplay=1#player',
'only_matching': True,
}, {
'url': 'https://www.tvmidtvest.dk/nyheder/27-10-2019/1930/1930-27-okt-2019',
'only_matching': True,
}, {
'url': 'https://www.tv2fyn.dk/artikel/fyn-kan-faa-landets-foerste-fabrik-til-groent-jetbraendstof',
'only_matching': True,
}, {
'url': 'https://www.tv2east.dk/artikel/gods-faar-indleveret-tonsvis-af-aebler-100-kilo-aebler-gaar-til-en-aeblebrandy',
'only_matching': True,
}, {
'url': 'https://www.tv2lorry.dk/koebenhavn/rasmus-paludan-evakueret-til-egen-demonstration#player',
'only_matching': True,
}, {
'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
entries = []
for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
video = extract_attributes(video_el)
kaltura_id = video.get('data-entryid')
if not kaltura_id:
continue
partner_id = video.get('data-partnerid')
if not partner_id:
continue
entries.append(self.url_result(
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
video_id=kaltura_id))
return self.playlist_result(entries)

View File

@ -248,7 +248,7 @@ class TwitchVodIE(TwitchItemBaseIE):
https?:// https?://
(?: (?:
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/| (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
player\.twitch\.tv/\?.*?\bvideo=v player\.twitch\.tv/\?.*?\bvideo=v?
) )
(?P<id>\d+) (?P<id>\d+)
''' '''
@ -306,6 +306,9 @@ class TwitchVodIE(TwitchItemBaseIE):
}, { }, {
'url': 'https://www.twitch.tv/northernlion/video/291940395', 'url': 'https://www.twitch.tv/northernlion/video/291940395',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://player.twitch.tv/?video=480452374',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse
from .internetvideoarchive import InternetVideoArchiveIE from .internetvideoarchive import InternetVideoArchiveIE
@ -13,7 +12,7 @@ class VideoDetectiveIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '194487', 'id': '194487',
'ext': 'mp4', 'ext': 'mp4',
'title': 'KICK-ASS 2', 'title': 'Kick-Ass 2',
'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
}, },
'params': { 'params': {
@ -24,7 +23,7 @@ class VideoDetectiveIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) query = 'customerid=69249&publishedid=' + video_id
og_video = self._og_search_video_url(webpage) return self.url_result(
query = compat_urlparse.urlparse(og_video).query InternetVideoArchiveIE._build_json_url(query),
return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key()) ie=InternetVideoArchiveIE.ie_key())

View File

@ -1,217 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse_unquote,
)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
parse_iso8601,
sanitized_Request,
HEADRequest,
url_basename,
)
class ViewsterIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
_TESTS = [{
# movie, Type=Movie
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
'info_dict': {
'id': '1140-11855-000',
'ext': 'mp4',
'title': 'The listening Project',
'description': 'md5:bac720244afd1a8ea279864e67baa071',
'timestamp': 1214870400,
'upload_date': '20080701',
'duration': 4680,
},
}, {
# series episode, Type=Episode
'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
'md5': '9243079a8531809efe1b089db102c069',
'info_dict': {
'id': '1284-19427-001',
'ext': 'mp4',
'title': 'The World and a Wall',
'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
'timestamp': 1428192000,
'upload_date': '20150405',
'duration': 1500,
},
}, {
# serie, Type=Serie
'url': 'http://www.viewster.com/serie/1303-19426-000/',
'info_dict': {
'id': '1303-19426-000',
'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
},
'playlist_count': 13,
}, {
# unfinished serie, no Type
'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
'info_dict': {
'id': '1284-19427-000',
'title': 'Baby Steps—Season 2',
'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
},
'playlist_mincount': 16,
}, {
# geo restricted series
'url': 'https://www.viewster.com/serie/1280-18794-002/',
'only_matching': True,
}, {
# geo restricted video
'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
'only_matching': True,
}]
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
request = sanitized_Request(url)
request.add_header('Accept', self._ACCEPT_HEADER)
request.add_header('Auth-token', self._AUTH_TOKEN)
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
def _real_extract(self, url):
video_id = self._match_id(url)
# Get 'api_token' cookie
self._request_webpage(
HEADRequest('http://www.viewster.com/'),
video_id, headers=self.geo_verification_headers())
cookies = self._get_cookies('http://www.viewster.com/')
self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
info = self._download_json(
'https://public-api.viewster.com/search/%s' % video_id,
video_id, 'Downloading entry JSON')
entry_id = info.get('Id') or info['id']
# unfinished serie has no Type
if info.get('Type') in ('Serie', None):
try:
episodes = self._download_json(
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
video_id, 'Downloading series JSON')
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
self.raise_geo_restricted()
else:
raise
entries = [
self.url_result(
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
for episode in episodes]
title = (info.get('Title') or info['Synopsis']['Title']).strip()
description = info.get('Synopsis', {}).get('Detailed')
return self.playlist_result(entries, video_id, title, description)
formats = []
for language_set in info.get('LanguageSets', []):
manifest_url = None
m3u8_formats = []
audio = language_set.get('Audio') or ''
subtitle = language_set.get('Subtitle') or ''
base_format_id = audio
if subtitle:
base_format_id += '-%s' % subtitle
def concat(suffix, sep='-'):
return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
medias = self._download_json(
'https://public-api.viewster.com/movies/%s/videos' % entry_id,
video_id, fatal=False, query={
'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'],
'language': audio,
'subtitle': subtitle,
})
if not medias:
continue
for media in medias:
video_url = media.get('Uri')
if not video_url:
continue
ext = determine_ext(video_url)
if ext == 'f4m':
manifest_url = video_url
video_url += '&' if '?' in video_url else '?'
video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=concat('hds')))
elif ext == 'm3u8':
manifest_url = video_url
m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id=concat('hls'),
fatal=False) # m3u8 sometimes fail
if m3u8_formats:
formats.extend(m3u8_formats)
else:
qualities_basename = self._search_regex(
r'/([^/]+)\.csmil/',
manifest_url, 'qualities basename', default=None)
if not qualities_basename:
continue
QUALITIES_RE = r'((,\d+k)+,?)'
qualities = self._search_regex(
QUALITIES_RE, qualities_basename,
'qualities', default=None)
if not qualities:
continue
qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
qualities.sort()
http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
http_url_basename = url_basename(video_url)
if m3u8_formats:
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
lambda f: f.get('vcodec') != 'none', m3u8_formats))
if len(qualities) == len(m3u8_formats):
for q, m3u8_format in zip(qualities, m3u8_formats):
f = m3u8_format.copy()
f.update({
'url': video_url.replace(http_url_basename, http_template % q),
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
else:
for q in qualities:
formats.append({
'url': video_url.replace(http_url_basename, http_template % q),
'ext': 'mp4',
'format_id': 'http-%d' % q,
'tbr': q,
})
if not formats and not info.get('VODSettings'):
self.raise_geo_restricted()
self._sort_formats(formats)
synopsis = info.get('Synopsis') or {}
# Prefer title outside synopsis since it's less messy
title = (info.get('Title') or synopsis['Title']).strip()
description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
duration = int_or_none(info.get('Duration'))
timestamp = parse_iso8601(info.get('ReleaseDate'))
return {
'id': video_id,
'title': title,
'description': description,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}

View File

@ -23,7 +23,6 @@ from ..utils import (
NO_DEFAULT, NO_DEFAULT,
OnDemandPagedList, OnDemandPagedList,
parse_filesize, parse_filesize,
qualities,
RegexNotFoundError, RegexNotFoundError,
sanitized_Request, sanitized_Request,
smuggle_url, smuggle_url,
@ -211,6 +210,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
video_uploader_url = owner.get('url') video_uploader_url = owner.get('url')
return { return {
'id': video_id,
'title': self._live_title(video_title) if is_live else video_title, 'title': self._live_title(video_title) if is_live else video_title,
'uploader': owner.get('name'), 'uploader': owner.get('name'),
'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
@ -730,7 +730,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
info_dict = { info_dict = {
'id': video_id,
'formats': formats, 'formats': formats,
'timestamp': unified_timestamp(timestamp), 'timestamp': unified_timestamp(timestamp),
'description': video_description, 'description': video_description,
@ -939,7 +938,7 @@ class VimeoAlbumIE(VimeoChannelIE):
def _fetch_page(self, album_id, authorizaion, hashed_pass, page): def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
api_page = page + 1 api_page = page + 1
query = { query = {
'fields': 'link', 'fields': 'link,uri',
'page': api_page, 'page': api_page,
'per_page': self._PAGE_SIZE, 'per_page': self._PAGE_SIZE,
} }
@ -954,7 +953,9 @@ class VimeoAlbumIE(VimeoChannelIE):
link = video.get('link') link = video.get('link')
if not link: if not link:
continue continue
yield self.url_result(link, VimeoIE.ie_key(), VimeoIE._match_id(link)) uri = video.get('uri')
video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None
yield self.url_result(link, VimeoIE.ie_key(), video_id)
def _real_extract(self, url): def _real_extract(self, url):
album_id = self._match_id(url) album_id = self._match_id(url)
@ -1061,7 +1062,6 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
if source_format: if source_format:
info_dict['formats'].append(source_format) info_dict['formats'].append(source_format)
self._vimeo_sort_formats(info_dict['formats']) self._vimeo_sort_formats(info_dict['formats'])
info_dict['id'] = video_id
return info_dict return info_dict
@ -1115,7 +1115,7 @@ class VimeoLikesIE(VimeoChannelIE):
return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id) return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
class VHXEmbedIE(InfoExtractor): class VHXEmbedIE(VimeoBaseInfoExtractor):
IE_NAME = 'vhx:embed' IE_NAME = 'vhx:embed'
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)' _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
@ -1152,81 +1152,10 @@ class VHXEmbedIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
credentials = self._parse_json(self._search_regex( config_url = self._parse_json(self._search_regex(
r'(?s)credentials\s*:\s*({.+?}),', webpage, r'window\.OTTData\s*=\s*({.+})', webpage,
'config'), video_id, js_to_json) 'ott data'), video_id, js_to_json)['config_url']
access_token = credentials['access_token'] config = self._download_json(config_url, video_id)
info = self._parse_config(config, video_id)
query = {} self._vimeo_sort_formats(info['formats'])
for k, v in credentials.items(): return info
if k in ('authorization', 'authUserToken', 'ticket') and v and v != 'undefined':
if k == 'authUserToken':
query['auth_user_token'] = v
else:
query[k] = v
files = self._call_api(video_id, access_token, '/files', query)
formats = []
for f in files:
href = try_get(f, lambda x: x['_links']['source']['href'])
if not href:
continue
method = f.get('method')
if method == 'hls':
formats.extend(self._extract_m3u8_formats(
href, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif method == 'dash':
formats.extend(self._extract_mpd_formats(
href, video_id, mpd_id='dash', fatal=False))
else:
fmt = {
'filesize': int_or_none(try_get(f, lambda x: x['size']['bytes'])),
'format_id': 'http',
'preference': 1,
'url': href,
'vcodec': f.get('codec'),
}
quality = f.get('quality')
if quality:
fmt.update({
'format_id': 'http-' + quality,
'height': int_or_none(self._search_regex(r'(\d+)p', quality, 'height', default=None)),
})
formats.append(fmt)
self._sort_formats(formats)
video_data = self._call_api(video_id, access_token)
title = video_data.get('title') or video_data['name']
subtitles = {}
for subtitle in try_get(video_data, lambda x: x['tracks']['subtitles'], list) or []:
lang = subtitle.get('srclang') or subtitle.get('label')
for _link in subtitle.get('_links', {}).values():
href = _link.get('href')
if not href:
continue
subtitles.setdefault(lang, []).append({
'url': href,
})
q = qualities(['small', 'medium', 'large', 'source'])
thumbnails = []
for thumbnail_id, thumbnail_url in video_data.get('thumbnail', {}).items():
thumbnails.append({
'id': thumbnail_id,
'url': thumbnail_url,
'preference': q(thumbnail_id),
})
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])),
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
'timestamp': unified_timestamp(video_data.get('created_at')),
'view_count': int_or_none(video_data.get('plays_count')),
}

View File

@ -12,7 +12,6 @@ from ..utils import (
get_element_by_class, get_element_by_class,
int_or_none, int_or_none,
orderedSet, orderedSet,
remove_start,
str_or_none, str_or_none,
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
@ -21,6 +20,7 @@ from ..utils import (
urlencode_postdata, urlencode_postdata,
) )
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from .odnoklassniki import OdnoklassnikiIE
from .pladform import PladformIE from .pladform import PladformIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .youtube import YoutubeIE from .youtube import YoutubeIE
@ -60,6 +60,18 @@ class VKBaseIE(InfoExtractor):
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
def _download_payload(self, path, video_id, data, fatal=True):
data['al'] = 1
code, payload = self._download_json(
'https://vk.com/%s.php' % path, video_id,
data=urlencode_postdata(data), fatal=fatal,
headers={'X-Requested-With': 'XMLHttpRequest'})['payload']
if code == '3':
self.raise_login_required()
elif code == '8':
raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
return payload
class VKIE(VKBaseIE): class VKIE(VKBaseIE):
IE_NAME = 'vk' IE_NAME = 'vk'
@ -96,7 +108,6 @@ class VKIE(VKBaseIE):
}, },
{ {
'url': 'http://vk.com/video205387401_165548505', 'url': 'http://vk.com/video205387401_165548505',
'md5': '6c0aeb2e90396ba97035b9cbde548700',
'info_dict': { 'info_dict': {
'id': '205387401_165548505', 'id': '205387401_165548505',
'ext': 'mp4', 'ext': 'mp4',
@ -110,18 +121,18 @@ class VKIE(VKBaseIE):
}, },
{ {
'note': 'Embedded video', 'note': 'Embedded video',
'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': { 'info_dict': {
'id': '32194266_162925554', 'id': '-77521_162222515',
'ext': 'mp4', 'ext': 'mp4',
'uploader': 'Vladimir Gavrin', 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'title': 'Lin Dan', 'title': 'ProtivoGunz - Хуёвая песня',
'duration': 101, 'duration': 195,
'upload_date': '20120730', 'upload_date': '20120212',
'view_count': int, 'timestamp': 1329049880,
'uploader_id': '-77521',
}, },
'skip': 'This video has been removed from public access.',
}, },
{ {
# VIDEO NOW REMOVED # VIDEO NOW REMOVED
@ -138,18 +149,19 @@ class VKIE(VKBaseIE):
'upload_date': '20121218', 'upload_date': '20121218',
'view_count': int, 'view_count': int,
}, },
'skip': 'Requires vk account credentials', 'skip': 'Removed',
}, },
{ {
'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
'md5': '4d7a5ef8cf114dfa09577e57b2993202',
'info_dict': { 'info_dict': {
'id': '-43215063_168067957', 'id': '-43215063_168067957',
'ext': 'mp4', 'ext': 'mp4',
'uploader': 'Киномания - лучшее из мира кино', 'uploader': 'Bro Mazter',
'title': ' ', 'title': ' ',
'duration': 7291, 'duration': 7291,
'upload_date': '20140328', 'upload_date': '20140328',
'uploader_id': '223413403',
'timestamp': 1396018030,
}, },
'skip': 'Requires vk account credentials', 'skip': 'Requires vk account credentials',
}, },
@ -165,7 +177,7 @@ class VKIE(VKBaseIE):
'upload_date': '20140626', 'upload_date': '20140626',
'view_count': int, 'view_count': int,
}, },
'skip': 'Only works from Russia', 'skip': 'Removed',
}, },
{ {
# video (removed?) only available with list id # video (removed?) only available with list id
@ -247,6 +259,9 @@ class VKIE(VKBaseIE):
'uploader_id': '-387766', 'uploader_id': '-387766',
'timestamp': 1475137527, 'timestamp': 1475137527,
}, },
'params': {
'skip_download': True,
},
}, },
{ {
# live stream, hls and rtmp links, most likely already finished live # live stream, hls and rtmp links, most likely already finished live
@ -288,80 +303,94 @@ class VKIE(VKBaseIE):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
mv_data = {}
if video_id: if video_id:
info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id data = {
'act': 'show_inline',
'video': video_id,
}
# Some videos (removed?) can only be downloaded with list id specified # Some videos (removed?) can only be downloaded with list id specified
list_id = mobj.group('list_id') list_id = mobj.group('list_id')
if list_id: if list_id:
info_url += '&list=%s' % list_id data['list'] = list_id
payload = self._download_payload('al_video', video_id, data)
info_page = payload[1]
opts = payload[-1]
mv_data = opts.get('mvData') or {}
player = opts.get('player') or {}
else: else:
info_url = 'http://vk.com/video_ext.php?' + mobj.group('embed_query')
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
info_page = self._download_webpage(info_url, video_id) info_page = self._download_webpage(
'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
error_message = self._html_search_regex( error_message = self._html_search_regex(
[r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>', [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'], r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
info_page, 'error message', default=None) info_page, 'error message', default=None)
if error_message: if error_message:
raise ExtractorError(error_message, expected=True) raise ExtractorError(error_message, expected=True)
if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page): if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
raise ExtractorError( raise ExtractorError(
'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
expected=True) expected=True)
ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.' ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
ERRORS = { ERRORS = {
r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
ERROR_COPYRIGHT, ERROR_COPYRIGHT,
r'>The video .*? was removed from public access by request of the copyright holder.<': r'>The video .*? was removed from public access by request of the copyright holder.<':
ERROR_COPYRIGHT, ERROR_COPYRIGHT,
r'<!>Please log in or <': r'<!>Please log in or <':
'Video %s is only available for registered users, ' 'Video %s is only available for registered users, '
'use --username and --password options to provide account credentials.', 'use --username and --password options to provide account credentials.',
r'<!>Unknown error': r'<!>Unknown error':
'Video %s does not exist.', 'Video %s does not exist.',
r'<!>Видео временно недоступно': r'<!>Видео временно недоступно':
'Video %s is temporarily unavailable.', 'Video %s is temporarily unavailable.',
r'<!>Access denied': r'<!>Access denied':
'Access denied to video %s.', 'Access denied to video %s.',
r'<!>Видеозапись недоступна, так как её автор был заблокирован.': r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
'Video %s is no longer available, because its author has been blocked.', 'Video %s is no longer available, because its author has been blocked.',
r'<!>This video is no longer available, because its author has been blocked.': r'<!>This video is no longer available, because its author has been blocked.':
'Video %s is no longer available, because its author has been blocked.', 'Video %s is no longer available, because its author has been blocked.',
r'<!>This video is no longer available, because it has been deleted.': r'<!>This video is no longer available, because it has been deleted.':
'Video %s is no longer available, because it has been deleted.', 'Video %s is no longer available, because it has been deleted.',
r'<!>The video .+? is not available in your region.': r'<!>The video .+? is not available in your region.':
'Video %s is not available in your region.', 'Video %s is not available in your region.',
} }
for error_re, error_msg in ERRORS.items(): for error_re, error_msg in ERRORS.items():
if re.search(error_re, info_page): if re.search(error_re, info_page):
raise ExtractorError(error_msg % video_id, expected=True) raise ExtractorError(error_msg % video_id, expected=True)
player = self._parse_json(self._search_regex(
r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
info_page, 'player params'), video_id)
youtube_url = YoutubeIE._extract_url(info_page) youtube_url = YoutubeIE._extract_url(info_page)
if youtube_url: if youtube_url:
return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) return self.url_result(youtube_url, YoutubeIE.ie_key())
vimeo_url = VimeoIE._extract_url(url, info_page) vimeo_url = VimeoIE._extract_url(url, info_page)
if vimeo_url is not None: if vimeo_url is not None:
return self.url_result(vimeo_url) return self.url_result(vimeo_url, VimeoIE.ie_key())
pladform_url = PladformIE._extract_url(info_page) pladform_url = PladformIE._extract_url(info_page)
if pladform_url: if pladform_url:
return self.url_result(pladform_url) return self.url_result(pladform_url, PladformIE.ie_key())
m_rutube = re.search( m_rutube = re.search(
r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page) r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
@ -374,6 +403,10 @@ class VKIE(VKBaseIE):
if dailymotion_urls: if dailymotion_urls:
return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key()) return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
if m_opts: if m_opts:
m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
@ -383,38 +416,7 @@ class VKIE(VKBaseIE):
opts_url = 'http:' + opts_url opts_url = 'http:' + opts_url
return self.url_result(opts_url) return self.url_result(opts_url)
# vars does not look to be served anymore since 24.10.2016 data = player['params'][0]
data = self._parse_json(
self._search_regex(
r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'),
video_id, fatal=False)
# <!json> is served instead
if not data:
data = self._parse_json(
self._search_regex(
[r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
info_page, 'json', default='{}'),
video_id)
if data:
data = data['player']['params'][0]
if not data:
data = self._parse_json(
self._search_regex(
r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
'player params', default='{}'),
video_id)
if data:
data = data['params'][0]
# <!--{...}
if not data:
data = self._parse_json(
self._search_regex(
r'<!--\s*({.+})', info_page, 'payload'),
video_id)['payload'][-1][-1]['player']['params'][0]
title = unescapeHTML(data['md_title']) title = unescapeHTML(data['md_title'])
# 2 = live # 2 = live
@ -463,12 +465,12 @@ class VKIE(VKBaseIE):
'title': title, 'title': title,
'thumbnail': data.get('jpg'), 'thumbnail': data.get('jpg'),
'uploader': data.get('md_author'), 'uploader': data.get('md_author'),
'uploader_id': str_or_none(data.get('author_id')), 'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
'duration': data.get('duration'), 'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
'timestamp': timestamp, 'timestamp': timestamp,
'view_count': view_count, 'view_count': view_count,
'like_count': int_or_none(data.get('liked')), 'like_count': int_or_none(mv_data.get('likes')),
'dislike_count': int_or_none(data.get('nolikes')), 'comment_count': int_or_none(mv_data.get('commcount')),
'is_live': is_live, 'is_live': is_live,
} }
@ -482,7 +484,6 @@ class VKUserVideosIE(VKBaseIE):
'url': 'http://vk.com/videos205387401', 'url': 'http://vk.com/videos205387401',
'info_dict': { 'info_dict': {
'id': '205387401', 'id': '205387401',
'title': "Tom Cruise's Videos",
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
}, { }, {
@ -498,22 +499,25 @@ class VKUserVideosIE(VKBaseIE):
'url': 'http://new.vk.com/videos205387401', 'url': 'http://new.vk.com/videos205387401',
'only_matching': True, 'only_matching': True,
}] }]
_VIDEO = collections.namedtuple(
'Video', ['owner_id', 'id', 'thumb', 'title', 'flags', 'duration', 'hash', 'moder_acts', 'owner', 'date', 'views', 'platform', 'blocked', 'music_video_meta'])
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) l = self._download_payload('al_video', page_id, {
'act': 'load_videos_silent',
'oid': page_id,
})[0]['']['list']
entries = [ entries = []
self.url_result( for video in l:
'http://vk.com/video' + video_id, 'VK', video_id=video_id) v = self._VIDEO._make(video)
for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))] video_id = '%d_%d' % (v.owner_id, v.id)
entries.append(self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id))
title = unescapeHTML(self._search_regex( return self.playlist_result(entries, page_id)
r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
webpage, 'title', default=page_id))
return self.playlist_result(entries, page_id, title)
class VKWallPostIE(VKBaseIE): class VKWallPostIE(VKBaseIE):
@ -523,15 +527,15 @@ class VKWallPostIE(VKBaseIE):
# public page URL, audio playlist # public page URL, audio playlist
'url': 'https://vk.com/bs.official?w=wall-23538238_35', 'url': 'https://vk.com/bs.official?w=wall-23538238_35',
'info_dict': { 'info_dict': {
'id': '23538238_35', 'id': '-23538238_35',
'title': 'Black Shadow - Wall post 23538238_35', 'title': 'Black Shadow - Wall post -23538238_35',
'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c', 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
}, },
'playlist': [{ 'playlist': [{
'md5': '5ba93864ec5b85f7ce19a9af4af080f6', 'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
'info_dict': { 'info_dict': {
'id': '135220665_111806521', 'id': '135220665_111806521',
'ext': 'mp3', 'ext': 'mp4',
'title': 'Black Shadow - Слепое Верование', 'title': 'Black Shadow - Слепое Верование',
'duration': 370, 'duration': 370,
'uploader': 'Black Shadow', 'uploader': 'Black Shadow',
@ -542,18 +546,16 @@ class VKWallPostIE(VKBaseIE):
'md5': '4cc7e804579122b17ea95af7834c9233', 'md5': '4cc7e804579122b17ea95af7834c9233',
'info_dict': { 'info_dict': {
'id': '135220665_111802303', 'id': '135220665_111802303',
'ext': 'mp3', 'ext': 'mp4',
'title': 'Black Shadow - Война - Негасимое Бездны Пламя!', 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
'duration': 423, 'duration': 423,
'uploader': 'Black Shadow', 'uploader': 'Black Shadow',
'artist': 'Black Shadow', 'artist': 'Black Shadow',
'track': 'Война - Негасимое Бездны Пламя!', 'track': 'Война - Негасимое Бездны Пламя!',
}, },
'params': {
'skip_download': True,
},
}], }],
'params': { 'params': {
'skip_download': True,
'usenetrc': True, 'usenetrc': True,
}, },
'skip': 'Requires vk account credentials', 'skip': 'Requires vk account credentials',
@ -562,7 +564,7 @@ class VKWallPostIE(VKBaseIE):
'url': 'https://vk.com/wall85155021_6319', 'url': 'https://vk.com/wall85155021_6319',
'info_dict': { 'info_dict': {
'id': '85155021_6319', 'id': '85155021_6319',
'title': 'Sergey Gorbunov - Wall post 85155021_6319', 'title': 'Сергей Горбунов - Wall post 85155021_6319',
}, },
'playlist_count': 1, 'playlist_count': 1,
'params': { 'params': {
@ -578,58 +580,72 @@ class VKWallPostIE(VKBaseIE):
'url': 'https://m.vk.com/wall-23538238_35', 'url': 'https://m.vk.com/wall-23538238_35',
'only_matching': True, 'only_matching': True,
}] }]
_BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
_AUDIO = collections.namedtuple(
'Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads', 'subtitle', 'main_artists', 'feat_artists', 'album', 'track_code', 'restriction', 'album_part', 'new_stats', 'access_key'])
def _decode(self, enc):
dec = ''
e = n = 0
for c in enc:
r = self._BASE64_CHARS.index(c)
cond = n % 4
e = 64 * e + r if cond else r
n += 1
if cond:
dec += chr(255 & e >> (-2 * n & 6))
return dec
def _unmask_url(self, mask_url, vk_id):
if 'audio_api_unavailable' in mask_url:
extra = mask_url.split('?extra=')[1].split('#')
func, base = self._decode(extra[1]).split(chr(11))
mask_url = list(self._decode(extra[0]))
url_len = len(mask_url)
indexes = [None] * url_len
index = int(base) ^ vk_id
for n in range(url_len - 1, -1, -1):
index = (url_len * (n + 1) ^ index + n) % url_len
indexes[n] = index
for n in range(1, url_len):
c = mask_url[n]
index = indexes[url_len - 1 - n]
mask_url[n] = mask_url[index]
mask_url[index] = c
mask_url = ''.join(mask_url)
return mask_url
def _real_extract(self, url): def _real_extract(self, url):
post_id = self._match_id(url) post_id = self._match_id(url)
wall_url = 'https://vk.com/wall%s' % post_id webpage = self._download_payload('wkview', post_id, {
'act': 'show',
post_id = remove_start(post_id, '-') 'w': 'wall' + post_id,
})[1]
webpage = self._download_webpage(wall_url, post_id)
error = self._html_search_regex(
r'>Error</div>\s*<div[^>]+class=["\']body["\'][^>]*>([^<]+)',
webpage, 'error', default=None)
if error:
raise ExtractorError('VK said: %s' % error, expected=True)
description = clean_html(get_element_by_class('wall_post_text', webpage)) description = clean_html(get_element_by_class('wall_post_text', webpage))
uploader = clean_html(get_element_by_class('author', webpage)) uploader = clean_html(get_element_by_class('author', webpage))
thumbnail = self._og_search_thumbnail(webpage)
entries = [] entries = []
audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage) for audio in re.findall(r'data-audio="([^"]+)', webpage):
if audio_ids: audio = self._parse_json(unescapeHTML(audio), post_id)
al_audio = self._download_webpage( a = self._AUDIO._make(audio)
'https://vk.com/al_audio.php', post_id, if not a.url:
note='Downloading audio info', fatal=False, continue
data=urlencode_postdata({ title = unescapeHTML(a.title)
'act': 'reload_audio', entries.append({
'al': '1', 'id': '%s_%s' % (a.owner_id, a.id),
'ids': ','.join(audio_ids) 'url': self._unmask_url(a.url, a.ads['vk_id']),
})) 'title': '%s - %s' % (a.performer, title) if a.performer else title,
if al_audio: 'thumbnail': a.cover_url.split(',') if a.cover_url else None,
Audio = collections.namedtuple( 'duration': a.duration,
'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration']) 'uploader': uploader,
audios = self._parse_json( 'artist': a.performer,
self._search_regex( 'track': title,
r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'), 'ext': 'mp4',
post_id, fatal=False, transform_source=unescapeHTML) 'protocol': 'm3u8',
if isinstance(audios, list): })
for audio in audios:
a = Audio._make(audio[:6])
entries.append({
'id': '%s_%s' % (a.user_id, a.id),
'url': a.url,
'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id,
'thumbnail': thumbnail,
'duration': a.duration,
'uploader': uploader,
'artist': a.artist,
'track': a.track,
})
for video in re.finditer( for video in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage): r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):

View File

@ -12,7 +12,7 @@ from ..utils import (
class WistiaIE(InfoExtractor): class WistiaIE(InfoExtractor):
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)' _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
@ -43,25 +43,26 @@ class WistiaIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
# https://wistia.com/support/embed-and-share/video-on-your-website
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
match = re.search( match = re.search(
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage) r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage)
if match: if match:
return unescapeHTML(match.group('url')) return unescapeHTML(match.group('url'))
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return 'wistia:%s' % match.group('id')
match = re.search( match = re.search(
r'''(?sx) r'''(?sx)
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
''', webpage) ''', webpage)
if match: if match:
return 'wistia:%s' % match.group('id') return 'wistia:%s' % match.group('id')
match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage)
if match:
return 'wistia:%s' % match.group('id')
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -4,37 +4,64 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import ( from ..utils import (
decode_packed_codes, decode_packed_codes,
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
NO_DEFAULT, js_to_json,
urlencode_postdata, urlencode_postdata,
) )
# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
def aa_decode(aa_code):
symbol_table = [
('7', '((゚ー゚) + (o^_^o))'),
('6', '((o^_^o) +(o^_^o))'),
('5', '((゚ー゚) + (゚Θ゚))'),
('2', '((o^_^o) - (゚Θ゚))'),
('4', '(゚ー゚)'),
('3', '(o^_^o)'),
('1', '(゚Θ゚)'),
('0', '(c^_^o)'),
]
delim = '(゚Д゚)[゚ε゚]+'
ret = ''
for aa_char in aa_code.split(delim):
for val, pat in symbol_table:
aa_char = aa_char.replace(pat, val)
aa_char = aa_char.replace('+ ', '')
m = re.match(r'^\d+', aa_char)
if m:
ret += compat_chr(int(m.group(0), 8))
else:
m = re.match(r'^u([\da-f]+)', aa_char)
if m:
ret += compat_chr(int(m.group(1), 16))
return ret
class XFileShareIE(InfoExtractor): class XFileShareIE(InfoExtractor):
_SITES = ( _SITES = (
(r'daclips\.(?:in|com)', 'DaClips'), (r'clipwatching\.com', 'ClipWatching'),
(r'filehoot\.com', 'FileHoot'), (r'gounlimited\.to', 'GoUnlimited'),
(r'gorillavid\.(?:in|com)', 'GorillaVid'), (r'govid\.me', 'GoVid'),
(r'movpod\.in', 'MovPod'), (r'holavid\.com', 'HolaVid'),
(r'powerwatch\.pw', 'PowerWatch'), (r'streamty\.com', 'Streamty'),
(r'rapidvideo\.ws', 'Rapidvideo.ws'),
(r'thevideobee\.to', 'TheVideoBee'), (r'thevideobee\.to', 'TheVideoBee'),
(r'vidto\.(?:me|se)', 'Vidto'), (r'uqload\.com', 'Uqload'),
(r'streamin\.to', 'Streamin.To'),
(r'xvidstage\.com', 'XVIDSTAGE'),
(r'vidabc\.com', 'Vid ABC'),
(r'vidbom\.com', 'VidBom'), (r'vidbom\.com', 'VidBom'),
(r'vidlo\.us', 'vidlo'), (r'vidlo\.us', 'vidlo'),
(r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'), (r'vidlocker\.xyz', 'VidLocker'),
(r'fastvideo\.me', 'FastVideo.me'), (r'vidshare\.tv', 'VidShare'),
(r'vup\.to', 'VUp'),
(r'xvideosharing\.com', 'XVideoSharing'),
) )
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
_VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
% '|'.join(site for site in list(zip(*_SITES))[0])) % '|'.join(site for site in list(zip(*_SITES))[0]))
_FILE_NOT_FOUND_REGEXES = ( _FILE_NOT_FOUND_REGEXES = (
@ -43,82 +70,14 @@ class XFileShareIE(InfoExtractor):
) )
_TESTS = [{ _TESTS = [{
'url': 'http://gorillavid.in/06y9juieqpmi', 'url': 'http://xvideosharing.com/fq65f94nd2ve',
'md5': '5ae4a3580620380619678ee4875893ba', 'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
'info_dict': { 'info_dict': {
'id': '06y9juieqpmi', 'id': 'fq65f94nd2ve',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ', 'title': 'sample',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:http://.*\.jpg',
}, },
}, {
'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
'only_matching': True,
}, {
'url': 'http://daclips.in/3rso4kdn6f9m',
'md5': '1ad8fd39bb976eeb66004d3a4895f106',
'info_dict': {
'id': '3rso4kdn6f9m',
'ext': 'mp4',
'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
'thumbnail': r're:http://.*\.jpg',
}
}, {
'url': 'http://movpod.in/0wguyyxi1yca',
'only_matching': True,
}, {
'url': 'http://filehoot.com/3ivfabn7573c.html',
'info_dict': {
'id': '3ivfabn7573c',
'ext': 'mp4',
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
'thumbnail': r're:http://.*\.jpg',
},
'skip': 'Video removed',
}, {
'url': 'http://vidto.me/ku5glz52nqe1.html',
'info_dict': {
'id': 'ku5glz52nqe1',
'ext': 'mp4',
'title': 'test'
}
}, {
'url': 'http://powerwatch.pw/duecjibvicbu',
'info_dict': {
'id': 'duecjibvicbu',
'ext': 'mp4',
'title': 'Big Buck Bunny trailer',
},
}, {
'url': 'http://xvidstage.com/e0qcnl03co6z',
'info_dict': {
'id': 'e0qcnl03co6z',
'ext': 'mp4',
'title': 'Chucky Prank 2015.mp4',
},
}, {
# removed by administrator
'url': 'http://xvidstage.com/amfy7atlkx25',
'only_matching': True,
}, {
'url': 'http://vidabc.com/i8ybqscrphfv',
'info_dict': {
'id': 'i8ybqscrphfv',
'ext': 'mp4',
'title': 're:Beauty and the Beast 2017',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.rapidvideo.cool/b667kprndr8w',
'only_matching': True,
}, {
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
'only_matching': True,
}, {
'url': 'http://vidto.se/1tx1pf6t12cg.html',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -131,10 +90,9 @@ class XFileShareIE(InfoExtractor):
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) host, video_id = re.match(self._VALID_URL, url).groups()
video_id = mobj.group('id')
url = 'http://%s/%s' % (mobj.group('host'), video_id) url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
@ -142,7 +100,7 @@ class XFileShareIE(InfoExtractor):
fields = self._hidden_inputs(webpage) fields = self._hidden_inputs(webpage)
if fields['op'] == 'download1': if fields.get('op') == 'download1':
countdown = int_or_none(self._search_regex( countdown = int_or_none(self._search_regex(
r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>', r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
webpage, 'countdown', default=None)) webpage, 'countdown', default=None))
@ -160,13 +118,37 @@ class XFileShareIE(InfoExtractor):
(r'style="z-index: [0-9]+;">([^<]+)</span>', (r'style="z-index: [0-9]+;">([^<]+)</span>',
r'<td nowrap>([^<]+)</td>', r'<td nowrap>([^<]+)</td>',
r'h4-fine[^>]*>([^<]+)<', r'h4-fine[^>]*>([^<]+)<',
r'>Watch (.+) ', r'>Watch (.+)[ <]',
r'<h2 class="video-page-head">([^<]+)</h2>', r'<h2 class="video-page-head">([^<]+)</h2>',
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
r'title\s*:\s*"([^"]+)"'), # govid.me
webpage, 'title', default=None) or self._og_search_title( webpage, 'title', default=None) or self._og_search_title(
webpage, default=None) or video_id).strip() webpage, default=None) or video_id).strip()
def extract_formats(default=NO_DEFAULT): for regex, func in (
(r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
(r'(゚.+)', aa_decode)):
obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
if obf_code:
webpage = webpage.replace(obf_code, func(obf_code))
formats = []
jwplayer_data = self._search_regex(
[
r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
], webpage,
'jwplayer data', default=None)
if jwplayer_data:
jwplayer_data = self._parse_json(
jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
if jwplayer_data:
formats = self._parse_jwplayer_data(
jwplayer_data, video_id, False,
m3u8_id='hls', mpd_id='dash')['formats']
if not formats:
urls = [] urls = []
for regex in ( for regex in (
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1', r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
@ -177,6 +159,12 @@ class XFileShareIE(InfoExtractor):
video_url = mobj.group('url') video_url = mobj.group('url')
if video_url not in urls: if video_url not in urls:
urls.append(video_url) urls.append(video_url)
sources = self._search_regex(
r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
if sources:
urls.extend(self._parse_json(sources, video_id))
formats = [] formats = []
for video_url in urls: for video_url in urls:
if determine_ext(video_url) == 'm3u8': if determine_ext(video_url) == 'm3u8':
@ -189,21 +177,13 @@ class XFileShareIE(InfoExtractor):
'url': video_url, 'url': video_url,
'format_id': 'sd', 'format_id': 'sd',
}) })
if not formats and default is not NO_DEFAULT: self._sort_formats(formats)
return default
self._sort_formats(formats)
return formats
formats = extract_formats(default=None)
if not formats:
webpage = decode_packed_codes(self._search_regex(
r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
webpage, 'packed code'))
formats = extract_formats()
thumbnail = self._search_regex( thumbnail = self._search_regex(
r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None) [
r'<video[^>]+poster="([^"]+)"',
r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
], webpage, 'thumbnail', default=None)
return { return {
'id': video_id, 'id': video_id,

View File

@ -3,453 +3,313 @@ from __future__ import unicode_literals
import hashlib import hashlib
import itertools import itertools
import json
import re import re
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
compat_urlparse,
) )
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext,
ExtractorError,
extract_attributes,
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
parse_iso8601,
smuggle_url, smuggle_url,
try_get, try_get,
unescapeHTML,
url_or_none, url_or_none,
) )
from .brightcove import ( from .brightcove import BrightcoveNewIE
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .nbc import NBCSportsVPlayerIE
class YahooIE(InfoExtractor): class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies' IE_DESC = 'Yahoo screen and movies'
_VALID_URL = r'(?P<host>https?://(?:(?P<country>[a-zA-Z]{2})\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P<display_id>.+)?-)?(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?' _VALID_URL = r'(?P<url>https?://(?:(?P<country>[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)'
_TESTS = [ _TESTS = [{
{ 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 'info_dict': {
'info_dict': { 'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
'id': '2d25e626-2378-391f-ada0-ddaf1417e588', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Julian Smith & Travis Legg Watch Julian Smith',
'title': 'Julian Smith & Travis Legg Watch Julian Smith', 'description': 'Julian and Travis watch Julian Smith',
'description': 'Julian and Travis watch Julian Smith', 'duration': 6863,
'duration': 6863, 'timestamp': 1369812016,
}, 'upload_date': '20130529',
}, },
{ }, {
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
'md5': '251af144a19ebc4a033e8ba91ac726bb', 'md5': '7993e572fac98e044588d0b5260f4352',
'info_dict': { 'info_dict': {
'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9', 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Codefellas - The Cougar Lies with Spanish Moss', 'title': "Yahoo Saves 'Community'",
'description': 'md5:66b627ab0a282b26352136ca96ce73c1', 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
'duration': 151, 'duration': 170,
}, 'timestamp': 1406838636,
'skip': 'HTTP Error 404', 'upload_date': '20140731',
}, },
{ }, {
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
'md5': '7993e572fac98e044588d0b5260f4352', 'md5': '71298482f7c64cbb7fa064e4553ff1c1',
'info_dict': { 'info_dict': {
'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
'ext': 'mp4', 'ext': 'webm',
'title': "Yahoo Saves 'Community'", 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
'description': 'md5:4d4145af2fd3de00cbb6c1d664105053', 'description': 'md5:f66c890e1490f4910a9953c941dee944',
'duration': 170, 'duration': 97,
} 'timestamp': 1414489862,
}, 'upload_date': '20141028',
{
'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html',
'md5': '45c024bad51e63e9b6f6fad7a43a8c23',
'info_dict': {
'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
'ext': 'mp4',
'title': '敢問市長/黃秀霜批賴清德「非常高傲」',
'description': '直言台南沒捷運 交通居五都之末',
'duration': 396,
},
},
{
'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
'md5': '71298482f7c64cbb7fa064e4553ff1c1',
'info_dict': {
'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
'ext': 'webm',
'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
'description': 'md5:f66c890e1490f4910a9953c941dee944',
'duration': 97,
}
},
{
'url': 'https://ca.sports.yahoo.com/video/program-makes-hockey-more-affordable-013127711.html',
'md5': '57e06440778b1828a6079d2f744212c4',
'info_dict': {
'id': 'c9fa2a36-0d4d-3937-b8f6-cc0fb1881e73',
'ext': 'mp4',
'title': 'Program that makes hockey more affordable not offered in Manitoba',
'description': 'md5:c54a609f4c078d92b74ffb9bf1f496f4',
'duration': 121,
},
'skip': 'Video gone',
}, {
'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html',
'info_dict': {
'id': '154609075',
},
'playlist': [{
'md5': '000887d0dc609bc3a47c974151a40fb8',
'info_dict': {
'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
'ext': 'mp4',
'title': '\'The Interview\' TV Spot: War',
'description': 'The Interview',
'duration': 30,
},
}, {
'md5': '81bc74faf10750fe36e4542f9a184c66',
'info_dict': {
'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',
'ext': 'mp4',
'title': '\'The Interview\' TV Spot: Guys',
'description': 'The Interview',
'duration': 30,
},
}],
}, {
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'md5': '88e209b417f173d86186bef6e4d1f160',
'info_dict': {
'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
'ext': 'mp4',
'title': 'China Moses Is Crazy About the Blues',
'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
'duration': 128,
}
}, {
'url': 'https://in.lifestyle.yahoo.com/video/connect-dots-dark-side-virgo-090247395.html',
'md5': 'd9a083ccf1379127bf25699d67e4791b',
'info_dict': {
'id': '52aeeaa3-b3d1-30d8-9ef8-5d0cf05efb7c',
'ext': 'mp4',
'title': 'Connect the Dots: Dark Side of Virgo',
'description': 'md5:1428185051cfd1949807ad4ff6d3686a',
'duration': 201,
},
'skip': 'Domain name in.lifestyle.yahoo.com gone',
}, {
'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
'md5': '989396ae73d20c6f057746fb226aa215',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
'title': '\'True Story\' Trailer',
'description': 'True Story',
'duration': 150,
},
}, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True,
}, {
'note': 'NBC Sports embeds',
'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
'info_dict': {
'id': '9CsDKds0kvHI',
'ext': 'flv',
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
'upload_date': '20150313',
'uploader': 'NBCU-SPORTS',
'timestamp': 1426270238,
}
}, {
'url': 'https://tw.news.yahoo.com/-100120367.html',
'only_matching': True,
}, {
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
'info_dict': {
'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
'ext': 'mp4',
'title': 'Communitary - Community Episode 1: Ladders',
'description': 'md5:8fc39608213295748e1e289807838c97',
'duration': 1646,
},
}, {
# it uses an alias to get the video_id
'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html',
'info_dict': {
'id': '40eda9c8-8e5f-3552-8745-830f67d0c737',
'ext': 'mp4',
'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking',
'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.',
},
},
{
# config['models']['applet_model']['data']['sapi'] has no query
'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016',
'md5': 'dac0c72d502bc5facda80c9e6d5c98db',
'info_dict': {
'id': 'a6015640-e9e5-3efb-bb60-05589a183919',
'ext': 'mp4',
'description': 'Galactic',
'title': 'Dolla Diva (feat. Maggie Koerner)',
},
'skip': 'redirect to https://www.yahoo.com/music',
},
{
# yahoo://article/
'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
'title': "'True Story' Trailer",
'description': 'True Story',
},
'params': {
'skip_download': True,
},
},
{
# ytwnews://cavideo/
'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
'info_dict': {
'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
'ext': 'mp4',
'title': '單車天使 - 中文版預',
'description': '中文版預',
},
'params': {
'skip_download': True,
},
},
{
# custom brightcove
'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37083565/clown-entertainers-say-it-is-hurting-their-business/',
'info_dict': {
'id': '5575377707001',
'ext': 'mp4',
'title': "Clown entertainers say 'It' is hurting their business",
'description': 'Stephen King s horror film has much to answer for. Jelby and Mr Loopy the Clowns join us.',
'timestamp': 1505341164,
'upload_date': '20170913',
'uploader_id': '2376984109001',
},
'params': {
'skip_download': True,
},
},
{
# custom brightcove, geo-restricted to Australia, bypassable
'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/',
'only_matching': True,
} }
] }, {
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'md5': '88e209b417f173d86186bef6e4d1f160',
'info_dict': {
'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
'ext': 'mp4',
'title': 'China Moses Is Crazy About the Blues',
'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
'duration': 128,
'timestamp': 1385722202,
'upload_date': '20131129',
}
}, {
'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
'md5': '2a9752f74cb898af5d1083ea9f661b58',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
'title': '\'True Story\' Trailer',
'description': 'True Story',
'duration': 150,
'timestamp': 1418919206,
'upload_date': '20141218',
},
}, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True,
}, {
'note': 'NBC Sports embeds',
'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
'info_dict': {
'id': '9CsDKds0kvHI',
'ext': 'flv',
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
'upload_date': '20150313',
'uploader': 'NBCU-SPORTS',
'timestamp': 1426270238,
},
}, {
'url': 'https://tw.news.yahoo.com/-100120367.html',
'only_matching': True,
}, {
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
'info_dict': {
'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
'ext': 'mp4',
'title': 'Communitary - Community Episode 1: Ladders',
'description': 'md5:8fc39608213295748e1e289807838c97',
'duration': 1646,
'timestamp': 1440436550,
'upload_date': '20150824',
'series': 'Communitary',
'season_number': 6,
'episode_number': 1,
},
}, {
# ytwnews://cavideo/
'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
'info_dict': {
'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
'ext': 'mp4',
'title': '單車天使 - 中文版預',
'description': '中文版預',
'timestamp': 1476696196,
'upload_date': '20161017',
},
'params': {
'skip_download': True,
},
}, {
# Contains both a Yahoo hosted video and multiple Youtube embeds
'url': 'https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html',
'info_dict': {
'id': '46c5d95a-528f-3d03-b732-732fcadd51de',
'title': 'Gwen Stefani reveals the pop hit she passed on, assigns it to her \'Voice\' contestant instead',
'description': 'Gwen decided not to record this hit herself, but she decided it was the perfect fit for Kyndall Inskeep.',
},
'playlist': [{
'info_dict': {
'id': '966d4262-4fd1-3aaa-b45b-049ca6e38ba6',
'ext': 'mp4',
'title': 'Gwen Stefani reveals she turned down one of Sia\'s best songs',
'description': 'On "The Voice" Tuesday, Gwen Stefani told Taylor Swift which Sia hit was almost hers.',
'timestamp': 1572406500,
'upload_date': '20191030',
},
}, {
'info_dict': {
'id': '352CFDOQrKg',
'ext': 'mp4',
'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019',
'description': 'md5:35b61e94c2ae214bc965ff4245f80d11',
'uploader': 'The Voice',
'uploader_id': 'NBCTheVoice',
'upload_date': '20191029',
},
}],
'params': {
'playlistend': 2,
},
'expected_warnings': ['HTTP Error 404'],
}, {
'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html',
'only_matching': True,
}, {
'url': 'https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html',
'only_matching': True,
}, {
'url': 'https://www.yahoo.com/entertainment/v/longtime-cbs-news-60-minutes-032036500-cbs.html',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) url, country, display_id = re.match(self._VALID_URL, url).groups()
page_id = mobj.group('id') if not country:
display_id = mobj.group('display_id') or page_id country = 'us'
host = mobj.group('host')
webpage, urlh = self._download_webpage_handle(url, display_id)
if 'err=404' in urlh.geturl():
raise ExtractorError('Video gone', expected=True)
# Look for iframed media first
entries = []
iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
for idx, iframe_url in enumerate(iframe_urls):
entries.append(self.url_result(host + iframe_url, 'Yahoo'))
if entries:
return self.playlist_result(entries, page_id)
# Look for NBCSports iframes
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
if nbc_sports_url:
return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key())
# Look for Brightcove Legacy Studio embeds
bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if bc_url:
return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
def brightcove_url_result(bc_url):
return self.url_result(
smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}),
BrightcoveNewIE.ie_key())
# Look for Brightcove New Studio embeds
bc_url = BrightcoveNewIE._extract_url(self, webpage)
if bc_url:
return brightcove_url_result(bc_url)
brightcove_iframe = self._search_regex(
r'(<iframe[^>]+data-video-id=["\']\d+[^>]+>)', webpage,
'brightcove iframe', default=None)
if brightcove_iframe:
attr = extract_attributes(brightcove_iframe)
src = attr.get('src')
if src:
parsed_src = compat_urlparse.urlparse(src)
qs = compat_urlparse.parse_qs(parsed_src.query)
account_id = qs.get('accountId', ['2376984109001'])[0]
brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0]
if account_id and brightcove_id:
return brightcove_url_result(
'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
% (account_id, brightcove_id))
# Query result is often embedded in webpage as JSON. Sometimes explicit requests
# to video API results in a failure with geo restriction reason therefore using
# embedded query result when present sounds reasonable.
config_json = self._search_regex(
r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
webpage, 'videoplayer applet', default=None)
if config_json:
config = self._parse_json(config_json, display_id, fatal=False)
if config:
sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
if sapi and 'query' in sapi:
info = self._extract_info(display_id, sapi, webpage)
self._sort_formats(info['formats'])
return info
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
default=None)
if items_json is None:
alias = self._search_regex(
r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
if alias is not None:
alias_info = self._download_json(
'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
display_id, 'Downloading alias info')
video_id = alias_info[0]['id']
else:
CONTENT_ID_REGEXES = [
r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
r'"first_videoid"\s*:\s*"([^"]+)"',
r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
r'<article[^>]data-uuid=["\']([^"\']+)',
r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
]
video_id = self._search_regex(
CONTENT_ID_REGEXES, webpage, 'content ID')
else: else:
items = json.loads(items_json) country = country.split('-')[0]
info = items['mediaItems']['query']['results']['mediaObj'][0] api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
# The 'meta' field is not always in the video webpage, we request it
# from another page
video_id = info['id']
return self._get_info(video_id, display_id, webpage)
def _extract_info(self, display_id, query, webpage): for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
info = query['query']['results']['mediaObj'][0] content = self._download_json(
meta = info.get('meta') api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
video_id = info.get('id') display_id, 'Downloading content JSON metadata', fatal=i == 1)
if content:
item = content['items'][0]
break
if not meta: if item.get('type') != 'video':
msg = info['status'].get('msg') entries = []
if msg:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, msg), expected=True)
raise ExtractorError('Unable to extract media object meta')
cover = item.get('cover') or {}
if cover.get('type') == 'yvideo':
cover_url = cover.get('url')
if cover_url:
entries.append(self.url_result(
cover_url, 'Yahoo', cover.get('uuid')))
for e in item.get('body', []):
if e.get('type') == 'videoIframe':
iframe_url = e.get('url')
if not iframe_url:
continue
entries.append(self.url_result(iframe_url))
return self.playlist_result(
entries, item.get('uuid'),
item.get('title'), item.get('summary'))
video_id = item['uuid']
video = self._download_json(
api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
video_id, 'Downloading video JSON metadata')[0]
title = video['title']
if country == 'malaysia':
country = 'my'
is_live = video.get('live_state') == 'live'
fmts = ('m3u8',) if is_live else ('webm', 'mp4')
urls = []
formats = [] formats = []
for s in info['streams']:
tbr = int_or_none(s.get('bitrate'))
format_info = {
'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')),
'tbr': tbr,
}
host = s['host']
path = s['path']
if host.startswith('rtmp'):
fmt = 'rtmp'
format_info.update({
'url': host,
'play_path': path,
'ext': 'flv',
})
else:
if s.get('format') == 'm3u8_playlist':
fmt = 'hls'
format_info.update({
'protocol': 'm3u8_native',
'ext': 'mp4',
})
else:
fmt = format_info['ext'] = determine_ext(path)
format_url = compat_urlparse.urljoin(host, path)
format_info['url'] = format_url
format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '')
formats.append(format_info)
closed_captions = self._html_search_regex(
r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
default='[]')
cc_json = self._parse_json(closed_captions, video_id, fatal=False)
subtitles = {} subtitles = {}
if cc_json: for fmt in fmts:
for closed_caption in cc_json: media_obj = self._download_json(
lang = closed_caption['lang'] 'https://video-api.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
if lang not in subtitles: video_id, 'Downloading %s JSON metadata' % fmt,
subtitles[lang] = [] headers=self.geo_verification_headers(), query={
subtitles[lang].append({ 'format': fmt,
'url': closed_caption['url'], 'region': country.upper(),
'ext': mimetype2ext(closed_caption['content_type']), })['query']['results']['mediaObj'][0]
msg = media_obj.get('status', {}).get('msg')
for s in media_obj.get('streams', []):
host = s.get('host')
path = s.get('path')
if not host or not path:
continue
s_url = host + path
if s.get('format') == 'm3u8':
formats.extend(self._extract_m3u8_formats(
s_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
continue
tbr = int_or_none(s.get('bitrate'))
formats.append({
'url': s_url,
'format_id': fmt + ('-%d' % tbr if tbr else ''),
'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')),
'tbr': tbr,
'fps': int_or_none(s.get('framerate')),
}) })
for cc in media_obj.get('closedcaptions', []):
cc_url = cc.get('url')
if not cc_url or cc_url in urls:
continue
urls.append(cc_url)
subtitles.setdefault(cc.get('lang') or 'en-US', []).append({
'url': cc_url,
'ext': mimetype2ext(cc.get('content_type')),
})
streaming_url = video.get('streaming_url')
if streaming_url and not is_live:
formats.extend(self._extract_m3u8_formats(
streaming_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
if not formats and msg == 'geo restricted':
self.raise_geo_restricted()
self._sort_formats(formats)
thumbnails = []
for thumb in video.get('thumbnails', []):
thumb_url = thumb.get('url')
if not thumb_url:
continue
thumbnails.append({
'id': thumb.get('tag'),
'url': thumb.get('url'),
'width': int_or_none(thumb.get('width')),
'height': int_or_none(thumb.get('height')),
})
series_info = video.get('series_info') or {}
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'title': self._live_title(title) if is_live else title,
'title': unescapeHTML(meta['title']),
'formats': formats, 'formats': formats,
'description': clean_html(meta['description']), 'display_id': display_id,
'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage), 'thumbnails': thumbnails,
'duration': int_or_none(meta.get('duration')), 'description': clean_html(video.get('description')),
'timestamp': parse_iso8601(video.get('publish_time')),
'subtitles': subtitles, 'subtitles': subtitles,
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('view_count')),
'is_live': is_live,
'series': video.get('show_name'),
'season_number': int_or_none(series_info.get('season_number')),
'episode_number': int_or_none(series_info.get('episode_number')),
} }
def _get_info(self, video_id, display_id, webpage):
region = self._search_regex(
r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
webpage, 'region', fatal=False, default='US').upper()
formats = []
info = {}
for fmt in ('webm', 'mp4'):
query_result = self._download_json(
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
display_id, 'Downloading %s video info' % fmt, query={
'protocol': 'http',
'region': region,
'format': fmt,
})
info = self._extract_info(display_id, query_result, webpage)
formats.extend(info['formats'])
formats.extend(self._extract_m3u8_formats(
'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region),
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
self._sort_formats(formats)
info['formats'] = formats
return info
class YahooSearchIE(SearchInfoExtractor): class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search' IE_DESC = 'Yahoo screen search'

View File

@ -393,7 +393,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
sub_ext = sub_info['ext'] sub_ext = sub_info['ext']
if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang) sub_langs.append(lang)
sub_filenames.append(subtitles_filename(filename, lang, sub_ext)) sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
else: else:
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
webm_vtt_warn = True webm_vtt_warn = True
@ -606,9 +606,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
self._downloader.to_screen( self._downloader.to_screen(
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
continue continue
old_file = subtitles_filename(filename, lang, ext) old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
sub_filenames.append(old_file) sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, new_ext) new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
if ext in ('dfxp', 'ttml', 'tt'): if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning( self._downloader.report_warning(
@ -616,7 +616,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
'which results in style information loss') 'which results in style information loss')
dfxp_file = old_file dfxp_file = old_file
srt_file = subtitles_filename(filename, lang, 'srt') srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
with open(dfxp_file, 'rb') as f: with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read()) srt_data = dfxp2srt(f.read())

View File

@ -2906,8 +2906,8 @@ def determine_ext(url, default_ext='unknown_video'):
return default_ext return default_ext
def subtitles_filename(filename, sub_lang, sub_format): def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
def date_from_str(date_str): def date_from_str(date_str):
@ -4979,7 +4979,7 @@ class ISO3166Utils(object):
class GeoUtils(object): class GeoUtils(object):
# Major IPv4 address blocks per country # Major IPv4 address blocks per country
_country_ip_map = { _country_ip_map = {
'AD': '85.94.160.0/19', 'AD': '46.172.224.0/19',
'AE': '94.200.0.0/13', 'AE': '94.200.0.0/13',
'AF': '149.54.0.0/17', 'AF': '149.54.0.0/17',
'AG': '209.59.64.0/18', 'AG': '209.59.64.0/18',
@ -4987,28 +4987,30 @@ class GeoUtils(object):
'AL': '46.99.0.0/16', 'AL': '46.99.0.0/16',
'AM': '46.70.0.0/15', 'AM': '46.70.0.0/15',
'AO': '105.168.0.0/13', 'AO': '105.168.0.0/13',
'AP': '159.117.192.0/21', 'AP': '182.50.184.0/21',
'AQ': '23.154.160.0/24',
'AR': '181.0.0.0/12', 'AR': '181.0.0.0/12',
'AS': '202.70.112.0/20', 'AS': '202.70.112.0/20',
'AT': '84.112.0.0/13', 'AT': '77.116.0.0/14',
'AU': '1.128.0.0/11', 'AU': '1.128.0.0/11',
'AW': '181.41.0.0/18', 'AW': '181.41.0.0/18',
'AZ': '5.191.0.0/16', 'AX': '185.217.4.0/22',
'AZ': '5.197.0.0/16',
'BA': '31.176.128.0/17', 'BA': '31.176.128.0/17',
'BB': '65.48.128.0/17', 'BB': '65.48.128.0/17',
'BD': '114.130.0.0/16', 'BD': '114.130.0.0/16',
'BE': '57.0.0.0/8', 'BE': '57.0.0.0/8',
'BF': '129.45.128.0/17', 'BF': '102.178.0.0/15',
'BG': '95.42.0.0/15', 'BG': '95.42.0.0/15',
'BH': '37.131.0.0/17', 'BH': '37.131.0.0/17',
'BI': '154.117.192.0/18', 'BI': '154.117.192.0/18',
'BJ': '137.255.0.0/16', 'BJ': '137.255.0.0/16',
'BL': '192.131.134.0/24', 'BL': '185.212.72.0/23',
'BM': '196.12.64.0/18', 'BM': '196.12.64.0/18',
'BN': '156.31.0.0/16', 'BN': '156.31.0.0/16',
'BO': '161.56.0.0/16', 'BO': '161.56.0.0/16',
'BQ': '161.0.80.0/20', 'BQ': '161.0.80.0/20',
'BR': '152.240.0.0/12', 'BR': '191.128.0.0/12',
'BS': '24.51.64.0/18', 'BS': '24.51.64.0/18',
'BT': '119.2.96.0/19', 'BT': '119.2.96.0/19',
'BW': '168.167.0.0/16', 'BW': '168.167.0.0/16',
@ -5016,20 +5018,20 @@ class GeoUtils(object):
'BZ': '179.42.192.0/18', 'BZ': '179.42.192.0/18',
'CA': '99.224.0.0/11', 'CA': '99.224.0.0/11',
'CD': '41.243.0.0/16', 'CD': '41.243.0.0/16',
'CF': '196.32.200.0/21', 'CF': '197.242.176.0/21',
'CG': '197.214.128.0/17', 'CG': '160.113.0.0/16',
'CH': '85.0.0.0/13', 'CH': '85.0.0.0/13',
'CI': '154.232.0.0/14', 'CI': '102.136.0.0/14',
'CK': '202.65.32.0/19', 'CK': '202.65.32.0/19',
'CL': '152.172.0.0/14', 'CL': '152.172.0.0/14',
'CM': '165.210.0.0/15', 'CM': '102.244.0.0/14',
'CN': '36.128.0.0/10', 'CN': '36.128.0.0/10',
'CO': '181.240.0.0/12', 'CO': '181.240.0.0/12',
'CR': '201.192.0.0/12', 'CR': '201.192.0.0/12',
'CU': '152.206.0.0/15', 'CU': '152.206.0.0/15',
'CV': '165.90.96.0/19', 'CV': '165.90.96.0/19',
'CW': '190.88.128.0/17', 'CW': '190.88.128.0/17',
'CY': '46.198.0.0/15', 'CY': '31.153.0.0/16',
'CZ': '88.100.0.0/14', 'CZ': '88.100.0.0/14',
'DE': '53.0.0.0/8', 'DE': '53.0.0.0/8',
'DJ': '197.241.0.0/17', 'DJ': '197.241.0.0/17',
@ -5046,6 +5048,7 @@ class GeoUtils(object):
'EU': '2.16.0.0/13', 'EU': '2.16.0.0/13',
'FI': '91.152.0.0/13', 'FI': '91.152.0.0/13',
'FJ': '144.120.0.0/16', 'FJ': '144.120.0.0/16',
'FK': '80.73.208.0/21',
'FM': '119.252.112.0/20', 'FM': '119.252.112.0/20',
'FO': '88.85.32.0/19', 'FO': '88.85.32.0/19',
'FR': '90.0.0.0/9', 'FR': '90.0.0.0/9',
@ -5055,8 +5058,8 @@ class GeoUtils(object):
'GE': '31.146.0.0/16', 'GE': '31.146.0.0/16',
'GF': '161.22.64.0/18', 'GF': '161.22.64.0/18',
'GG': '62.68.160.0/19', 'GG': '62.68.160.0/19',
'GH': '45.208.0.0/14', 'GH': '154.160.0.0/12',
'GI': '85.115.128.0/19', 'GI': '95.164.0.0/16',
'GL': '88.83.0.0/19', 'GL': '88.83.0.0/19',
'GM': '160.182.0.0/15', 'GM': '160.182.0.0/15',
'GN': '197.149.192.0/18', 'GN': '197.149.192.0/18',
@ -5085,13 +5088,13 @@ class GeoUtils(object):
'JE': '87.244.64.0/18', 'JE': '87.244.64.0/18',
'JM': '72.27.0.0/17', 'JM': '72.27.0.0/17',
'JO': '176.29.0.0/16', 'JO': '176.29.0.0/16',
'JP': '126.0.0.0/8', 'JP': '133.0.0.0/8',
'KE': '105.48.0.0/12', 'KE': '105.48.0.0/12',
'KG': '158.181.128.0/17', 'KG': '158.181.128.0/17',
'KH': '36.37.128.0/17', 'KH': '36.37.128.0/17',
'KI': '103.25.140.0/22', 'KI': '103.25.140.0/22',
'KM': '197.255.224.0/20', 'KM': '197.255.224.0/20',
'KN': '198.32.32.0/19', 'KN': '198.167.192.0/19',
'KP': '175.45.176.0/22', 'KP': '175.45.176.0/22',
'KR': '175.192.0.0/10', 'KR': '175.192.0.0/10',
'KW': '37.36.0.0/14', 'KW': '37.36.0.0/14',
@ -5099,10 +5102,10 @@ class GeoUtils(object):
'KZ': '2.72.0.0/13', 'KZ': '2.72.0.0/13',
'LA': '115.84.64.0/18', 'LA': '115.84.64.0/18',
'LB': '178.135.0.0/16', 'LB': '178.135.0.0/16',
'LC': '192.147.231.0/24', 'LC': '24.92.144.0/20',
'LI': '82.117.0.0/19', 'LI': '82.117.0.0/19',
'LK': '112.134.0.0/15', 'LK': '112.134.0.0/15',
'LR': '41.86.0.0/19', 'LR': '102.183.0.0/16',
'LS': '129.232.0.0/17', 'LS': '129.232.0.0/17',
'LT': '78.56.0.0/13', 'LT': '78.56.0.0/13',
'LU': '188.42.0.0/16', 'LU': '188.42.0.0/16',
@ -5127,7 +5130,7 @@ class GeoUtils(object):
'MT': '46.11.0.0/16', 'MT': '46.11.0.0/16',
'MU': '105.16.0.0/12', 'MU': '105.16.0.0/12',
'MV': '27.114.128.0/18', 'MV': '27.114.128.0/18',
'MW': '105.234.0.0/16', 'MW': '102.70.0.0/15',
'MX': '187.192.0.0/11', 'MX': '187.192.0.0/11',
'MY': '175.136.0.0/13', 'MY': '175.136.0.0/13',
'MZ': '197.218.0.0/15', 'MZ': '197.218.0.0/15',
@ -5158,23 +5161,23 @@ class GeoUtils(object):
'PW': '202.124.224.0/20', 'PW': '202.124.224.0/20',
'PY': '181.120.0.0/14', 'PY': '181.120.0.0/14',
'QA': '37.210.0.0/15', 'QA': '37.210.0.0/15',
'RE': '139.26.0.0/16', 'RE': '102.35.0.0/16',
'RO': '79.112.0.0/13', 'RO': '79.112.0.0/13',
'RS': '178.220.0.0/14', 'RS': '93.86.0.0/15',
'RU': '5.136.0.0/13', 'RU': '5.136.0.0/13',
'RW': '105.178.0.0/15', 'RW': '41.186.0.0/16',
'SA': '188.48.0.0/13', 'SA': '188.48.0.0/13',
'SB': '202.1.160.0/19', 'SB': '202.1.160.0/19',
'SC': '154.192.0.0/11', 'SC': '154.192.0.0/11',
'SD': '154.96.0.0/13', 'SD': '102.120.0.0/13',
'SE': '78.64.0.0/12', 'SE': '78.64.0.0/12',
'SG': '152.56.0.0/14', 'SG': '8.128.0.0/10',
'SI': '188.196.0.0/14', 'SI': '188.196.0.0/14',
'SK': '78.98.0.0/15', 'SK': '78.98.0.0/15',
'SL': '197.215.0.0/17', 'SL': '102.143.0.0/17',
'SM': '89.186.32.0/19', 'SM': '89.186.32.0/19',
'SN': '41.82.0.0/15', 'SN': '41.82.0.0/15',
'SO': '197.220.64.0/19', 'SO': '154.115.192.0/18',
'SR': '186.179.128.0/17', 'SR': '186.179.128.0/17',
'SS': '105.235.208.0/21', 'SS': '105.235.208.0/21',
'ST': '197.159.160.0/19', 'ST': '197.159.160.0/19',
@ -5197,15 +5200,15 @@ class GeoUtils(object):
'TV': '202.2.96.0/19', 'TV': '202.2.96.0/19',
'TW': '120.96.0.0/11', 'TW': '120.96.0.0/11',
'TZ': '156.156.0.0/14', 'TZ': '156.156.0.0/14',
'UA': '93.72.0.0/13', 'UA': '37.52.0.0/14',
'UG': '154.224.0.0/13', 'UG': '102.80.0.0/13',
'US': '3.0.0.0/8', 'US': '6.0.0.0/8',
'UY': '167.56.0.0/13', 'UY': '167.56.0.0/13',
'UZ': '82.215.64.0/18', 'UZ': '84.54.64.0/18',
'VA': '212.77.0.0/19', 'VA': '212.77.0.0/19',
'VC': '24.92.144.0/20', 'VC': '207.191.240.0/21',
'VE': '186.88.0.0/13', 'VE': '186.88.0.0/13',
'VG': '172.103.64.0/18', 'VG': '66.81.192.0/20',
'VI': '146.226.0.0/16', 'VI': '146.226.0.0/16',
'VN': '14.160.0.0/11', 'VN': '14.160.0.0/11',
'VU': '202.80.32.0/20', 'VU': '202.80.32.0/20',
@ -5214,8 +5217,8 @@ class GeoUtils(object):
'YE': '134.35.0.0/16', 'YE': '134.35.0.0/16',
'YT': '41.242.116.0/22', 'YT': '41.242.116.0/22',
'ZA': '41.0.0.0/11', 'ZA': '41.0.0.0/11',
'ZM': '165.56.0.0/13', 'ZM': '102.144.0.0/13',
'ZW': '41.85.192.0/19', 'ZW': '102.177.192.0/18',
} }
@classmethod @classmethod

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2019.09.28' __version__ = '2019.10.29'