mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-26 02:14:32 +01:00
Merge remote-tracking branch 'upstream/master' into spreaker
This commit is contained in:
commit
b3f68149b6
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.06.25*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.01**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.06.25**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2018.05.01
|
[debug] youtube-dl version 2018.06.25
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -47,3 +47,4 @@ youtube-dl.zsh
|
|||||||
*.iml
|
*.iml
|
||||||
|
|
||||||
tmp/
|
tmp/
|
||||||
|
venv/
|
||||||
|
207
ChangeLog
207
ChangeLog
@ -1,3 +1,210 @@
|
|||||||
|
version 2018.06.25
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [joj] Relax URL regular expression (#16771)
|
||||||
|
* [brightcove] Workaround sonyliv DRM protected videos (#16807)
|
||||||
|
* [motherless] Fix extraction (#16786)
|
||||||
|
* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780)
|
||||||
|
- [foxnews:insider] Remove extractor (#15810)
|
||||||
|
+ [foxnews] Add support for iframe embeds (#15810, #16711)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.06.19
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Introduce expected_status in _download_* methods
|
||||||
|
to conveniently accept HTTP requests that failed with non-2xx status codes
|
||||||
|
+ [compat] Introduce compat_integer_types
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [peertube] Improve generic support (#16733)
|
||||||
|
+ [6play] Use geo verification headers
|
||||||
|
* [rtbf] Fix extraction for python 3.2
|
||||||
|
* [vgtv] Improve HLS formats extraction
|
||||||
|
+ [vgtv] Add support for www.aftonbladet.se/tv URLs
|
||||||
|
* [bbccouk] Use expected_status
|
||||||
|
* [markiza] Expect 500 HTTP status code
|
||||||
|
* [tvnow] Try all clear manifest URLs (#15361)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.06.18
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/rtmp] Fix downloading in verbose mode (#16736)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [markiza] Add support for markiza.sk (#16750)
|
||||||
|
* [wat] Try all supported adaptive URLs
|
||||||
|
+ [6play] Add support for rtlplay.be and extract hd usp formats
|
||||||
|
+ [rtbf] Add support for audio and live streams (#9638, #11923)
|
||||||
|
+ [rtbf] Extract HLS, DASH and all HTTP formats
|
||||||
|
+ [rtbf] Extract subtitles
|
||||||
|
+ [rtbf] Fixup specific HTTP URLs (#16101)
|
||||||
|
+ [expressen] Add support for expressen.se
|
||||||
|
* [vidzi] Fix extraction (#16678)
|
||||||
|
* [pbs] Improve extraction (#16623, #16684)
|
||||||
|
* [bilibili] Restrict cid regular expression (#16638, #16734)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.06.14
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/http] Fix retry on error when streaming to stdout (#16699)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [discoverynetworks] Add support for disco-api videos (#16724)
|
||||||
|
+ [dailymotion] Add support for password protected videos (#9789)
|
||||||
|
+ [abc:iview] Add support for livestreams (#12354)
|
||||||
|
* [abc:iview] Fix extraction (#16704)
|
||||||
|
+ [crackle] Add support for sonycrackle.com (#16698)
|
||||||
|
+ [tvnet] Add support for tvnet.gov.vn (#15462)
|
||||||
|
* [nrk] Update API hosts and try all previously known ones (#16690)
|
||||||
|
* [wimp] Fix Youtube embeds extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.06.11
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [npo] Extend URL regular expression and add support for npostart.nl (#16682)
|
||||||
|
+ [inc] Add support for another embed schema (#16666)
|
||||||
|
* [tv4] Fix format extraction (#16650)
|
||||||
|
+ [nexx] Add support for free cdn (#16538)
|
||||||
|
+ [pbs] Add another cove id pattern (#15373)
|
||||||
|
+ [rbmaradio] Add support for 192k format (#16631)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.06.04
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [camtube] Add support for camtube.co
|
||||||
|
+ [twitter:card] Extract guest token (#16609)
|
||||||
|
+ [chaturbate] Use geo verification headers
|
||||||
|
+ [bbc] Add support for bbcthree (#16612)
|
||||||
|
* [youtube] Move metadata extraction after video availability check
|
||||||
|
+ [youtube] Extract track and artist
|
||||||
|
+ [safari] Add support for new URL schema (#16614)
|
||||||
|
* [adn] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.06.02
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Improve determine_ext
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [facebook] Add support for tahoe player videos (#15441, #16554)
|
||||||
|
* [cbc] Improve extraction (#16583, #16593)
|
||||||
|
* [openload] Improve ext extraction (#16595)
|
||||||
|
+ [twitter:card] Add support for another endpoint (#16586)
|
||||||
|
+ [openload] Add support for oload.win and oload.download (#16592)
|
||||||
|
* [audimedia] Fix extraction (#15309)
|
||||||
|
+ [francetv] Add support for sport.francetvinfo.fr (#15645)
|
||||||
|
* [mlb] Improve extraction (#16587)
|
||||||
|
- [nhl] Remove old extractors
|
||||||
|
* [rbmaradio] Check formats availability (#16585)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.05.30
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/rtmp] Generalize download messages and report time elapsed
|
||||||
|
on finish
|
||||||
|
* [downloader/rtmp] Gracefully handle live streams interrupted by user
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [teamcoco] Fix extraction for full episodes (#16573)
|
||||||
|
* [spiegel] Fix info extraction (#16538)
|
||||||
|
+ [apa] Add support for apa.at (#15041, #15672)
|
||||||
|
+ [bellmedia] Add support for bnnbloomberg.ca (#16560)
|
||||||
|
+ [9c9media] Extract MPD formats and subtitles
|
||||||
|
* [cammodels] Use geo verification headers
|
||||||
|
+ [ufctv] Add support for authentication (#16542)
|
||||||
|
+ [cammodels] Add support for cammodels.com (#14499)
|
||||||
|
* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt
|
||||||
|
(#16551)
|
||||||
|
* [soundcloud] Detect format extension (#16549)
|
||||||
|
* [cbc] Fix playlist title extraction (#16502)
|
||||||
|
+ [tumblr] Detect and report sensitive media (#13829)
|
||||||
|
+ [tumblr] Add support for authentication (#15133)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.05.26
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Improve parse_age_limit
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [audiomack] Stringify video id (#15310)
|
||||||
|
* [izlesene] Fix extraction (#16233, #16271, #16407)
|
||||||
|
+ [indavideo] Add support for generic embeds (#11989)
|
||||||
|
* [indavideo] Fix extraction (#11221)
|
||||||
|
* [indavideo] Sign download URLs (#16174)
|
||||||
|
+ [peertube] Add support for PeerTube based sites (#16301, #16329)
|
||||||
|
* [imgur] Fix extraction (#16537)
|
||||||
|
+ [hidive] Add support for authentication (#16534)
|
||||||
|
+ [nbc] Add support for stream.nbcsports.com (#13911)
|
||||||
|
+ [viewlift] Add support for hoichoi.tv (#16536)
|
||||||
|
* [go90] Extract age limit and detect DRM protection (#10127)
|
||||||
|
* [viewlift] fix extraction for snagfilms.com (#15766)
|
||||||
|
* [globo] Improve extraction (#4189)
|
||||||
|
* Add support for authentication
|
||||||
|
* Simplify URL signing
|
||||||
|
* Extract DASH and MSS formats
|
||||||
|
* [leeco] Fix extraction (#16464)
|
||||||
|
* [teamcoco] Add fallback for format extraction (#16484)
|
||||||
|
* [teamcoco] Improve URL regular expression (#16484)
|
||||||
|
* [imdb] Improve extraction (#4085, #14557)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.05.18
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [vimeo:likes] Relax URL regular expression and fix single page likes
|
||||||
|
extraction (#16475)
|
||||||
|
* [pluralsight] Fix clip id extraction (#16460)
|
||||||
|
+ [mychannels] Add support for mychannels.com (#15334)
|
||||||
|
- [moniker] Remove extractor (#15336)
|
||||||
|
* [pbs] Fix embed data extraction (#16474)
|
||||||
|
+ [mtv] Add support for paramountnetwork.com and bellator.com (#15418)
|
||||||
|
* [youtube] Fix hd720 format position
|
||||||
|
* [dailymotion] Remove fragment part from m3u8 URLs (#8915)
|
||||||
|
* [3sat] Improve extraction (#15350)
|
||||||
|
* Extract all formats
|
||||||
|
* Extract more format metadata
|
||||||
|
* Improve format sorting
|
||||||
|
* Use hls native downloader
|
||||||
|
* Detect and bypass geo-restriction
|
||||||
|
+ [dtube] Add support for d.tube (#15201)
|
||||||
|
* [options] Fix typo (#16450)
|
||||||
|
* [youtube] Improve format filesize extraction (#16453)
|
||||||
|
* [youtube] Make uploader extraction non fatal (#16444)
|
||||||
|
* [youtube] Fix extraction for embed restricted live streams (#16433)
|
||||||
|
* [nbc] Improve info extraction (#16440)
|
||||||
|
* [twitch:clips] Fix extraction (#16429)
|
||||||
|
* [redditr] Relax URL regular expression (#16426, #16427)
|
||||||
|
* [mixcloud] Bypass throttling for HTTP formats (#12579, #16424)
|
||||||
|
+ [nick] Add support for nickjr.de (#13230)
|
||||||
|
* [teamcoco] Fix extraction (#16374)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.05.09
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Ensure ext exists for automatic captions
|
||||||
|
* Introduce --geo-bypass-ip-block
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [udemy] Extract asset captions
|
||||||
|
+ [udemy] Extract stream URLs (#16372)
|
||||||
|
+ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389)
|
||||||
|
+ [cloudflarestream] Add support for cloudflarestream.com (#16375)
|
||||||
|
* [watchbox] Fix extraction (#16356)
|
||||||
|
* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954)
|
||||||
|
+ [itv:btcc] Add support for itv.com/btcc (#16139)
|
||||||
|
* [tunein] Use live title for live streams (#16347)
|
||||||
|
* [itv] Improve extraction (#16253)
|
||||||
|
|
||||||
|
|
||||||
version 2018.05.01
|
version 2018.05.01
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
16
README.md
16
README.md
@ -93,8 +93,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
|
|
||||||
## Network Options:
|
## Network Options:
|
||||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||||
To enable experimental SOCKS proxy, specify
|
To enable SOCKS proxy, specify a proper
|
||||||
a proper scheme. For example
|
scheme. For example
|
||||||
socks5://127.0.0.1:1080/. Pass in an empty
|
socks5://127.0.0.1:1080/. Pass in an empty
|
||||||
string (--proxy "") for direct connection
|
string (--proxy "") for direct connection
|
||||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||||
@ -106,16 +106,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||||
some geo-restricted sites. The default
|
some geo-restricted sites. The default
|
||||||
proxy specified by --proxy (or none, if the
|
proxy specified by --proxy (or none, if the
|
||||||
options is not present) is used for the
|
option is not present) is used for the
|
||||||
actual downloading.
|
actual downloading.
|
||||||
--geo-bypass Bypass geographic restriction via faking
|
--geo-bypass Bypass geographic restriction via faking
|
||||||
X-Forwarded-For HTTP header (experimental)
|
X-Forwarded-For HTTP header
|
||||||
--no-geo-bypass Do not bypass geographic restriction via
|
--no-geo-bypass Do not bypass geographic restriction via
|
||||||
faking X-Forwarded-For HTTP header
|
faking X-Forwarded-For HTTP header
|
||||||
(experimental)
|
|
||||||
--geo-bypass-country CODE Force bypass geographic restriction with
|
--geo-bypass-country CODE Force bypass geographic restriction with
|
||||||
explicitly provided two-letter ISO 3166-1 alpha-2
|
explicitly provided two-letter ISO 3166-1 alpha-2
|
||||||
country code (experimental)
|
country code
|
||||||
|
--geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
|
||||||
|
explicitly provided IP block in CIDR
|
||||||
|
notation
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||||
@ -206,7 +208,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--playlist-reverse Download playlist videos in reverse order
|
--playlist-reverse Download playlist videos in reverse order
|
||||||
--playlist-random Download playlist videos in random order
|
--playlist-random Download playlist videos in random order
|
||||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||||
expected file size (experimental)
|
expected file size
|
||||||
--hls-prefer-native Use the native HLS downloader instead of
|
--hls-prefer-native Use the native HLS downloader instead of
|
||||||
ffmpeg
|
ffmpeg
|
||||||
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
||||||
|
@ -13,7 +13,7 @@ year = str(datetime.datetime.now().year)
|
|||||||
for fn in glob.glob('*.html*'):
|
for fn in glob.glob('*.html*'):
|
||||||
with io.open(fn, encoding='utf-8') as f:
|
with io.open(fn, encoding='utf-8') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
|
newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
|
||||||
if content != newc:
|
if content != newc:
|
||||||
tmpFn = fn + '.part'
|
tmpFn = fn + '.part'
|
||||||
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
|
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
|
||||||
|
@ -15,7 +15,6 @@
|
|||||||
- **8tracks**
|
- **8tracks**
|
||||||
- **91porn**
|
- **91porn**
|
||||||
- **9c9media**
|
- **9c9media**
|
||||||
- **9c9media:stack**
|
|
||||||
- **9gag**
|
- **9gag**
|
||||||
- **9now.com.au**
|
- **9now.com.au**
|
||||||
- **abc.net.au**
|
- **abc.net.au**
|
||||||
@ -48,6 +47,7 @@
|
|||||||
- **anitube.se**
|
- **anitube.se**
|
||||||
- **Anvato**
|
- **Anvato**
|
||||||
- **AnySex**
|
- **AnySex**
|
||||||
|
- **APA**
|
||||||
- **Aparat**
|
- **Aparat**
|
||||||
- **AppleConnect**
|
- **AppleConnect**
|
||||||
- **AppleDaily**: 臺灣蘋果日報
|
- **AppleDaily**: 臺灣蘋果日報
|
||||||
@ -100,6 +100,7 @@
|
|||||||
- **Beatport**
|
- **Beatport**
|
||||||
- **Beeg**
|
- **Beeg**
|
||||||
- **BehindKink**
|
- **BehindKink**
|
||||||
|
- **Bellator**
|
||||||
- **BellMedia**
|
- **BellMedia**
|
||||||
- **Bet**
|
- **Bet**
|
||||||
- **Bigflix**
|
- **Bigflix**
|
||||||
@ -122,10 +123,13 @@
|
|||||||
- **BRMediathek**: Bayerischer Rundfunk Mediathek
|
- **BRMediathek**: Bayerischer Rundfunk Mediathek
|
||||||
- **bt:article**: Bergens Tidende Articles
|
- **bt:article**: Bergens Tidende Articles
|
||||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||||
|
- **BusinessInsider**
|
||||||
- **BuzzFeed**
|
- **BuzzFeed**
|
||||||
- **BYUtv**
|
- **BYUtv**
|
||||||
- **Camdemy**
|
- **Camdemy**
|
||||||
- **CamdemyFolder**
|
- **CamdemyFolder**
|
||||||
|
- **CamModels**
|
||||||
|
- **CamTube**
|
||||||
- **CamWithHer**
|
- **CamWithHer**
|
||||||
- **canalc2.tv**
|
- **canalc2.tv**
|
||||||
- **Canalplus**: mycanal.fr and piwiplus.fr
|
- **Canalplus**: mycanal.fr and piwiplus.fr
|
||||||
@ -163,6 +167,7 @@
|
|||||||
- **ClipRs**
|
- **ClipRs**
|
||||||
- **Clipsyndicate**
|
- **Clipsyndicate**
|
||||||
- **CloserToTruth**
|
- **CloserToTruth**
|
||||||
|
- **CloudflareStream**
|
||||||
- **cloudtime**: CloudTime
|
- **cloudtime**: CloudTime
|
||||||
- **Cloudy**
|
- **Cloudy**
|
||||||
- **Clubic**
|
- **Clubic**
|
||||||
@ -232,6 +237,7 @@
|
|||||||
- **DrTuber**
|
- **DrTuber**
|
||||||
- **drtv**
|
- **drtv**
|
||||||
- **drtv:live**
|
- **drtv:live**
|
||||||
|
- **DTube**
|
||||||
- **Dumpert**
|
- **Dumpert**
|
||||||
- **dvtv**: http://video.aktualne.cz/
|
- **dvtv**: http://video.aktualne.cz/
|
||||||
- **dw**
|
- **dw**
|
||||||
@ -260,6 +266,7 @@
|
|||||||
- **Europa**
|
- **Europa**
|
||||||
- **EveryonesMixtape**
|
- **EveryonesMixtape**
|
||||||
- **ExpoTV**
|
- **ExpoTV**
|
||||||
|
- **Expressen**
|
||||||
- **ExtremeTube**
|
- **ExtremeTube**
|
||||||
- **EyedoTV**
|
- **EyedoTV**
|
||||||
- **facebook**
|
- **facebook**
|
||||||
@ -283,7 +290,6 @@
|
|||||||
- **Foxgay**
|
- **Foxgay**
|
||||||
- **foxnews**: Fox News and Fox Business Video
|
- **foxnews**: Fox News and Fox Business Video
|
||||||
- **foxnews:article**
|
- **foxnews:article**
|
||||||
- **foxnews:insider**
|
|
||||||
- **FoxSports**
|
- **FoxSports**
|
||||||
- **france2.fr:generation-what**
|
- **france2.fr:generation-what**
|
||||||
- **FranceCulture**
|
- **FranceCulture**
|
||||||
@ -361,7 +367,6 @@
|
|||||||
- **ImgurAlbum**
|
- **ImgurAlbum**
|
||||||
- **Ina**
|
- **Ina**
|
||||||
- **Inc**
|
- **Inc**
|
||||||
- **Indavideo**
|
|
||||||
- **IndavideoEmbed**
|
- **IndavideoEmbed**
|
||||||
- **InfoQ**
|
- **InfoQ**
|
||||||
- **Instagram**
|
- **Instagram**
|
||||||
@ -373,6 +378,7 @@
|
|||||||
- **Ir90Tv**
|
- **Ir90Tv**
|
||||||
- **ITTF**
|
- **ITTF**
|
||||||
- **ITV**
|
- **ITV**
|
||||||
|
- **ITVBTCC**
|
||||||
- **ivi**: ivi.ru
|
- **ivi**: ivi.ru
|
||||||
- **ivi:compilation**: ivi.ru compilations
|
- **ivi:compilation**: ivi.ru compilations
|
||||||
- **ivideon**: Ivideon TV
|
- **ivideon**: Ivideon TV
|
||||||
@ -445,11 +451,12 @@
|
|||||||
- **mailru**: Видео@Mail.Ru
|
- **mailru**: Видео@Mail.Ru
|
||||||
- **mailru:music**: Музыка@Mail.Ru
|
- **mailru:music**: Музыка@Mail.Ru
|
||||||
- **mailru:music:search**: Музыка@Mail.Ru
|
- **mailru:music:search**: Музыка@Mail.Ru
|
||||||
- **MakersChannel**
|
|
||||||
- **MakerTV**
|
- **MakerTV**
|
||||||
- **mangomolo:live**
|
- **mangomolo:live**
|
||||||
- **mangomolo:video**
|
- **mangomolo:video**
|
||||||
- **ManyVids**
|
- **ManyVids**
|
||||||
|
- **Markiza**
|
||||||
|
- **MarkizaPage**
|
||||||
- **massengeschmack.tv**
|
- **massengeschmack.tv**
|
||||||
- **MatchTV**
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
@ -483,7 +490,6 @@
|
|||||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||||
- **Mofosex**
|
- **Mofosex**
|
||||||
- **Mojvideo**
|
- **Mojvideo**
|
||||||
- **Moniker**: allmyvideos.net and vidspot.net
|
|
||||||
- **Morningstar**: morningstar.com
|
- **Morningstar**: morningstar.com
|
||||||
- **Motherless**
|
- **Motherless**
|
||||||
- **MotherlessGroup**
|
- **MotherlessGroup**
|
||||||
@ -505,6 +511,7 @@
|
|||||||
- **mva:course**: Microsoft Virtual Academy courses
|
- **mva:course**: Microsoft Virtual Academy courses
|
||||||
- **Mwave**
|
- **Mwave**
|
||||||
- **MwaveMeetGreet**
|
- **MwaveMeetGreet**
|
||||||
|
- **MyChannels**
|
||||||
- **MySpace**
|
- **MySpace**
|
||||||
- **MySpace:album**
|
- **MySpace:album**
|
||||||
- **MySpass**
|
- **MySpass**
|
||||||
@ -522,6 +529,7 @@
|
|||||||
- **nbcolympics**
|
- **nbcolympics**
|
||||||
- **nbcolympics:stream**
|
- **nbcolympics:stream**
|
||||||
- **NBCSports**
|
- **NBCSports**
|
||||||
|
- **NBCSportsStream**
|
||||||
- **NBCSportsVPlayer**
|
- **NBCSportsVPlayer**
|
||||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||||
- **ndr:embed**
|
- **ndr:embed**
|
||||||
@ -548,9 +556,6 @@
|
|||||||
- **nfl.com**
|
- **nfl.com**
|
||||||
- **NhkVod**
|
- **NhkVod**
|
||||||
- **nhl.com**
|
- **nhl.com**
|
||||||
- **nhl.com:news**: NHL news
|
|
||||||
- **nhl.com:videocenter**
|
|
||||||
- **nhl.com:videocenter:category**: NHL videocenter category
|
|
||||||
- **nick.com**
|
- **nick.com**
|
||||||
- **nick.de**
|
- **nick.de**
|
||||||
- **nickelodeon:br**
|
- **nickelodeon:br**
|
||||||
@ -615,11 +620,13 @@
|
|||||||
- **PacktPubCourse**
|
- **PacktPubCourse**
|
||||||
- **PandaTV**: 熊猫TV
|
- **PandaTV**: 熊猫TV
|
||||||
- **pandora.tv**: 판도라TV
|
- **pandora.tv**: 판도라TV
|
||||||
|
- **ParamountNetwork**
|
||||||
- **parliamentlive.tv**: UK parliament videos
|
- **parliamentlive.tv**: UK parliament videos
|
||||||
- **Patreon**
|
- **Patreon**
|
||||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||||
- **pcmag**
|
- **pcmag**
|
||||||
- **PearVideo**
|
- **PearVideo**
|
||||||
|
- **PeerTube**
|
||||||
- **People**
|
- **People**
|
||||||
- **PerformGroup**
|
- **PerformGroup**
|
||||||
- **periscope**: Periscope
|
- **periscope**: Periscope
|
||||||
@ -786,7 +793,7 @@
|
|||||||
- **Spiegel**
|
- **Spiegel**
|
||||||
- **Spiegel:Article**: Articles on spiegel.de
|
- **Spiegel:Article**: Articles on spiegel.de
|
||||||
- **Spiegeltv**
|
- **Spiegeltv**
|
||||||
- **Spike**
|
- **sport.francetvinfo.fr**
|
||||||
- **Sport5**
|
- **Sport5**
|
||||||
- **SportBoxEmbed**
|
- **SportBoxEmbed**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
@ -888,6 +895,7 @@
|
|||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvland.com**
|
- **tvland.com**
|
||||||
- **TVN24**
|
- **TVN24**
|
||||||
|
- **TVNet**
|
||||||
- **TVNoe**
|
- **TVNoe**
|
||||||
- **TVNow**
|
- **TVNow**
|
||||||
- **TVNowList**
|
- **TVNowList**
|
||||||
|
@ -2,5 +2,5 @@
|
|||||||
universal = True
|
universal = True
|
||||||
|
|
||||||
[flake8]
|
[flake8]
|
||||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
|
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
|
||||||
ignore = E402,E501,E731,E741
|
ignore = E402,E501,E731,E741
|
||||||
|
@ -361,6 +361,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
|
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
|
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
|
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
|
||||||
|
self.assertEqual(determine_ext('foobar', None), None)
|
||||||
|
|
||||||
def test_find_xpath_attr(self):
|
def test_find_xpath_attr(self):
|
||||||
testxml = '''<root>
|
testxml = '''<root>
|
||||||
@ -519,6 +520,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_age_limit('PG-13'), 13)
|
self.assertEqual(parse_age_limit('PG-13'), 13)
|
||||||
self.assertEqual(parse_age_limit('TV-14'), 14)
|
self.assertEqual(parse_age_limit('TV-14'), 14)
|
||||||
self.assertEqual(parse_age_limit('TV-MA'), 17)
|
self.assertEqual(parse_age_limit('TV-MA'), 17)
|
||||||
|
self.assertEqual(parse_age_limit('TV14'), 14)
|
||||||
|
self.assertEqual(parse_age_limit('TV_G'), 0)
|
||||||
|
|
||||||
def test_parse_duration(self):
|
def test_parse_duration(self):
|
||||||
self.assertEqual(parse_duration(None), None)
|
self.assertEqual(parse_duration(None), None)
|
||||||
|
@ -211,7 +211,7 @@ class YoutubeDL(object):
|
|||||||
At the moment, this is only supported by YouTube.
|
At the moment, this is only supported by YouTube.
|
||||||
proxy: URL of the proxy server to use
|
proxy: URL of the proxy server to use
|
||||||
geo_verification_proxy: URL of the proxy to use for IP address verification
|
geo_verification_proxy: URL of the proxy to use for IP address verification
|
||||||
on geo-restricted sites. (Experimental)
|
on geo-restricted sites.
|
||||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||||
support, using fridibi
|
support, using fridibi
|
||||||
@ -259,7 +259,7 @@ class YoutubeDL(object):
|
|||||||
- "warn": only emit a warning
|
- "warn": only emit a warning
|
||||||
- "detect_or_warn": check whether we can do anything
|
- "detect_or_warn": check whether we can do anything
|
||||||
about it, warn otherwise (default)
|
about it, warn otherwise (default)
|
||||||
source_address: (Experimental) Client-side IP address to bind to.
|
source_address: Client-side IP address to bind to.
|
||||||
call_home: Boolean, true iff we are allowed to contact the
|
call_home: Boolean, true iff we are allowed to contact the
|
||||||
youtube-dl servers for debugging.
|
youtube-dl servers for debugging.
|
||||||
sleep_interval: Number of seconds to sleep before each download when
|
sleep_interval: Number of seconds to sleep before each download when
|
||||||
@ -281,11 +281,14 @@ class YoutubeDL(object):
|
|||||||
match_filter_func in utils.py is one example for this.
|
match_filter_func in utils.py is one example for this.
|
||||||
no_color: Do not emit color codes in output.
|
no_color: Do not emit color codes in output.
|
||||||
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
|
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
|
||||||
HTTP header (experimental)
|
HTTP header
|
||||||
geo_bypass_country:
|
geo_bypass_country:
|
||||||
Two-letter ISO 3166-2 country code that will be used for
|
Two-letter ISO 3166-2 country code that will be used for
|
||||||
explicit geographic restriction bypassing via faking
|
explicit geographic restriction bypassing via faking
|
||||||
X-Forwarded-For HTTP header (experimental)
|
X-Forwarded-For HTTP header
|
||||||
|
geo_bypass_ip_block:
|
||||||
|
IP range in CIDR notation that will be used similarly to
|
||||||
|
geo_bypass_country
|
||||||
|
|
||||||
The following options determine which downloader is picked:
|
The following options determine which downloader is picked:
|
||||||
external_downloader: Executable of the external downloader to call.
|
external_downloader: Executable of the external downloader to call.
|
||||||
@ -302,8 +305,8 @@ class YoutubeDL(object):
|
|||||||
http_chunk_size.
|
http_chunk_size.
|
||||||
|
|
||||||
The following options are used by the post processors:
|
The following options are used by the post processors:
|
||||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
|
||||||
otherwise prefer avconv.
|
otherwise prefer ffmpeg.
|
||||||
postprocessor_args: A list of additional command-line arguments for the
|
postprocessor_args: A list of additional command-line arguments for the
|
||||||
postprocessor.
|
postprocessor.
|
||||||
|
|
||||||
@ -1479,23 +1482,28 @@ class YoutubeDL(object):
|
|||||||
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||||
|
|
||||||
|
for cc_kind in ('subtitles', 'automatic_captions'):
|
||||||
|
cc = info_dict.get(cc_kind)
|
||||||
|
if cc:
|
||||||
|
for _, subtitle in cc.items():
|
||||||
|
for subtitle_format in subtitle:
|
||||||
|
if subtitle_format.get('url'):
|
||||||
|
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||||
|
if subtitle_format.get('ext') is None:
|
||||||
|
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||||
|
|
||||||
|
automatic_captions = info_dict.get('automatic_captions')
|
||||||
subtitles = info_dict.get('subtitles')
|
subtitles = info_dict.get('subtitles')
|
||||||
if subtitles:
|
|
||||||
for _, subtitle in subtitles.items():
|
|
||||||
for subtitle_format in subtitle:
|
|
||||||
if subtitle_format.get('url'):
|
|
||||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
|
||||||
if subtitle_format.get('ext') is None:
|
|
||||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
|
||||||
|
|
||||||
if self.params.get('listsubtitles', False):
|
if self.params.get('listsubtitles', False):
|
||||||
if 'automatic_captions' in info_dict:
|
if 'automatic_captions' in info_dict:
|
||||||
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
|
self.list_subtitles(
|
||||||
|
info_dict['id'], automatic_captions, 'automatic captions')
|
||||||
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
|
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
|
||||||
return
|
return
|
||||||
|
|
||||||
info_dict['requested_subtitles'] = self.process_subtitles(
|
info_dict['requested_subtitles'] = self.process_subtitles(
|
||||||
info_dict['id'], subtitles,
|
info_dict['id'], subtitles, automatic_captions)
|
||||||
info_dict.get('automatic_captions'))
|
|
||||||
|
|
||||||
# We now pick which formats have to be downloaded
|
# We now pick which formats have to be downloaded
|
||||||
if info_dict.get('formats') is None:
|
if info_dict.get('formats') is None:
|
||||||
|
@ -430,6 +430,7 @@ def _real_main(argv=None):
|
|||||||
'config_location': opts.config_location,
|
'config_location': opts.config_location,
|
||||||
'geo_bypass': opts.geo_bypass,
|
'geo_bypass': opts.geo_bypass,
|
||||||
'geo_bypass_country': opts.geo_bypass_country,
|
'geo_bypass_country': opts.geo_bypass_country,
|
||||||
|
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
|
||||||
# just for deprecation check
|
# just for deprecation check
|
||||||
'autonumber': opts.autonumber if opts.autonumber is True else None,
|
'autonumber': opts.autonumber if opts.autonumber is True else None,
|
||||||
'usetitle': opts.usetitle if opts.usetitle is True else None,
|
'usetitle': opts.usetitle if opts.usetitle is True else None,
|
||||||
|
@ -2787,6 +2787,12 @@ except NameError: # Python 3
|
|||||||
compat_numeric_types = (int, float, complex)
|
compat_numeric_types = (int, float, complex)
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
compat_integer_types = (int, long)
|
||||||
|
except NameError: # Python 3
|
||||||
|
compat_integer_types = (int, )
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info < (2, 7):
|
if sys.version_info < (2, 7):
|
||||||
def compat_socket_create_connection(address, timeout, source_address=None):
|
def compat_socket_create_connection(address, timeout, source_address=None):
|
||||||
host, port = address
|
host, port = address
|
||||||
@ -2974,6 +2980,7 @@ __all__ = [
|
|||||||
'compat_http_client',
|
'compat_http_client',
|
||||||
'compat_http_server',
|
'compat_http_server',
|
||||||
'compat_input',
|
'compat_input',
|
||||||
|
'compat_integer_types',
|
||||||
'compat_itertools_count',
|
'compat_itertools_count',
|
||||||
'compat_kwargs',
|
'compat_kwargs',
|
||||||
'compat_numeric_types',
|
'compat_numeric_types',
|
||||||
|
@ -45,7 +45,6 @@ class FileDownloader(object):
|
|||||||
min_filesize: Skip files smaller than this size
|
min_filesize: Skip files smaller than this size
|
||||||
max_filesize: Skip files larger than this size
|
max_filesize: Skip files larger than this size
|
||||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||||
(experimental)
|
|
||||||
external_downloader_args: A list of additional command-line arguments for the
|
external_downloader_args: A list of additional command-line arguments for the
|
||||||
external downloader.
|
external downloader.
|
||||||
hls_use_mpegts: Use the mpegts container for HLS videos.
|
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||||
|
@ -217,10 +217,11 @@ class HttpFD(FileDownloader):
|
|||||||
before = start # start measuring
|
before = start # start measuring
|
||||||
|
|
||||||
def retry(e):
|
def retry(e):
|
||||||
if ctx.tmpfilename != '-':
|
to_stdout = ctx.tmpfilename == '-'
|
||||||
|
if not to_stdout:
|
||||||
ctx.stream.close()
|
ctx.stream.close()
|
||||||
ctx.stream = None
|
ctx.stream = None
|
||||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||||
raise RetryDownload(e)
|
raise RetryDownload(e)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
@ -29,66 +29,68 @@ class RtmpFD(FileDownloader):
|
|||||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||||
cursor_in_new_line = True
|
cursor_in_new_line = True
|
||||||
proc_stderr_closed = False
|
proc_stderr_closed = False
|
||||||
while not proc_stderr_closed:
|
try:
|
||||||
# read line from stderr
|
while not proc_stderr_closed:
|
||||||
line = ''
|
# read line from stderr
|
||||||
while True:
|
line = ''
|
||||||
char = proc.stderr.read(1)
|
while True:
|
||||||
if not char:
|
char = proc.stderr.read(1)
|
||||||
proc_stderr_closed = True
|
if not char:
|
||||||
break
|
proc_stderr_closed = True
|
||||||
if char in [b'\r', b'\n']:
|
break
|
||||||
break
|
if char in [b'\r', b'\n']:
|
||||||
line += char.decode('ascii', 'replace')
|
break
|
||||||
if not line:
|
line += char.decode('ascii', 'replace')
|
||||||
# proc_stderr_closed is True
|
if not line:
|
||||||
continue
|
# proc_stderr_closed is True
|
||||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
continue
|
||||||
if mobj:
|
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
|
||||||
percent = float(mobj.group(2))
|
|
||||||
if not resume_percent:
|
|
||||||
resume_percent = percent
|
|
||||||
resume_downloaded_data_len = downloaded_data_len
|
|
||||||
time_now = time.time()
|
|
||||||
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
|
|
||||||
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
|
|
||||||
data_len = None
|
|
||||||
if percent > 0:
|
|
||||||
data_len = int(downloaded_data_len * 100 / percent)
|
|
||||||
self._hook_progress({
|
|
||||||
'status': 'downloading',
|
|
||||||
'downloaded_bytes': downloaded_data_len,
|
|
||||||
'total_bytes_estimate': data_len,
|
|
||||||
'tmpfilename': tmpfilename,
|
|
||||||
'filename': filename,
|
|
||||||
'eta': eta,
|
|
||||||
'elapsed': time_now - start,
|
|
||||||
'speed': speed,
|
|
||||||
})
|
|
||||||
cursor_in_new_line = False
|
|
||||||
else:
|
|
||||||
# no percent for live streams
|
|
||||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
|
||||||
if mobj:
|
if mobj:
|
||||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||||
|
percent = float(mobj.group(2))
|
||||||
|
if not resume_percent:
|
||||||
|
resume_percent = percent
|
||||||
|
resume_downloaded_data_len = downloaded_data_len
|
||||||
time_now = time.time()
|
time_now = time.time()
|
||||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
|
||||||
|
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
|
||||||
|
data_len = None
|
||||||
|
if percent > 0:
|
||||||
|
data_len = int(downloaded_data_len * 100 / percent)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
|
'status': 'downloading',
|
||||||
'downloaded_bytes': downloaded_data_len,
|
'downloaded_bytes': downloaded_data_len,
|
||||||
|
'total_bytes_estimate': data_len,
|
||||||
'tmpfilename': tmpfilename,
|
'tmpfilename': tmpfilename,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'downloading',
|
'eta': eta,
|
||||||
'elapsed': time_now - start,
|
'elapsed': time_now - start,
|
||||||
'speed': speed,
|
'speed': speed,
|
||||||
})
|
})
|
||||||
cursor_in_new_line = False
|
cursor_in_new_line = False
|
||||||
elif self.params.get('verbose', False):
|
else:
|
||||||
if not cursor_in_new_line:
|
# no percent for live streams
|
||||||
self.to_screen('')
|
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||||
cursor_in_new_line = True
|
if mobj:
|
||||||
self.to_screen('[rtmpdump] ' + line)
|
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||||
proc.wait()
|
time_now = time.time()
|
||||||
|
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': downloaded_data_len,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'downloading',
|
||||||
|
'elapsed': time_now - start,
|
||||||
|
'speed': speed,
|
||||||
|
})
|
||||||
|
cursor_in_new_line = False
|
||||||
|
elif self.params.get('verbose', False):
|
||||||
|
if not cursor_in_new_line:
|
||||||
|
self.to_screen('')
|
||||||
|
cursor_in_new_line = True
|
||||||
|
self.to_screen('[rtmpdump] ' + line)
|
||||||
|
finally:
|
||||||
|
proc.wait()
|
||||||
if not cursor_in_new_line:
|
if not cursor_in_new_line:
|
||||||
self.to_screen('')
|
self.to_screen('')
|
||||||
return proc.returncode
|
return proc.returncode
|
||||||
@ -163,7 +165,15 @@ class RtmpFD(FileDownloader):
|
|||||||
RD_INCOMPLETE = 2
|
RD_INCOMPLETE = 2
|
||||||
RD_NO_CONNECT = 3
|
RD_NO_CONNECT = 3
|
||||||
|
|
||||||
retval = run_rtmpdump(args)
|
started = time.time()
|
||||||
|
|
||||||
|
try:
|
||||||
|
retval = run_rtmpdump(args)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
if not info_dict.get('is_live'):
|
||||||
|
raise
|
||||||
|
retval = RD_SUCCESS
|
||||||
|
self.to_screen('\n[rtmpdump] Interrupted by user')
|
||||||
|
|
||||||
if retval == RD_NO_CONNECT:
|
if retval == RD_NO_CONNECT:
|
||||||
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
||||||
@ -171,7 +181,7 @@ class RtmpFD(FileDownloader):
|
|||||||
|
|
||||||
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
||||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
|
||||||
time.sleep(5.0) # This seems to be needed
|
time.sleep(5.0) # This seems to be needed
|
||||||
args = basic_args + ['--resume']
|
args = basic_args + ['--resume']
|
||||||
if retval == RD_FAILED:
|
if retval == RD_FAILED:
|
||||||
@ -188,13 +198,14 @@ class RtmpFD(FileDownloader):
|
|||||||
break
|
break
|
||||||
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen('[rtmpdump] %s bytes' % fsize)
|
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': fsize,
|
'downloaded_bytes': fsize,
|
||||||
'total_bytes': fsize,
|
'total_bytes': fsize,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
|
'elapsed': time.time() - started,
|
||||||
})
|
})
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
|
@ -105,22 +105,22 @@ class ABCIE(InfoExtractor):
|
|||||||
|
|
||||||
class ABCIViewIE(InfoExtractor):
|
class ABCIViewIE(InfoExtractor):
|
||||||
IE_NAME = 'abc.net.au:iview'
|
IE_NAME = 'abc.net.au:iview'
|
||||||
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||||
_GEO_COUNTRIES = ['AU']
|
_GEO_COUNTRIES = ['AU']
|
||||||
|
|
||||||
# ABC iview programs are normally available for 14 days only.
|
# ABC iview programs are normally available for 14 days only.
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
|
'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
|
||||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ZY9247A021S00',
|
'id': 'ZX9371A050S00',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Gaston's Visit",
|
'title': "Gaston's Birthday",
|
||||||
'series': "Ben And Holly's Little Kingdom",
|
'series': "Ben And Holly's Little Kingdom",
|
||||||
'description': 'md5:18db170ad71cf161e006a4c688e33155',
|
'description': 'md5:f9de914d02f226968f598ac76f105bcf',
|
||||||
'upload_date': '20180318',
|
'upload_date': '20180604',
|
||||||
'uploader_id': 'abc4kids',
|
'uploader_id': 'abc4kids',
|
||||||
'timestamp': 1521400959,
|
'timestamp': 1528140219,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -129,17 +129,16 @@ class ABCIViewIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
video_params = self._download_json(
|
||||||
video_params = self._parse_json(self._search_regex(
|
'https://iview.abc.net.au/api/programs/' + video_id, video_id)
|
||||||
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
|
title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
|
||||||
title = video_params.get('title') or video_params['seriesTitle']
|
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
|
||||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
|
||||||
|
|
||||||
house_number = video_params.get('episodeHouseNumber')
|
house_number = video_params.get('episodeHouseNumber') or video_id
|
||||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
|
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
|
||||||
int(time.time()), house_number)
|
int(time.time()), house_number)
|
||||||
sig = hmac.new(
|
sig = hmac.new(
|
||||||
'android.content.res.Resources'.encode('utf-8'),
|
b'android.content.res.Resources',
|
||||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||||
token = self._download_webpage(
|
token = self._download_webpage(
|
||||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||||
@ -169,18 +168,26 @@ class ABCIViewIE(InfoExtractor):
|
|||||||
'ext': 'vtt',
|
'ext': 'vtt',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
is_live = video_params.get('livestream') == '1'
|
||||||
|
if is_live:
|
||||||
|
title = self._live_title(title)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': unescapeHTML(title),
|
'title': title,
|
||||||
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
'description': video_params.get('description'),
|
||||||
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
|
'thumbnail': video_params.get('thumbnail'),
|
||||||
'duration': int_or_none(video_params.get('eventDuration')),
|
'duration': int_or_none(video_params.get('eventDuration')),
|
||||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||||
'series': unescapeHTML(video_params.get('seriesTitle')),
|
'series': unescapeHTML(video_params.get('seriesTitle')),
|
||||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
|
'season_number': int_or_none(self._search_regex(
|
||||||
'episode': self._html_search_meta('episode_title', webpage, default=None),
|
r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
|
||||||
|
'episode_number': int_or_none(self._search_regex(
|
||||||
|
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
|
||||||
|
'episode_id': house_number,
|
||||||
'uploader_id': video_params.get('channel'),
|
'uploader_id': video_params.get('channel'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'is_live': is_live,
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import binascii
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import random
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..aes import aes_cbc_decrypt
|
from ..aes import aes_cbc_decrypt
|
||||||
@ -12,9 +15,12 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
bytes_to_intlist,
|
bytes_to_intlist,
|
||||||
|
bytes_to_long,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
|
long_to_bytes,
|
||||||
|
pkcs1pad,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
@ -35,6 +41,7 @@ class ADNIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||||
|
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
|
||||||
|
|
||||||
def _get_subtitles(self, sub_path, video_id):
|
def _get_subtitles(self, sub_path, video_id):
|
||||||
if not sub_path:
|
if not sub_path:
|
||||||
@ -42,16 +49,14 @@ class ADNIE(InfoExtractor):
|
|||||||
|
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
urljoin(self._BASE_URL, sub_path),
|
urljoin(self._BASE_URL, sub_path),
|
||||||
video_id, fatal=False, headers={
|
video_id, fatal=False)
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
|
|
||||||
})
|
|
||||||
if not enc_subtitles:
|
if not enc_subtitles:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||||
bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
|
bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')),
|
||||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||||
))
|
))
|
||||||
subtitles_json = self._parse_json(
|
subtitles_json = self._parse_json(
|
||||||
@ -112,11 +117,24 @@ class ADNIE(InfoExtractor):
|
|||||||
error = None
|
error = None
|
||||||
if not links:
|
if not links:
|
||||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
links_url = player_config.get('linksurl') or options['videoUrl']
|
||||||
links_data = self._download_json(urljoin(
|
token = options['token']
|
||||||
self._BASE_URL, links_url), video_id)
|
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||||
|
message = bytes_to_intlist(json.dumps({
|
||||||
|
'k': self._K,
|
||||||
|
'e': 60,
|
||||||
|
't': token,
|
||||||
|
}))
|
||||||
|
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||||
|
n, e = self._RSA_KEY
|
||||||
|
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||||
|
authorization = base64.b64encode(encrypted_message).decode()
|
||||||
|
links_data = self._download_json(
|
||||||
|
urljoin(self._BASE_URL, links_url), video_id, headers={
|
||||||
|
'Authorization': 'Bearer ' + authorization,
|
||||||
|
})
|
||||||
links = links_data.get('links') or {}
|
links = links_data.get('links') or {}
|
||||||
metas = metas or links_data.get('meta') or {}
|
metas = metas or links_data.get('meta') or {}
|
||||||
sub_path = sub_path or links_data.get('subtitles')
|
sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token
|
||||||
error = links_data.get('error')
|
error = links_data.get('error')
|
||||||
title = metas.get('title') or video_info['title']
|
title = metas.get('title') or video_info['title']
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -277,7 +277,9 @@ class AnvatoIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
self._initialize_geo_bypass({
|
||||||
|
'countries': smuggled_data.get('geo_countries'),
|
||||||
|
})
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||||
|
94
youtube_dl/extractor/apa.py
Normal file
94
youtube_dl/extractor/apa.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
js_to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class APAIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||||
|
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'jjv85FdZ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
|
||||||
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 254,
|
||||||
|
'timestamp': 1519211149,
|
||||||
|
'upload_date': '20180221',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return [
|
||||||
|
mobj.group('url')
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
|
||||||
|
webpage)]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
jwplatform_id = self._search_regex(
|
||||||
|
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||||
|
'jwplatform id', default=None)
|
||||||
|
|
||||||
|
if jwplatform_id:
|
||||||
|
return self.url_result(
|
||||||
|
'jwplatform:' + jwplatform_id, ie='JWPlatform',
|
||||||
|
video_id=video_id)
|
||||||
|
|
||||||
|
sources = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
|
||||||
|
video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for source in sources:
|
||||||
|
if not isinstance(source, dict):
|
||||||
|
continue
|
||||||
|
source_url = source.get('file')
|
||||||
|
if not source_url or not isinstance(source_url, compat_str):
|
||||||
|
continue
|
||||||
|
ext = determine_ext(source_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': source_url,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnail = self._search_regex(
|
||||||
|
r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||||
|
'thumbnail', fatal=False, group='url')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_id,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -74,7 +74,7 @@ class AtresPlayerIE(InfoExtractor):
|
|||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -5,13 +5,12 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AudiMediaIE(InfoExtractor):
|
class AudiMediaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
|
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
|
||||||
'md5': '79a8b71c46d49042609795ab59779b66',
|
'md5': '79a8b71c46d49042609795ab59779b66',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -24,41 +23,46 @@ class AudiMediaIE(InfoExtractor):
|
|||||||
'duration': 74022,
|
'duration': 74022,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
# extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
|
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
|
||||||
_AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
raw_payload = self._search_regex([
|
raw_payload = self._search_regex([
|
||||||
r'class="amtv-embed"[^>]+id="([^"]+)"',
|
r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
|
||||||
r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
|
r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
|
||||||
|
r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
|
||||||
|
r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
|
||||||
|
r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
|
||||||
], webpage, 'raw payload')
|
], webpage, 'raw payload')
|
||||||
_, stage_mode, video_id, lang = raw_payload.split('-')
|
_, stage_mode, video_id, _ = raw_payload.split('-')
|
||||||
|
|
||||||
# TODO: handle s and e stage_mode (live streams and ended live streams)
|
# TODO: handle s and e stage_mode (live streams and ended live streams)
|
||||||
if stage_mode not in ('s', 'e'):
|
if stage_mode not in ('s', 'e'):
|
||||||
request = sanitized_Request(
|
video_data = self._download_json(
|
||||||
'https://audimedia.tv/api/video/v1/videos/%s?embed[]=video_versions&embed[]=thumbnail_image&where[content_language_iso]=%s' % (video_id, lang),
|
'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
|
||||||
headers={'X-Auth-Token': self._AUTH_TOKEN})
|
video_id, query={
|
||||||
json_data = self._download_json(request, video_id)['results']
|
'embed[]': ['video_versions', 'thumbnail_image'],
|
||||||
|
})['results']
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
stream_url_hls = json_data.get('stream_url_hls')
|
stream_url_hls = video_data.get('stream_url_hls')
|
||||||
if stream_url_hls:
|
if stream_url_hls:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
stream_url_hls, video_id, 'mp4',
|
stream_url_hls, video_id, 'mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
stream_url_hds = json_data.get('stream_url_hds')
|
stream_url_hds = video_data.get('stream_url_hds')
|
||||||
if stream_url_hds:
|
if stream_url_hds:
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
stream_url_hds + '?hdcore=3.4.0',
|
stream_url_hds + '?hdcore=3.4.0',
|
||||||
video_id, f4m_id='hds', fatal=False))
|
video_id, f4m_id='hds', fatal=False))
|
||||||
|
|
||||||
for video_version in json_data.get('video_versions'):
|
for video_version in video_data.get('video_versions', []):
|
||||||
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
|
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
|
||||||
if not video_version_url:
|
if not video_version_url:
|
||||||
continue
|
continue
|
||||||
@ -79,11 +83,11 @@ class AudiMediaIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': json_data['title'],
|
'title': video_data['title'],
|
||||||
'description': json_data.get('subtitle'),
|
'description': video_data.get('subtitle'),
|
||||||
'thumbnail': json_data.get('thumbnail_image', {}).get('file'),
|
'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
|
||||||
'timestamp': parse_iso8601(json_data.get('publication_date')),
|
'timestamp': parse_iso8601(video_data.get('publication_date')),
|
||||||
'duration': int_or_none(json_data.get('duration')),
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
'view_count': int_or_none(json_data.get('view_count')),
|
'view_count': int_or_none(video_data.get('view_count')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -65,7 +65,7 @@ class AudiomackIE(InfoExtractor):
|
|||||||
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
|
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': api_response.get('id', album_url_tag),
|
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||||
'uploader': api_response.get('artist'),
|
'uploader': api_response.get('artist'),
|
||||||
'title': api_response.get('title'),
|
'title': api_response.get('title'),
|
||||||
'url': api_response['url'],
|
'url': api_response['url'],
|
||||||
|
@ -44,7 +44,7 @@ class BambuserIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
@ -20,7 +21,6 @@ from ..utils import (
|
|||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
@ -333,14 +333,9 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
self._raise_extractor_error(last_exception)
|
self._raise_extractor_error(last_exception)
|
||||||
|
|
||||||
def _download_media_selector_url(self, url, programme_id=None):
|
def _download_media_selector_url(self, url, programme_id=None):
|
||||||
try:
|
media_selection = self._download_xml(
|
||||||
media_selection = self._download_xml(
|
url, programme_id, 'Downloading media selection XML',
|
||||||
url, programme_id, 'Downloading media selection XML')
|
expected_status=(403, 404))
|
||||||
except ExtractorError as ee:
|
|
||||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
|
|
||||||
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
return self._process_media_selector(media_selection, programme_id)
|
return self._process_media_selector(media_selection, programme_id)
|
||||||
|
|
||||||
def _process_media_selector(self, media_selection, programme_id):
|
def _process_media_selector(self, media_selection, programme_id):
|
||||||
@ -772,6 +767,17 @@ class BBCIE(BBCCoUkIE):
|
|||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p06556y7',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||||
|
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -994,6 +1000,36 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bbc3_config = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||||
|
'bbcthree config', default='{}'),
|
||||||
|
playlist_id, transform_source=js_to_json, fatal=False)
|
||||||
|
if bbc3_config:
|
||||||
|
bbc3_playlist = try_get(
|
||||||
|
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
|
||||||
|
dict)
|
||||||
|
if bbc3_playlist:
|
||||||
|
playlist_title = bbc3_playlist.get('title') or playlist_title
|
||||||
|
thumbnail = bbc3_playlist.get('holdingImageURL')
|
||||||
|
entries = []
|
||||||
|
for bbc3_item in bbc3_playlist['items']:
|
||||||
|
programme_id = bbc3_item.get('versionID')
|
||||||
|
if not programme_id:
|
||||||
|
continue
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
entries.append({
|
||||||
|
'id': programme_id,
|
||||||
|
'title': playlist_title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def extract_all(pattern):
|
def extract_all(pattern):
|
||||||
return list(filter(None, map(
|
return list(filter(None, map(
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
|
@ -12,7 +12,7 @@ class BellMediaIE(InfoExtractor):
|
|||||||
(?:
|
(?:
|
||||||
ctv|
|
ctv|
|
||||||
tsn|
|
tsn|
|
||||||
bnn|
|
bnn(?:bloomberg)?|
|
||||||
thecomedynetwork|
|
thecomedynetwork|
|
||||||
discovery|
|
discovery|
|
||||||
discoveryvelocity|
|
discoveryvelocity|
|
||||||
@ -27,17 +27,16 @@ class BellMediaIE(InfoExtractor):
|
|||||||
much\.com
|
much\.com
|
||||||
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
||||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '706966',
|
'id': '1403070',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
|
'title': 'David Cockfield\'s Top Picks',
|
||||||
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
|
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
|
||||||
'upload_date': '20150919',
|
'upload_date': '20180525',
|
||||||
'timestamp': 1442624700,
|
'timestamp': 1527288600,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['HTTP Error 404'],
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -70,6 +69,7 @@ class BellMediaIE(InfoExtractor):
|
|||||||
'investigationdiscovery': 'invdisc',
|
'investigationdiscovery': 'invdisc',
|
||||||
'animalplanet': 'aniplan',
|
'animalplanet': 'aniplan',
|
||||||
'etalk': 'ctv',
|
'etalk': 'ctv',
|
||||||
|
'bnnbloomberg': 'bnn',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -114,7 +114,7 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
|
|
||||||
if 'anime/' not in url:
|
if 'anime/' not in url:
|
||||||
cid = self._search_regex(
|
cid = self._search_regex(
|
||||||
r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
|
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||||
default=None
|
default=None
|
||||||
) or compat_parse_qs(self._search_regex(
|
) or compat_parse_qs(self._search_regex(
|
||||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||||
|
@ -572,7 +572,8 @@ class BrightcoveNewIE(AdobePassIE):
|
|||||||
container = source.get('container')
|
container = source.get('container')
|
||||||
ext = mimetype2ext(source.get('type'))
|
ext = mimetype2ext(source.get('type'))
|
||||||
src = source.get('src')
|
src = source.get('src')
|
||||||
if ext == 'ism' or container == 'WVM':
|
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||||
|
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||||
continue
|
continue
|
||||||
elif ext == 'm3u8' or container == 'M2TS':
|
elif ext == 'm3u8' or container == 'M2TS':
|
||||||
if not src:
|
if not src:
|
||||||
@ -629,6 +630,14 @@ class BrightcoveNewIE(AdobePassIE):
|
|||||||
'format_id': build_format_id('rtmp'),
|
'format_id': build_format_id('rtmp'),
|
||||||
})
|
})
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
if not formats:
|
||||||
|
# for sonyliv.com DRM protected videos
|
||||||
|
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||||
|
if s3_source_url:
|
||||||
|
formats.append({
|
||||||
|
'url': s3_source_url,
|
||||||
|
'format_id': 'source',
|
||||||
|
})
|
||||||
|
|
||||||
errors = json_data.get('errors')
|
errors = json_data.get('errors')
|
||||||
if not formats and errors:
|
if not formats and errors:
|
||||||
@ -669,7 +678,10 @@ class BrightcoveNewIE(AdobePassIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
self._initialize_geo_bypass({
|
||||||
|
'countries': smuggled_data.get('geo_countries'),
|
||||||
|
'ip_blocks': smuggled_data.get('geo_ip_blocks'),
|
||||||
|
})
|
||||||
|
|
||||||
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
|
42
youtube_dl/extractor/businessinsider.py
Normal file
42
youtube_dl/extractor/businessinsider.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .jwplatform import JWPlatformIE
|
||||||
|
|
||||||
|
|
||||||
|
class BusinessInsiderIE(InfoExtractor):
    """Extractor for Business Insider article pages (``.com`` and ``.nl``).

    The article page is only used to find an 8-character JW Platform media
    id; the result is handed over to ``JWPlatformIE``.
    """

    _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
        'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
        'info_dict': {
            'id': 'hZRllCfw',
            'ext': 'mp4',
            'title': "Here's how much radiation you're exposed to in everyday life",
            'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
            'upload_date': '20170709',
            'timestamp': 1499606400,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
        'only_matching': True,
    }, {
        'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the article at *url* into a ``jwplatform:<id>`` URL result.

        The id matched by ``_VALID_URL`` (last path component of the article
        URL) is passed along as the result's ``video_id``.
        """
        article_id = self._match_id(url)
        page = self._download_webpage(url, article_id)

        # Alternative spots in the markup that carry the 8-character media id.
        media_id_patterns = (
            r'data-media-id=["\']([a-zA-Z0-9]{8})',
            r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
            r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
        )
        media_id = self._search_regex(
            media_id_patterns, page, 'jwplatform id')

        return self.url_result(
            'jwplatform:%s' % media_id,
            ie=JWPlatformIE.ie_key(),
            video_id=article_id)
|
96
youtube_dl/extractor/cammodels.py
Normal file
96
youtube_dl/extractor/cammodels.py
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CamModelsIE(InfoExtractor):
    """Extractor for live cam pages on cammodels.com."""

    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.cammodels.com/cam/AutumnKnight/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a live-stream info dict for the cam page at *url*.

        The page must expose a ``manifestUrlRoot``; the JSON manifest found
        under it lists formats, each with a list of encodings. Only formats
        whose id contains ``rtmp`` or ``hls`` are kept.

        Raises ExtractorError when no manifest root is present: flagged as
        expected when the page contains one of the known status phrases
        (offline / private show / performing live), unexpected otherwise.
        """
        user_id = self._match_id(url)

        webpage = self._download_webpage(
            url, user_id, headers=self.geo_verification_headers())

        manifest_root = self._html_search_regex(
            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)

        if not manifest_root:
            # (phrase looked for in the page, message reported to the user)
            known_states = (
                ("I'm offline, but let's stay connected", 'This user is currently offline'),
                ('in a private show', 'This user is in a private show'),
                ('is currently performing LIVE', 'This model is currently performing live'),
            )
            reason = next(
                (message for marker, message in known_states
                 if marker in webpage),
                None)
            if reason is None:
                raise ExtractorError(
                    'Unable to find manifest URL root', expected=False)
            raise ExtractorError(reason, expected=True)

        manifest = self._download_json(
            '%s%s.json' % (manifest_root, user_id), user_id)

        formats = []
        for format_id, format_dict in manifest['formats'].items():
            # Anything that is not a dict simply has no usable encodings.
            encodings = (
                format_dict.get('encodings')
                if isinstance(format_dict, dict) else None)
            if not isinstance(encodings, list):
                continue
            vcodec = format_dict.get('videoCodec')
            acodec = format_dict.get('audioCodec')

            for media in encodings:
                media_url = (
                    media.get('location')
                    if isinstance(media, dict) else None)
                if not (media_url and isinstance(media_url, compat_str)):
                    continue

                # Format id is "<manifest key>" or "<manifest key>-<height>p".
                id_parts = [format_id]
                height = int_or_none(media.get('videoHeight'))
                if height is not None:
                    id_parts.append('%dp' % height)

                fmt = {
                    'url': media_url,
                    'format_id': '-'.join(id_parts),
                    'width': int_or_none(media.get('videoWidth')),
                    'height': height,
                    'vbr': int_or_none(media.get('videoKbps')),
                    'abr': int_or_none(media.get('audioKbps')),
                    'fps': int_or_none(media.get('fps')),
                    'vcodec': vcodec,
                    'acodec': acodec,
                }

                if 'rtmp' in format_id:
                    protocol_fields = {'ext': 'flv'}
                elif 'hls' in format_id:
                    protocol_fields = {
                        'ext': 'mp4',
                        # hls skips fragments, preferring rtmp
                        'preference': -1,
                    }
                else:
                    # Neither rtmp nor hls: drop this encoding.
                    continue
                fmt.update(protocol_fields)
                formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': user_id,
            'title': self._live_title(user_id),
            'is_live': True,
            'formats': formats,
        }
|
69
youtube_dl/extractor/camtube.py
Normal file
69
youtube_dl/extractor/camtube.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CamTubeIE(InfoExtractor):
    """Extractor for recordings hosted on camtube.co."""

    _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
        'info_dict': {
            'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
            'display_id': 'minafay-030618-1136-chaturbate-female',
            'ext': 'mp4',
            'title': 'minafay-030618-1136-chaturbate-female',
            'duration': 1274,
            'timestamp': 1528018608,
            'upload_date': '20180603',
        },
        'params': {
            'skip_download': True,
        },
    }]

    _API_BASE = 'https://api.camtube.co'

    def _real_extract(self, url):
        """Return the info dict for the recording addressed by *url*.

        A session token is requested from the API and stored as the
        ``session`` cookie before the recording metadata is fetched. The
        recording's ``uuid`` becomes the video id and is used to build the
        HLS manifest URL; the URL slug is kept as display id and title.
        """
        display_id = self._match_id(url)

        session = self._download_json(
            '%s/rpc/session/new' % self._API_BASE, display_id,
            'Downloading session token')
        self._set_cookie('api.camtube.co', 'session', session['token'])

        video = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, display_id), display_id,
            headers={'Referer': url})

        video_id = video['uuid']

        info = {
            'id': video_id,
            'display_id': display_id,
            'title': display_id,
            'timestamp': unified_timestamp(video.get('createdAt')),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('viewCount')),
            'like_count': int_or_none(video.get('likeCount')),
            'creator': video.get('stageName'),
        }

        # Single HLS rendition, addressed by the recording's uuid.
        info['formats'] = [{
            'url': '%s/recordings/%s/manifest.m3u8'
                   % (self._API_BASE, video_id),
            'format_id': 'hls',
            'ext': 'mp4',
            'protocol': 'm3u8_native',
        }]
        return info
|
@ -17,9 +17,11 @@ from ..utils import (
|
|||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
|
strip_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
@ -129,15 +131,23 @@ class CBCIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
title = self._og_search_title(webpage, default=None) or self._html_search_meta(
|
||||||
|
'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
|
||||||
|
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||||
entries = [
|
entries = [
|
||||||
self._extract_player_init(player_init, display_id)
|
self._extract_player_init(player_init, display_id)
|
||||||
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
||||||
|
media_ids = []
|
||||||
|
for media_id_re in (
|
||||||
|
r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
|
||||||
|
r'<div[^>]+\bid=["\']player-(\d+)',
|
||||||
|
r'guid["\']\s*:\s*["\'](\d+)'):
|
||||||
|
media_ids.extend(re.findall(media_id_re, webpage))
|
||||||
entries.extend([
|
entries.extend([
|
||||||
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||||
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
|
for media_id in orderedSet(media_ids)])
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, display_id,
|
entries, display_id, strip_or_none(title),
|
||||||
self._og_search_title(webpage, fatal=False),
|
|
||||||
self._og_search_description(webpage))
|
self._og_search_description(webpage))
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,7 +31,8 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(
|
||||||
|
url, video_id, headers=self.geo_verification_headers())
|
||||||
|
|
||||||
m3u8_urls = []
|
m3u8_urls = []
|
||||||
|
|
||||||
|
60
youtube_dl/extractor/cloudflarestream.py
Normal file
60
youtube_dl/extractor/cloudflarestream.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class CloudflareStreamIE(InfoExtractor):
    """Extractor for videos served by Cloudflare Stream.

    Accepts watch pages, direct ``cloudflarestream.com/<id>`` URLs and the
    embed script URL carrying the id in its ``video`` query parameter.
    """

    # Verbose-mode pattern: whitespace inside the literal is not significant.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:watch\.)?cloudflarestream\.com/|
                            embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=
                        )
                        (?P<id>[\da-f]+)
                    '''
    _TESTS = [{
        'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
        'info_dict': {
            'id': '31c9291ab41fac05471db4e73aa11717',
            'ext': 'mp4',
            'title': '31c9291ab41fac05471db4e73aa11717',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
        'only_matching': True,
    }, {
        'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every Cloudflare Stream embed script in *webpage*.

        Matches ``<script>`` tags whose quoted ``src`` points at
        ``embed.cloudflarestream.com/embed/<name>.js`` with a ``video=``
        parameter; the URLs are returned in document order.
        """
        embed_script_re = r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1'
        urls = []
        for mobj in re.finditer(embed_script_re, webpage):
            urls.append(mobj.group('url'))
        return urls

    def _real_extract(self, url):
        """Collect HLS and DASH formats for the video id found in *url*.

        Both manifests are requested non-fatally; the video id doubles as
        the title.
        """
        video_id = self._match_id(url)

        hls_url = 'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id
        dash_url = 'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id

        formats = self._extract_m3u8_formats(
            hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        dash_formats = self._extract_mpd_formats(
            dash_url, video_id, mpd_id='dash', fatal=False)
        formats.extend(dash_formats)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }
|
@ -19,6 +19,7 @@ from ..compat import (
|
|||||||
compat_cookies,
|
compat_cookies,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
|
compat_integer_types,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_os_name,
|
compat_os_name,
|
||||||
compat_str,
|
compat_str,
|
||||||
@ -339,15 +340,17 @@ class InfoExtractor(object):
|
|||||||
_GEO_BYPASS attribute may be set to False in order to disable
|
_GEO_BYPASS attribute may be set to False in order to disable
|
||||||
geo restriction bypass mechanisms for a particular extractor.
|
geo restriction bypass mechanisms for a particular extractor.
|
||||||
Though it won't disable explicit geo restriction bypass based on
|
Though it won't disable explicit geo restriction bypass based on
|
||||||
country code provided with geo_bypass_country. (experimental)
|
country code provided with geo_bypass_country.
|
||||||
|
|
||||||
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
|
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
|
||||||
countries for this extractor. One of these countries will be used by
|
countries for this extractor. One of these countries will be used by
|
||||||
geo restriction bypass mechanism right away in order to bypass
|
geo restriction bypass mechanism right away in order to bypass
|
||||||
geo restriction, of course, if the mechanism is not disabled. (experimental)
|
geo restriction, of course, if the mechanism is not disabled.
|
||||||
|
|
||||||
NB: both these geo attributes are experimental and may change in future
|
_GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
|
||||||
or be completely removed.
|
IP blocks in CIDR notation for this extractor. One of these IP blocks
|
||||||
|
will be used by geo restriction bypass mechanism similarly
|
||||||
|
to _GEO_COUNTRIES.
|
||||||
|
|
||||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||||
in order to warn the users and skip the tests.
|
in order to warn the users and skip the tests.
|
||||||
@ -358,6 +361,7 @@ class InfoExtractor(object):
|
|||||||
_x_forwarded_for_ip = None
|
_x_forwarded_for_ip = None
|
||||||
_GEO_BYPASS = True
|
_GEO_BYPASS = True
|
||||||
_GEO_COUNTRIES = None
|
_GEO_COUNTRIES = None
|
||||||
|
_GEO_IP_BLOCKS = None
|
||||||
_WORKING = True
|
_WORKING = True
|
||||||
|
|
||||||
def __init__(self, downloader=None):
|
def __init__(self, downloader=None):
|
||||||
@ -392,12 +396,15 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
"""Initializes an instance (authentication, etc)."""
|
"""Initializes an instance (authentication, etc)."""
|
||||||
self._initialize_geo_bypass(self._GEO_COUNTRIES)
|
self._initialize_geo_bypass({
|
||||||
|
'countries': self._GEO_COUNTRIES,
|
||||||
|
'ip_blocks': self._GEO_IP_BLOCKS,
|
||||||
|
})
|
||||||
if not self._ready:
|
if not self._ready:
|
||||||
self._real_initialize()
|
self._real_initialize()
|
||||||
self._ready = True
|
self._ready = True
|
||||||
|
|
||||||
def _initialize_geo_bypass(self, countries):
|
def _initialize_geo_bypass(self, geo_bypass_context):
|
||||||
"""
|
"""
|
||||||
Initialize geo restriction bypass mechanism.
|
Initialize geo restriction bypass mechanism.
|
||||||
|
|
||||||
@ -408,28 +415,82 @@ class InfoExtractor(object):
|
|||||||
HTTP requests.
|
HTTP requests.
|
||||||
|
|
||||||
This method will be used for initial geo bypass mechanism initialization
|
This method will be used for initial geo bypass mechanism initialization
|
||||||
during the instance initialization with _GEO_COUNTRIES.
|
during the instance initialization with _GEO_COUNTRIES and
|
||||||
|
_GEO_IP_BLOCKS.
|
||||||
|
|
||||||
You may also manually call it from extractor's code if geo countries
|
You may also manually call it from extractor's code if geo bypass
|
||||||
information is not available beforehand (e.g. obtained during
|
information is not available beforehand (e.g. obtained during
|
||||||
extraction) or due to some another reason.
|
extraction) or due to some other reason. In this case you should pass
|
||||||
|
this information in geo bypass context passed as first argument. It may
|
||||||
|
contain the following fields:
|
||||||
|
|
||||||
|
countries: List of geo unrestricted countries (similar
|
||||||
|
to _GEO_COUNTRIES)
|
||||||
|
ip_blocks: List of geo unrestricted IP blocks in CIDR notation
|
||||||
|
(similar to _GEO_IP_BLOCKS)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not self._x_forwarded_for_ip:
|
if not self._x_forwarded_for_ip:
|
||||||
country_code = self._downloader.params.get('geo_bypass_country', None)
|
|
||||||
# If there is no explicit country for geo bypass specified and
|
# Geo bypass mechanism is explicitly disabled by user
|
||||||
# the extractor is known to be geo restricted let's fake IP
|
if not self._downloader.params.get('geo_bypass', True):
|
||||||
# as X-Forwarded-For right away.
|
return
|
||||||
if (not country_code and
|
|
||||||
self._GEO_BYPASS and
|
if not geo_bypass_context:
|
||||||
self._downloader.params.get('geo_bypass', True) and
|
geo_bypass_context = {}
|
||||||
countries):
|
|
||||||
country_code = random.choice(countries)
|
# Backward compatibility: previously _initialize_geo_bypass
|
||||||
if country_code:
|
# expected a list of countries, some 3rd party code may still use
|
||||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
# it this way
|
||||||
|
if isinstance(geo_bypass_context, (list, tuple)):
|
||||||
|
geo_bypass_context = {
|
||||||
|
'countries': geo_bypass_context,
|
||||||
|
}
|
||||||
|
|
||||||
|
# The whole point of geo bypass mechanism is to fake IP
|
||||||
|
# as X-Forwarded-For HTTP header based on some IP block or
|
||||||
|
# country code.
|
||||||
|
|
||||||
|
# Path 1: bypassing based on IP block in CIDR notation
|
||||||
|
|
||||||
|
# Explicit IP block specified by user, use it right away
|
||||||
|
# regardless of whether extractor is geo bypassable or not
|
||||||
|
ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
|
||||||
|
|
||||||
|
# Otherwise use random IP block from geo bypass context but only
|
||||||
|
# if extractor is known as geo bypassable
|
||||||
|
if not ip_block:
|
||||||
|
ip_blocks = geo_bypass_context.get('ip_blocks')
|
||||||
|
if self._GEO_BYPASS and ip_blocks:
|
||||||
|
ip_block = random.choice(ip_blocks)
|
||||||
|
|
||||||
|
if ip_block:
|
||||||
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
|
||||||
|
if self._downloader.params.get('verbose', False):
|
||||||
|
self._downloader.to_screen(
|
||||||
|
'[debug] Using fake IP %s as X-Forwarded-For.'
|
||||||
|
% self._x_forwarded_for_ip)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Path 2: bypassing based on country code
|
||||||
|
|
||||||
|
# Explicit country code specified by user, use it right away
|
||||||
|
# regardless of whether extractor is geo bypassable or not
|
||||||
|
country = self._downloader.params.get('geo_bypass_country', None)
|
||||||
|
|
||||||
|
# Otherwise use random country code from geo bypass context but
|
||||||
|
# only if extractor is known as geo bypassable
|
||||||
|
if not country:
|
||||||
|
countries = geo_bypass_context.get('countries')
|
||||||
|
if self._GEO_BYPASS and countries:
|
||||||
|
country = random.choice(countries)
|
||||||
|
|
||||||
|
if country:
|
||||||
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
|
||||||
if self._downloader.params.get('verbose', False):
|
if self._downloader.params.get('verbose', False):
|
||||||
self._downloader.to_screen(
|
self._downloader.to_screen(
|
||||||
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||||
% (self._x_forwarded_for_ip, country_code.upper()))
|
% (self._x_forwarded_for_ip, country.upper()))
|
||||||
|
|
||||||
def extract(self, url):
|
def extract(self, url):
|
||||||
"""Extracts URL information and returns it in list of dicts."""
|
"""Extracts URL information and returns it in list of dicts."""
|
||||||
@ -488,8 +549,26 @@ class InfoExtractor(object):
|
|||||||
def IE_NAME(self):
|
def IE_NAME(self):
|
||||||
return compat_str(type(self).__name__[:-2])
|
return compat_str(type(self).__name__[:-2])
|
||||||
|
|
||||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
@staticmethod
|
||||||
""" Returns the response handle """
|
def __can_accept_status_code(err, expected_status):
|
||||||
|
assert isinstance(err, compat_urllib_error.HTTPError)
|
||||||
|
if expected_status is None:
|
||||||
|
return False
|
||||||
|
if isinstance(expected_status, compat_integer_types):
|
||||||
|
return err.code == expected_status
|
||||||
|
elif isinstance(expected_status, (list, tuple)):
|
||||||
|
return err.code in expected_status
|
||||||
|
elif callable(expected_status):
|
||||||
|
return expected_status(err.code) is True
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
|
||||||
|
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
|
||||||
|
"""
|
||||||
|
Return the response handle.
|
||||||
|
|
||||||
|
See _download_webpage docstring for arguments specification.
|
||||||
|
"""
|
||||||
if note is None:
|
if note is None:
|
||||||
self.report_download_webpage(video_id)
|
self.report_download_webpage(video_id)
|
||||||
elif note is not False:
|
elif note is not False:
|
||||||
@ -518,6 +597,10 @@ class InfoExtractor(object):
|
|||||||
try:
|
try:
|
||||||
return self._downloader.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
|
if isinstance(err, compat_urllib_error.HTTPError):
|
||||||
|
if self.__can_accept_status_code(err, expected_status):
|
||||||
|
return err.fp
|
||||||
|
|
||||||
if errnote is False:
|
if errnote is False:
|
||||||
return False
|
return False
|
||||||
if errnote is None:
|
if errnote is None:
|
||||||
@ -530,13 +613,17 @@ class InfoExtractor(object):
|
|||||||
self._downloader.report_warning(errmsg)
|
self._downloader.report_warning(errmsg)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
|
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||||
""" Returns a tuple (page content as string, URL handle) """
|
"""
|
||||||
|
Return a tuple (page content as string, URL handle).
|
||||||
|
|
||||||
|
See _download_webpage docstring for arguments specification.
|
||||||
|
"""
|
||||||
# Strip hashes from the URL (#1038)
|
# Strip hashes from the URL (#1038)
|
||||||
if isinstance(url_or_request, (compat_str, str)):
|
if isinstance(url_or_request, (compat_str, str)):
|
||||||
url_or_request = url_or_request.partition('#')[0]
|
url_or_request = url_or_request.partition('#')[0]
|
||||||
|
|
||||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
|
||||||
if urlh is False:
|
if urlh is False:
|
||||||
assert not fatal
|
assert not fatal
|
||||||
return False
|
return False
|
||||||
@ -625,13 +712,52 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
|
def _download_webpage(
|
||||||
""" Returns the data of the page as a string """
|
self, url_or_request, video_id, note=None, errnote=None,
|
||||||
|
fatal=True, tries=1, timeout=5, encoding=None, data=None,
|
||||||
|
headers={}, query={}, expected_status=None):
|
||||||
|
"""
|
||||||
|
Return the data of the page as a string.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
url_or_request -- plain text URL as a string or
|
||||||
|
a compat_urllib_request.Request object
|
||||||
|
video_id -- Video/playlist/item identifier (string)
|
||||||
|
|
||||||
|
Keyword arguments:
|
||||||
|
note -- note printed before downloading (string)
|
||||||
|
errnote -- note printed in case of an error (string)
|
||||||
|
fatal -- flag denoting whether error should be considered fatal,
|
||||||
|
i.e. whether it should cause ExtractionError to be raised,
|
||||||
|
otherwise a warning will be reported and extraction continued
|
||||||
|
tries -- number of tries
|
||||||
|
timeout -- sleep interval between tries
|
||||||
|
encoding -- encoding for a page content decoding, guessed automatically
|
||||||
|
when not explicitly specified
|
||||||
|
data -- POST data (bytes)
|
||||||
|
headers -- HTTP headers (dict)
|
||||||
|
query -- URL query (dict)
|
||||||
|
expected_status -- allows to accept failed HTTP requests (non 2xx
|
||||||
|
status code) by explicitly specifying a set of accepted status
|
||||||
|
codes. Can be any of the following entities:
|
||||||
|
- an integer type specifying an exact failed status code to
|
||||||
|
accept
|
||||||
|
- a list or a tuple of integer types specifying a list of
|
||||||
|
failed status codes to accept
|
||||||
|
- a callable accepting an actual failed status code and
|
||||||
|
returning True if it should be accepted
|
||||||
|
Note that this argument does not affect success status codes (2xx)
|
||||||
|
which are always accepted.
|
||||||
|
"""
|
||||||
|
|
||||||
success = False
|
success = False
|
||||||
try_count = 0
|
try_count = 0
|
||||||
while success is False:
|
while success is False:
|
||||||
try:
|
try:
|
||||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
|
res = self._download_webpage_handle(
|
||||||
|
url_or_request, video_id, note, errnote, fatal,
|
||||||
|
encoding=encoding, data=data, headers=headers, query=query,
|
||||||
|
expected_status=expected_status)
|
||||||
success = True
|
success = True
|
||||||
except compat_http_client.IncompleteRead as e:
|
except compat_http_client.IncompleteRead as e:
|
||||||
try_count += 1
|
try_count += 1
|
||||||
@ -647,11 +773,17 @@ class InfoExtractor(object):
|
|||||||
def _download_xml_handle(
|
def _download_xml_handle(
|
||||||
self, url_or_request, video_id, note='Downloading XML',
|
self, url_or_request, video_id, note='Downloading XML',
|
||||||
errnote='Unable to download XML', transform_source=None,
|
errnote='Unable to download XML', transform_source=None,
|
||||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||||
"""Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
|
expected_status=None):
|
||||||
|
"""
|
||||||
|
Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
|
||||||
|
|
||||||
|
See _download_webpage docstring for arguments specification.
|
||||||
|
"""
|
||||||
res = self._download_webpage_handle(
|
res = self._download_webpage_handle(
|
||||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||||
encoding=encoding, data=data, headers=headers, query=query)
|
encoding=encoding, data=data, headers=headers, query=query,
|
||||||
|
expected_status=expected_status)
|
||||||
if res is False:
|
if res is False:
|
||||||
return res
|
return res
|
||||||
xml_string, urlh = res
|
xml_string, urlh = res
|
||||||
@ -659,15 +791,21 @@ class InfoExtractor(object):
|
|||||||
xml_string, video_id, transform_source=transform_source,
|
xml_string, video_id, transform_source=transform_source,
|
||||||
fatal=fatal), urlh
|
fatal=fatal), urlh
|
||||||
|
|
||||||
def _download_xml(self, url_or_request, video_id,
|
def _download_xml(
|
||||||
note='Downloading XML', errnote='Unable to download XML',
|
self, url_or_request, video_id,
|
||||||
transform_source=None, fatal=True, encoding=None,
|
note='Downloading XML', errnote='Unable to download XML',
|
||||||
data=None, headers={}, query={}):
|
transform_source=None, fatal=True, encoding=None,
|
||||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
data=None, headers={}, query={}, expected_status=None):
|
||||||
|
"""
|
||||||
|
Return the xml as an xml.etree.ElementTree.Element.
|
||||||
|
|
||||||
|
See _download_webpage docstring for arguments specification.
|
||||||
|
"""
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
url_or_request, video_id, note=note, errnote=errnote,
|
url_or_request, video_id, note=note, errnote=errnote,
|
||||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||||
data=data, headers=headers, query=query)
|
data=data, headers=headers, query=query,
|
||||||
|
expected_status=expected_status)
|
||||||
return res if res is False else res[0]
|
return res if res is False else res[0]
|
||||||
|
|
||||||
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
||||||
@ -685,11 +823,17 @@ class InfoExtractor(object):
|
|||||||
def _download_json_handle(
|
def _download_json_handle(
|
||||||
self, url_or_request, video_id, note='Downloading JSON metadata',
|
self, url_or_request, video_id, note='Downloading JSON metadata',
|
||||||
errnote='Unable to download JSON metadata', transform_source=None,
|
errnote='Unable to download JSON metadata', transform_source=None,
|
||||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||||
"""Return a tuple (JSON object, URL handle)"""
|
expected_status=None):
|
||||||
|
"""
|
||||||
|
Return a tuple (JSON object, URL handle).
|
||||||
|
|
||||||
|
See _download_webpage docstring for arguments specification.
|
||||||
|
"""
|
||||||
res = self._download_webpage_handle(
|
res = self._download_webpage_handle(
|
||||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||||
encoding=encoding, data=data, headers=headers, query=query)
|
encoding=encoding, data=data, headers=headers, query=query,
|
||||||
|
expected_status=expected_status)
|
||||||
if res is False:
|
if res is False:
|
||||||
return res
|
return res
|
||||||
json_string, urlh = res
|
json_string, urlh = res
|
||||||
@ -700,11 +844,18 @@ class InfoExtractor(object):
|
|||||||
def _download_json(
|
def _download_json(
|
||||||
self, url_or_request, video_id, note='Downloading JSON metadata',
|
self, url_or_request, video_id, note='Downloading JSON metadata',
|
||||||
errnote='Unable to download JSON metadata', transform_source=None,
|
errnote='Unable to download JSON metadata', transform_source=None,
|
||||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||||
|
expected_status=None):
|
||||||
|
"""
|
||||||
|
Return the JSON object as a dict.
|
||||||
|
|
||||||
|
See _download_webpage docstring for arguments specification.
|
||||||
|
"""
|
||||||
res = self._download_json_handle(
|
res = self._download_json_handle(
|
||||||
url_or_request, video_id, note=note, errnote=errnote,
|
url_or_request, video_id, note=note, errnote=errnote,
|
||||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||||
data=data, headers=headers, query=query)
|
data=data, headers=headers, query=query,
|
||||||
|
expected_status=expected_status)
|
||||||
return res if res is False else res[0]
|
return res if res is False else res[0]
|
||||||
|
|
||||||
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
|
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
|
||||||
@ -1955,7 +2106,21 @@ class InfoExtractor(object):
|
|||||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||||
|
|
||||||
def prepare_template(template_name, identifiers):
|
def prepare_template(template_name, identifiers):
|
||||||
t = representation_ms_info[template_name]
|
tmpl = representation_ms_info[template_name]
|
||||||
|
# First of all, % characters outside $...$ templates
|
||||||
|
# must be escaped by doubling for proper processing
|
||||||
|
# by % operator string formatting used further (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/16867).
|
||||||
|
t = ''
|
||||||
|
in_template = False
|
||||||
|
for c in tmpl:
|
||||||
|
t += c
|
||||||
|
if c == '$':
|
||||||
|
in_template = not in_template
|
||||||
|
elif c == '%' and not in_template:
|
||||||
|
t += c
|
||||||
|
# Next, $...$ templates are translated to their
|
||||||
|
# %(...) counterparts to be used with % operator
|
||||||
t = t.replace('$RepresentationID$', representation_id)
|
t = t.replace('$RepresentationID$', representation_id)
|
||||||
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
||||||
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
||||||
@ -2286,6 +2451,8 @@ class InfoExtractor(object):
|
|||||||
media_info['subtitles'].setdefault(lang, []).append({
|
media_info['subtitles'].setdefault(lang, []).append({
|
||||||
'url': absolute_url(src),
|
'url': absolute_url(src),
|
||||||
})
|
})
|
||||||
|
for f in media_info['formats']:
|
||||||
|
f.setdefault('http_headers', {})['Referer'] = base_url
|
||||||
if media_info['formats'] or media_info['subtitles']:
|
if media_info['formats'] or media_info['subtitles']:
|
||||||
entries.append(media_info)
|
entries.append(media_info)
|
||||||
return entries
|
return entries
|
||||||
|
@ -19,8 +19,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CrackleIE(InfoExtractor):
|
class CrackleIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
# geo restricted to CA
|
# geo restricted to CA
|
||||||
'url': 'https://www.crackle.com/andromeda/2502343',
|
'url': 'https://www.crackle.com/andromeda/2502343',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -45,7 +45,10 @@ class CrackleIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.sonycrackle.com/andromeda/2502343',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -49,7 +49,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -11,10 +11,10 @@ class CTVNewsIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||||
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
|
'md5': '9b8624ba66351a23e0b6e1391971f9af',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '901995',
|
'id': '901995',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Extended: \'That person cannot be me\' Johnson says',
|
'title': 'Extended: \'That person cannot be me\' Johnson says',
|
||||||
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
|
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
|
||||||
'timestamp': 1467286284,
|
'timestamp': 1467286284,
|
||||||
|
@ -35,7 +35,7 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
|||||||
return result['data']
|
return result['data']
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
(email, password) = self._get_login_info()
|
email, password = self._get_login_info()
|
||||||
if email is None:
|
if email is None:
|
||||||
return
|
return
|
||||||
result = self._download_json(
|
result = self._download_json(
|
||||||
|
@ -1,12 +1,16 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import base64
|
||||||
import json
|
import hashlib
|
||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_struct_pack
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
@ -64,7 +68,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
'uploader': 'Deadline',
|
'uploader': 'Deadline',
|
||||||
'uploader_id': 'x1xm8ri',
|
'uploader_id': 'x1xm8ri',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'view_count': int,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||||
@ -167,6 +170,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
player = self._parse_json(player_v5, video_id)
|
player = self._parse_json(player_v5, video_id)
|
||||||
metadata = player['metadata']
|
metadata = player['metadata']
|
||||||
|
|
||||||
|
if metadata.get('error', {}).get('type') == 'password_protected':
|
||||||
|
password = self._downloader.params.get('videopassword')
|
||||||
|
if password:
|
||||||
|
r = int(metadata['id'][1:], 36)
|
||||||
|
us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
|
||||||
|
t = ''.join(random.choice(string.ascii_letters) for i in range(10))
|
||||||
|
n = us64e(compat_struct_pack('I', r))
|
||||||
|
i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
|
||||||
|
metadata = self._download_json(
|
||||||
|
'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
|
||||||
|
|
||||||
self._check_error(metadata)
|
self._check_error(metadata)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
@ -180,9 +194,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
continue
|
continue
|
||||||
ext = mimetype2ext(type_) or determine_ext(media_url)
|
ext = mimetype2ext(type_) or determine_ext(media_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
media_url, video_id, 'mp4', preference=-1,
|
media_url, video_id, 'mp4', preference=-1,
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False)
|
||||||
|
for f in m3u8_formats:
|
||||||
|
f['url'] = f['url'].split('#')[0]
|
||||||
|
formats.append(f)
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
|
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
|
||||||
@ -299,8 +316,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
def _check_error(self, info):
|
def _check_error(self, info):
|
||||||
error = info.get('error')
|
error = info.get('error')
|
||||||
if info.get('error') is not None:
|
if error:
|
||||||
title = error['title']
|
title = error.get('title') or error['message']
|
||||||
# See https://developer.dailymotion.com/api#access-error
|
# See https://developer.dailymotion.com/api#access-error
|
||||||
if error.get('code') == 'DM007':
|
if error.get('code') == 'DM007':
|
||||||
self.raise_geo_restricted(msg=title)
|
self.raise_geo_restricted(msg=title)
|
||||||
|
@ -5,13 +5,15 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
unified_strdate,
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DctpTvIE(InfoExtractor):
|
class DctpTvIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# 4x3
|
||||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||||
@ -19,31 +21,49 @@ class DctpTvIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||||
'description': 'Kurzfilm',
|
'description': 'Kurzfilm',
|
||||||
'upload_date': '20110407',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 71.24,
|
'duration': 71.24,
|
||||||
|
'timestamp': 1302172322,
|
||||||
|
'upload_date': '20110407',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
# 16x9
|
||||||
|
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
version = self._download_json(
|
||||||
|
'%s/version.json' % self._BASE_URL, display_id,
|
||||||
|
'Downloading version JSON')
|
||||||
|
|
||||||
video_id = self._html_search_meta(
|
restapi_base = '%s/%s/restapi' % (
|
||||||
'DC.identifier', webpage, 'video id',
|
self._BASE_URL, version['version_name'])
|
||||||
default=None) or self._search_regex(
|
|
||||||
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
info = self._download_json(
|
||||||
|
'%s/slugs/%s.json' % (restapi_base, display_id), display_id,
|
||||||
|
'Downloading video info JSON')
|
||||||
|
|
||||||
|
media = self._download_json(
|
||||||
|
'%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
|
||||||
|
display_id, 'Downloading media JSON')
|
||||||
|
|
||||||
|
uuid = media['uuid']
|
||||||
|
title = media['title']
|
||||||
|
ratio = '16x9' if media.get('is_wide') else '4x3'
|
||||||
|
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
|
||||||
|
|
||||||
servers = self._download_json(
|
servers = self._download_json(
|
||||||
'http://www.dctp.tv/streaming_servers/', display_id,
|
'http://www.dctp.tv/streaming_servers/', display_id,
|
||||||
note='Downloading server list', fatal=False)
|
note='Downloading server list JSON', fatal=False)
|
||||||
|
|
||||||
if servers:
|
if servers:
|
||||||
endpoint = next(
|
endpoint = next(
|
||||||
@ -60,27 +80,35 @@ class DctpTvIE(InfoExtractor):
|
|||||||
formats = [{
|
formats = [{
|
||||||
'url': endpoint,
|
'url': endpoint,
|
||||||
'app': app,
|
'app': app,
|
||||||
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
|
'play_path': play_path,
|
||||||
'page_url': url,
|
'page_url': url,
|
||||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
|
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
description = self._html_search_meta('DC.description', webpage)
|
thumbnails = []
|
||||||
upload_date = unified_strdate(
|
images = media.get('images')
|
||||||
self._html_search_meta('DC.date.created', webpage))
|
if isinstance(images, list):
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
for image in images:
|
||||||
duration = float_or_none(self._search_regex(
|
if not isinstance(image, dict):
|
||||||
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
|
continue
|
||||||
default=None), scale=1000)
|
image_url = image.get('url')
|
||||||
|
if not image_url or not isinstance(image_url, compat_str):
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': image_url,
|
||||||
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': uuid,
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'description': description,
|
'title': title,
|
||||||
'upload_date': upload_date,
|
'alt_title': media.get('subtitle'),
|
||||||
'thumbnail': thumbnail,
|
'description': media.get('description') or media.get('teaser'),
|
||||||
'duration': duration,
|
'timestamp': unified_timestamp(media.get('created')),
|
||||||
|
'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,10 @@ import re
|
|||||||
import string
|
import string
|
||||||
|
|
||||||
from .discoverygo import DiscoveryGoBaseIE
|
from .discoverygo import DiscoveryGoBaseIE
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
try_get,
|
try_get,
|
||||||
@ -55,15 +58,27 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
|||||||
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
|
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
|
||||||
video_id = video['id']
|
video_id = video['id']
|
||||||
|
|
||||||
access_token = self._download_json(
|
access_token = None
|
||||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
cookies = self._get_cookies(url)
|
||||||
'authRel': 'authorization',
|
|
||||||
'client_id': try_get(
|
# prefer Affiliate Auth Token over Anonymous Auth Token
|
||||||
react_data, lambda x: x['application']['apiClientId'],
|
auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
|
||||||
compat_str) or '3020a40c2356a645b4b4',
|
if auth_storage_cookie and auth_storage_cookie.value:
|
||||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
auth_storage = self._parse_json(compat_urllib_parse_unquote(
|
||||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
compat_urllib_parse_unquote(auth_storage_cookie.value)),
|
||||||
})['access_token']
|
video_id, fatal=False) or {}
|
||||||
|
access_token = auth_storage.get('a') or auth_storage.get('access_token')
|
||||||
|
|
||||||
|
if not access_token:
|
||||||
|
access_token = self._download_json(
|
||||||
|
'https://www.%s.com/anonymous' % site, display_id, query={
|
||||||
|
'authRel': 'authorization',
|
||||||
|
'client_id': try_get(
|
||||||
|
react_data, lambda x: x['application']['apiClientId'],
|
||||||
|
compat_str) or '3020a40c2356a645b4b4',
|
||||||
|
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||||
|
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
||||||
|
})['access_token']
|
||||||
|
|
||||||
try:
|
try:
|
||||||
stream = self._download_json(
|
stream = self._download_json(
|
||||||
@ -72,7 +87,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
|||||||
'Authorization': 'Bearer ' + access_token,
|
'Authorization': 'Bearer ' + access_token,
|
||||||
})
|
})
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||||
e_description = self._parse_json(
|
e_description = self._parse_json(
|
||||||
e.cause.read().decode(), display_id)['description']
|
e.cause.read().decode(), display_id)['description']
|
||||||
if 'resource not available for country' in e_description:
|
if 'resource not available for country' in e_description:
|
||||||
|
@ -3,8 +3,8 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .brightcove import BrightcoveLegacyIE
|
from .brightcove import BrightcoveLegacyIE
|
||||||
|
from .dplay import DPlayIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
@ -12,8 +12,13 @@ from ..compat import (
|
|||||||
from ..utils import smuggle_url
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryNetworksDeIE(InfoExtractor):
|
class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
|
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
|
||||||
|
(?:
|
||||||
|
.*\#(?P<id>\d+)|
|
||||||
|
(?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
|
||||||
|
programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
|
||||||
|
)'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
||||||
@ -40,6 +45,14 @@ class DiscoveryNetworksDeIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
alternate_id = mobj.group('alternate_id')
|
||||||
|
if alternate_id:
|
||||||
|
self._initialize_geo_bypass({
|
||||||
|
'countries': ['DE'],
|
||||||
|
})
|
||||||
|
return self._get_disco_api_info(
|
||||||
|
url, '%s/%s' % (mobj.group('programme'), alternate_id),
|
||||||
|
'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
|
||||||
brightcove_id = mobj.group('id')
|
brightcove_id = mobj.group('id')
|
||||||
if not brightcove_id:
|
if not brightcove_id:
|
||||||
title = mobj.group('title')
|
title = mobj.group('title')
|
||||||
|
@ -97,12 +97,83 @@ class DPlayIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _get_disco_api_info(self, url, display_id, disco_host, realm):
|
||||||
|
disco_base = 'https://' + disco_host
|
||||||
|
token = self._download_json(
|
||||||
|
'%s/token' % disco_base, display_id, 'Downloading token',
|
||||||
|
query={
|
||||||
|
'realm': realm,
|
||||||
|
})['data']['attributes']['token']
|
||||||
|
headers = {
|
||||||
|
'Referer': url,
|
||||||
|
'Authorization': 'Bearer ' + token,
|
||||||
|
}
|
||||||
|
video = self._download_json(
|
||||||
|
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
||||||
|
headers=headers, query={
|
||||||
|
'include': 'show'
|
||||||
|
})
|
||||||
|
video_id = video['data']['id']
|
||||||
|
info = video['data']['attributes']
|
||||||
|
title = info['name']
|
||||||
|
formats = []
|
||||||
|
for format_id, format_dict in self._download_json(
|
||||||
|
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
||||||
|
display_id, headers=headers)['data']['attributes']['streaming'].items():
|
||||||
|
if not isinstance(format_dict, dict):
|
||||||
|
continue
|
||||||
|
format_url = format_dict.get('url')
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if format_id == 'dash' or ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, display_id, mpd_id='dash', fatal=False))
|
||||||
|
elif format_id == 'hls' or ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, display_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||||
|
fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
series = None
|
||||||
|
try:
|
||||||
|
included = video.get('included')
|
||||||
|
if isinstance(included, list):
|
||||||
|
show = next(e for e in included if e.get('type') == 'show')
|
||||||
|
series = try_get(
|
||||||
|
show, lambda x: x['attributes']['name'], compat_str)
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': info.get('description'),
|
||||||
|
'duration': float_or_none(
|
||||||
|
info.get('videoDuration'), scale=1000),
|
||||||
|
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||||
|
'series': series,
|
||||||
|
'season_number': int_or_none(info.get('seasonNumber')),
|
||||||
|
'episode_number': int_or_none(info.get('episodeNumber')),
|
||||||
|
'age_limit': int_or_none(info.get('minimum_age')),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
display_id = mobj.group('id')
|
display_id = mobj.group('id')
|
||||||
domain = mobj.group('domain')
|
domain = mobj.group('domain')
|
||||||
|
|
||||||
self._initialize_geo_bypass([mobj.group('country').upper()])
|
self._initialize_geo_bypass({
|
||||||
|
'countries': [mobj.group('country').upper()],
|
||||||
|
})
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
@ -111,72 +182,8 @@ class DPlayIE(InfoExtractor):
|
|||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
host = mobj.group('host')
|
host = mobj.group('host')
|
||||||
disco_base = 'https://disco-api.%s' % host
|
return self._get_disco_api_info(
|
||||||
self._download_json(
|
url, display_id, 'disco-api.' + host, host.replace('.', ''))
|
||||||
'%s/token' % disco_base, display_id, 'Downloading token',
|
|
||||||
query={
|
|
||||||
'realm': host.replace('.', ''),
|
|
||||||
})
|
|
||||||
video = self._download_json(
|
|
||||||
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
|
||||||
headers={
|
|
||||||
'Referer': url,
|
|
||||||
'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
|
|
||||||
}, query={
|
|
||||||
'include': 'show'
|
|
||||||
})
|
|
||||||
video_id = video['data']['id']
|
|
||||||
info = video['data']['attributes']
|
|
||||||
title = info['name']
|
|
||||||
formats = []
|
|
||||||
for format_id, format_dict in self._download_json(
|
|
||||||
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
|
||||||
display_id)['data']['attributes']['streaming'].items():
|
|
||||||
if not isinstance(format_dict, dict):
|
|
||||||
continue
|
|
||||||
format_url = format_dict.get('url')
|
|
||||||
if not format_url:
|
|
||||||
continue
|
|
||||||
ext = determine_ext(format_url)
|
|
||||||
if format_id == 'dash' or ext == 'mpd':
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
format_url, display_id, mpd_id='dash', fatal=False))
|
|
||||||
elif format_id == 'hls' or ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
format_url, display_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
|
||||||
fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
|
||||||
'format_id': format_id,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
series = None
|
|
||||||
try:
|
|
||||||
included = video.get('included')
|
|
||||||
if isinstance(included, list):
|
|
||||||
show = next(e for e in included if e.get('type') == 'show')
|
|
||||||
series = try_get(
|
|
||||||
show, lambda x: x['attributes']['name'], compat_str)
|
|
||||||
except StopIteration:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': info.get('description'),
|
|
||||||
'duration': float_or_none(
|
|
||||||
info.get('videoDuration'), scale=1000),
|
|
||||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
|
||||||
'series': series,
|
|
||||||
'season_number': int_or_none(info.get('seasonNumber')),
|
|
||||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
|
||||||
'age_limit': int_or_none(info.get('minimum_age')),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
||||||
|
@ -42,7 +42,7 @@ class DramaFeverBaseIE(InfoExtractor):
|
|||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -8,7 +8,6 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
qualities,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
@ -16,7 +15,8 @@ from ..utils import (
|
|||||||
|
|
||||||
class DreiSatIE(InfoExtractor):
|
class DreiSatIE(InfoExtractor):
|
||||||
IE_NAME = '3sat'
|
IE_NAME = '3sat'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
_GEO_COUNTRIES = ['DE']
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
||||||
@ -43,7 +43,8 @@ class DreiSatIE(InfoExtractor):
|
|||||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||||
param_groups = {}
|
param_groups = {}
|
||||||
for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
|
for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
|
||||||
group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace'))
|
group_id = param_group.get(self._xpath_ns(
|
||||||
|
'id', 'http://www.w3.org/XML/1998/namespace'))
|
||||||
params = {}
|
params = {}
|
||||||
for param in param_group:
|
for param in param_group:
|
||||||
params[param.get('name')] = param.get('value')
|
params[param.get('name')] = param.get('value')
|
||||||
@ -54,7 +55,7 @@ class DreiSatIE(InfoExtractor):
|
|||||||
src = video.get('src')
|
src = video.get('src')
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||||
group_id = video.get('paramGroup')
|
group_id = video.get('paramGroup')
|
||||||
param_group = param_groups[group_id]
|
param_group = param_groups[group_id]
|
||||||
for proto in param_group['protocols'].split(','):
|
for proto in param_group['protocols'].split(','):
|
||||||
@ -75,66 +76,36 @@ class DreiSatIE(InfoExtractor):
|
|||||||
note='Downloading video info',
|
note='Downloading video info',
|
||||||
errnote='Failed to download video info')
|
errnote='Failed to download video info')
|
||||||
|
|
||||||
status_code = doc.find('./status/statuscode')
|
status_code = xpath_text(doc, './status/statuscode')
|
||||||
if status_code is not None and status_code.text != 'ok':
|
if status_code and status_code != 'ok':
|
||||||
code = status_code.text
|
if status_code == 'notVisibleAnymore':
|
||||||
if code == 'notVisibleAnymore':
|
|
||||||
message = 'Video %s is not available' % video_id
|
message = 'Video %s is not available' % video_id
|
||||||
else:
|
else:
|
||||||
message = '%s returned error: %s' % (self.IE_NAME, code)
|
message = '%s returned error: %s' % (self.IE_NAME, status_code)
|
||||||
raise ExtractorError(message, expected=True)
|
raise ExtractorError(message, expected=True)
|
||||||
|
|
||||||
title = doc.find('.//information/title').text
|
title = xpath_text(doc, './/information/title', 'title', True)
|
||||||
description = xpath_text(doc, './/information/detail', 'description')
|
|
||||||
duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
|
|
||||||
uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
|
|
||||||
uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
|
|
||||||
upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
|
|
||||||
|
|
||||||
def xml_to_thumbnails(fnode):
|
urls = []
|
||||||
thumbnails = []
|
|
||||||
for node in fnode:
|
|
||||||
thumbnail_url = node.text
|
|
||||||
if not thumbnail_url:
|
|
||||||
continue
|
|
||||||
thumbnail = {
|
|
||||||
'url': thumbnail_url,
|
|
||||||
}
|
|
||||||
if 'key' in node.attrib:
|
|
||||||
m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
|
|
||||||
if m:
|
|
||||||
thumbnail['width'] = int(m.group(1))
|
|
||||||
thumbnail['height'] = int(m.group(2))
|
|
||||||
thumbnails.append(thumbnail)
|
|
||||||
return thumbnails
|
|
||||||
|
|
||||||
thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
|
|
||||||
|
|
||||||
format_nodes = doc.findall('.//formitaeten/formitaet')
|
|
||||||
quality = qualities(['veryhigh', 'high', 'med', 'low'])
|
|
||||||
|
|
||||||
def get_quality(elem):
|
|
||||||
return quality(xpath_text(elem, 'quality'))
|
|
||||||
format_nodes.sort(key=get_quality)
|
|
||||||
format_ids = []
|
|
||||||
formats = []
|
formats = []
|
||||||
for fnode in format_nodes:
|
for fnode in doc.findall('.//formitaeten/formitaet'):
|
||||||
video_url = fnode.find('url').text
|
video_url = xpath_text(fnode, 'url')
|
||||||
is_available = 'http://www.metafilegenerator' not in video_url
|
if not video_url or video_url in urls:
|
||||||
if not is_available:
|
|
||||||
continue
|
continue
|
||||||
|
urls.append(video_url)
|
||||||
|
|
||||||
|
is_available = 'http://www.metafilegenerator' not in video_url
|
||||||
|
geoloced = 'static_geoloced_online' in video_url
|
||||||
|
if not is_available or geoloced:
|
||||||
|
continue
|
||||||
|
|
||||||
format_id = fnode.attrib['basetype']
|
format_id = fnode.attrib['basetype']
|
||||||
quality = xpath_text(fnode, './quality', 'quality')
|
|
||||||
format_m = re.match(r'''(?x)
|
format_m = re.match(r'''(?x)
|
||||||
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
|
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
|
||||||
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
|
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
|
||||||
''', format_id)
|
''', format_id)
|
||||||
|
|
||||||
ext = determine_ext(video_url, None) or format_m.group('container')
|
ext = determine_ext(video_url, None) or format_m.group('container')
|
||||||
if ext not in ('smil', 'f4m', 'm3u8'):
|
|
||||||
format_id = format_id + '-' + quality
|
|
||||||
if format_id in format_ids:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if ext == 'meta':
|
if ext == 'meta':
|
||||||
continue
|
continue
|
||||||
@ -147,24 +118,23 @@ class DreiSatIE(InfoExtractor):
|
|||||||
if video_url.startswith('https://'):
|
if video_url.startswith('https://'):
|
||||||
continue
|
continue
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
video_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False))
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
video_url, video_id, f4m_id=format_id, fatal=False))
|
video_url, video_id, f4m_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
proto = format_m.group('proto').lower()
|
quality = xpath_text(fnode, './quality')
|
||||||
|
if quality:
|
||||||
|
format_id += '-' + quality
|
||||||
|
|
||||||
abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
|
abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
|
||||||
vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
|
vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
|
||||||
|
|
||||||
width = int_or_none(xpath_text(fnode, './width', 'width'))
|
tbr = int_or_none(self._search_regex(
|
||||||
height = int_or_none(xpath_text(fnode, './height', 'height'))
|
r'_(\d+)k', video_url, 'bitrate', None))
|
||||||
|
if tbr and vbr and not abr:
|
||||||
filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
|
abr = tbr - vbr
|
||||||
|
|
||||||
format_note = ''
|
|
||||||
if not format_note:
|
|
||||||
format_note = None
|
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
@ -174,31 +144,50 @@ class DreiSatIE(InfoExtractor):
|
|||||||
'vcodec': format_m.group('vcodec'),
|
'vcodec': format_m.group('vcodec'),
|
||||||
'abr': abr,
|
'abr': abr,
|
||||||
'vbr': vbr,
|
'vbr': vbr,
|
||||||
'width': width,
|
'tbr': tbr,
|
||||||
'height': height,
|
'width': int_or_none(xpath_text(fnode, './width')),
|
||||||
'filesize': filesize,
|
'height': int_or_none(xpath_text(fnode, './height')),
|
||||||
'format_note': format_note,
|
'filesize': int_or_none(xpath_text(fnode, './filesize')),
|
||||||
'protocol': proto,
|
'protocol': format_m.group('proto').lower(),
|
||||||
'_available': is_available,
|
|
||||||
})
|
})
|
||||||
format_ids.append(format_id)
|
|
||||||
|
geolocation = xpath_text(doc, './/details/geolocation')
|
||||||
|
if not formats and geolocation and geolocation != 'none':
|
||||||
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for node in doc.findall('.//teaserimages/teaserimage'):
|
||||||
|
thumbnail_url = node.text
|
||||||
|
if not thumbnail_url:
|
||||||
|
continue
|
||||||
|
thumbnail = {
|
||||||
|
'url': thumbnail_url,
|
||||||
|
}
|
||||||
|
thumbnail_key = node.get('key')
|
||||||
|
if thumbnail_key:
|
||||||
|
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||||
|
if m:
|
||||||
|
thumbnail['width'] = int(m.group(1))
|
||||||
|
thumbnail['height'] = int(m.group(2))
|
||||||
|
thumbnails.append(thumbnail)
|
||||||
|
|
||||||
|
upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': xpath_text(doc, './/information/detail'),
|
||||||
'duration': duration,
|
'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'uploader': uploader,
|
'uploader': xpath_text(doc, './/details/originChannelTitle'),
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': xpath_text(doc, './/details/originChannelId'),
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
|
||||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
|
||||||
return self.extract_from_xml_url(video_id, details_url)
|
return self.extract_from_xml_url(video_id, details_url)
|
||||||
|
83
youtube_dl/extractor/dtube.py
Normal file
83
youtube_dl/extractor/dtube.py
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from socket import timeout
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DTubeIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://d.tube/#!/v/benswann/zqd630em',
|
||||||
|
'md5': 'a03eaa186618ffa7a3145945543a251e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'zqd630em',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
|
||||||
|
'description': 'md5:700d164e066b87f9eac057949e4227c2',
|
||||||
|
'uploader_id': 'benswann',
|
||||||
|
'upload_date': '20180222',
|
||||||
|
'timestamp': 1519328958,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': '480p',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
uploader_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({
|
||||||
|
'jsonrpc': '2.0',
|
||||||
|
'method': 'get_content',
|
||||||
|
'params': [uploader_id, video_id],
|
||||||
|
}).encode())['result']
|
||||||
|
|
||||||
|
metadata = json.loads(result['json_metadata'])
|
||||||
|
video = metadata['video']
|
||||||
|
content = video['content']
|
||||||
|
info = video.get('info', {})
|
||||||
|
title = info.get('title') or result['title']
|
||||||
|
|
||||||
|
def canonical_url(h):
|
||||||
|
if not h:
|
||||||
|
return None
|
||||||
|
return 'https://ipfs.io/ipfs/' + h
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for q in ('240', '480', '720', '1080', ''):
|
||||||
|
video_url = canonical_url(content.get('video%shash' % q))
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
format_id = (q + 'p') if q else 'Source'
|
||||||
|
try:
|
||||||
|
self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
|
||||||
|
self._downloader._opener.open(video_url, timeout=5).close()
|
||||||
|
except timeout as e:
|
||||||
|
self.to_screen(
|
||||||
|
'%s: %s URL is invalid, skipping' % (video_id, format_id))
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': video_url,
|
||||||
|
'height': int_or_none(q),
|
||||||
|
'ext': 'mp4',
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': content.get('description'),
|
||||||
|
'thumbnail': canonical_url(info.get('snaphash')),
|
||||||
|
'tags': content.get('tags') or metadata.get('tags'),
|
||||||
|
'duration': info.get('duration'),
|
||||||
|
'formats': formats,
|
||||||
|
'timestamp': parse_iso8601(result.get('created')),
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
}
|
@ -91,17 +91,6 @@ class DVTVIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
|
|
||||||
'md5': '87defe16681b1429c91f7a74809823c6',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _parse_video_metadata(self, js, video_id, live_js=None):
|
def _parse_video_metadata(self, js, video_id, live_js=None):
|
||||||
|
77
youtube_dl/extractor/expressen.py
Normal file
77
youtube_dl/extractor/expressen.py
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
unescapeHTML,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ExpressenIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
|
||||||
|
'md5': '2fbbe3ca14392a6b1b36941858d33a45',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8690962',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
|
||||||
|
'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 788,
|
||||||
|
'timestamp': 1526639109,
|
||||||
|
'upload_date': '20180518',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
def extract_data(name):
|
||||||
|
return self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
|
||||||
|
webpage, 'info', group='value'),
|
||||||
|
display_id, transform_source=unescapeHTML)
|
||||||
|
|
||||||
|
info = extract_data('video-tracking-info')
|
||||||
|
video_id = info['videoId']
|
||||||
|
|
||||||
|
data = extract_data('article-data')
|
||||||
|
stream = data['stream']
|
||||||
|
|
||||||
|
if determine_ext(stream) == 'm3u8':
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
stream, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls')
|
||||||
|
else:
|
||||||
|
formats = [{
|
||||||
|
'url': stream,
|
||||||
|
}]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = info.get('titleRaw') or data['title']
|
||||||
|
description = info.get('descriptionRaw')
|
||||||
|
thumbnail = info.get('socialMediaImage') or data.get('image')
|
||||||
|
duration = int_or_none(info.get('videoTotalSecondsDuration') or
|
||||||
|
data.get('totalSecondsDuration'))
|
||||||
|
timestamp = unified_timestamp(info.get('publishDate'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -44,6 +44,7 @@ from .anysex import AnySexIE
|
|||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
from .allocine import AllocineIE
|
from .allocine import AllocineIE
|
||||||
from .aliexpress import AliExpressLiveIE
|
from .aliexpress import AliExpressLiveIE
|
||||||
|
from .apa import APAIE
|
||||||
from .aparat import AparatIE
|
from .aparat import AparatIE
|
||||||
from .appleconnect import AppleConnectIE
|
from .appleconnect import AppleConnectIE
|
||||||
from .appletrailers import (
|
from .appletrailers import (
|
||||||
@ -137,6 +138,7 @@ from .brightcove import (
|
|||||||
BrightcoveLegacyIE,
|
BrightcoveLegacyIE,
|
||||||
BrightcoveNewIE,
|
BrightcoveNewIE,
|
||||||
)
|
)
|
||||||
|
from .businessinsider import BusinessInsiderIE
|
||||||
from .buzzfeed import BuzzFeedIE
|
from .buzzfeed import BuzzFeedIE
|
||||||
from .byutv import BYUtvIE
|
from .byutv import BYUtvIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
@ -144,6 +146,8 @@ from .camdemy import (
|
|||||||
CamdemyIE,
|
CamdemyIE,
|
||||||
CamdemyFolderIE
|
CamdemyFolderIE
|
||||||
)
|
)
|
||||||
|
from .cammodels import CamModelsIE
|
||||||
|
from .camtube import CamTubeIE
|
||||||
from .camwithher import CamWithHerIE
|
from .camwithher import CamWithHerIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
@ -195,6 +199,7 @@ from .clippit import ClippitIE
|
|||||||
from .cliprs import ClipRsIE
|
from .cliprs import ClipRsIE
|
||||||
from .clipsyndicate import ClipsyndicateIE
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
from .closertotruth import CloserToTruthIE
|
from .closertotruth import CloserToTruthIE
|
||||||
|
from .cloudflarestream import CloudflareStreamIE
|
||||||
from .cloudy import CloudyIE
|
from .cloudy import CloudyIE
|
||||||
from .clubic import ClubicIE
|
from .clubic import ClubicIE
|
||||||
from .clyp import ClypIE
|
from .clyp import ClypIE
|
||||||
@ -281,6 +286,7 @@ from .drtv import (
|
|||||||
DRTVIE,
|
DRTVIE,
|
||||||
DRTVLiveIE,
|
DRTVLiveIE,
|
||||||
)
|
)
|
||||||
|
from .dtube import DTubeIE
|
||||||
from .dvtv import DVTVIE
|
from .dvtv import DVTVIE
|
||||||
from .dumpert import DumpertIE
|
from .dumpert import DumpertIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
@ -329,6 +335,7 @@ from .esri import EsriVideoIE
|
|||||||
from .europa import EuropaIE
|
from .europa import EuropaIE
|
||||||
from .everyonesmixtape import EveryonesMixtapeIE
|
from .everyonesmixtape import EveryonesMixtapeIE
|
||||||
from .expotv import ExpoTVIE
|
from .expotv import ExpoTVIE
|
||||||
|
from .expressen import ExpressenIE
|
||||||
from .extremetube import ExtremeTubeIE
|
from .extremetube import ExtremeTubeIE
|
||||||
from .eyedotv import EyedoTVIE
|
from .eyedotv import EyedoTVIE
|
||||||
from .facebook import (
|
from .facebook import (
|
||||||
@ -366,7 +373,6 @@ from .foxgay import FoxgayIE
|
|||||||
from .foxnews import (
|
from .foxnews import (
|
||||||
FoxNewsIE,
|
FoxNewsIE,
|
||||||
FoxNewsArticleIE,
|
FoxNewsArticleIE,
|
||||||
FoxNewsInsiderIE,
|
|
||||||
)
|
)
|
||||||
from .foxsports import FoxSportsIE
|
from .foxsports import FoxSportsIE
|
||||||
from .franceculture import FranceCultureIE
|
from .franceculture import FranceCultureIE
|
||||||
@ -376,6 +382,7 @@ from .francetv import (
|
|||||||
FranceTVSiteIE,
|
FranceTVSiteIE,
|
||||||
FranceTVEmbedIE,
|
FranceTVEmbedIE,
|
||||||
FranceTVInfoIE,
|
FranceTVInfoIE,
|
||||||
|
FranceTVInfoSportIE,
|
||||||
FranceTVJeunesseIE,
|
FranceTVJeunesseIE,
|
||||||
GenerationWhatIE,
|
GenerationWhatIE,
|
||||||
CultureboxIE,
|
CultureboxIE,
|
||||||
@ -466,10 +473,7 @@ from .imgur import (
|
|||||||
)
|
)
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .inc import IncIE
|
from .inc import IncIE
|
||||||
from .indavideo import (
|
from .indavideo import IndavideoEmbedIE
|
||||||
IndavideoIE,
|
|
||||||
IndavideoEmbedIE,
|
|
||||||
)
|
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE, InstagramUserIE
|
from .instagram import InstagramIE, InstagramUserIE
|
||||||
from .internazionale import InternazionaleIE
|
from .internazionale import InternazionaleIE
|
||||||
@ -477,7 +481,10 @@ from .internetvideoarchive import InternetVideoArchiveIE
|
|||||||
from .iprima import IPrimaIE
|
from .iprima import IPrimaIE
|
||||||
from .iqiyi import IqiyiIE
|
from .iqiyi import IqiyiIE
|
||||||
from .ir90tv import Ir90TvIE
|
from .ir90tv import Ir90TvIE
|
||||||
from .itv import ITVIE
|
from .itv import (
|
||||||
|
ITVIE,
|
||||||
|
ITVBTCCIE,
|
||||||
|
)
|
||||||
from .ivi import (
|
from .ivi import (
|
||||||
IviIE,
|
IviIE,
|
||||||
IviCompilationIE
|
IviCompilationIE
|
||||||
@ -576,13 +583,16 @@ from .mailru import (
|
|||||||
MailRuMusicIE,
|
MailRuMusicIE,
|
||||||
MailRuMusicSearchIE,
|
MailRuMusicSearchIE,
|
||||||
)
|
)
|
||||||
from .makerschannel import MakersChannelIE
|
|
||||||
from .makertv import MakerTVIE
|
from .makertv import MakerTVIE
|
||||||
from .mangomolo import (
|
from .mangomolo import (
|
||||||
MangomoloVideoIE,
|
MangomoloVideoIE,
|
||||||
MangomoloLiveIE,
|
MangomoloLiveIE,
|
||||||
)
|
)
|
||||||
from .manyvids import ManyVidsIE
|
from .manyvids import ManyVidsIE
|
||||||
|
from .markiza import (
|
||||||
|
MarkizaIE,
|
||||||
|
MarkizaPageIE,
|
||||||
|
)
|
||||||
from .massengeschmacktv import MassengeschmackTVIE
|
from .massengeschmacktv import MassengeschmackTVIE
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
@ -619,7 +629,6 @@ from .mnet import MnetIE
|
|||||||
from .moevideo import MoeVideoIE
|
from .moevideo import MoeVideoIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import MofosexIE
|
||||||
from .mojvideo import MojvideoIE
|
from .mojvideo import MojvideoIE
|
||||||
from .moniker import MonikerIE
|
|
||||||
from .morningstar import MorningstarIE
|
from .morningstar import MorningstarIE
|
||||||
from .motherless import (
|
from .motherless import (
|
||||||
MotherlessIE,
|
MotherlessIE,
|
||||||
@ -640,6 +649,7 @@ from .mtv import (
|
|||||||
from .muenchentv import MuenchenTVIE
|
from .muenchentv import MuenchenTVIE
|
||||||
from .musicplayon import MusicPlayOnIE
|
from .musicplayon import MusicPlayOnIE
|
||||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||||
|
from .mychannels import MyChannelsIE
|
||||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
from .myvi import (
|
from .myvi import (
|
||||||
@ -661,6 +671,7 @@ from .nbc import (
|
|||||||
NBCOlympicsIE,
|
NBCOlympicsIE,
|
||||||
NBCOlympicsStreamIE,
|
NBCOlympicsStreamIE,
|
||||||
NBCSportsIE,
|
NBCSportsIE,
|
||||||
|
NBCSportsStreamIE,
|
||||||
NBCSportsVPlayerIE,
|
NBCSportsVPlayerIE,
|
||||||
)
|
)
|
||||||
from .ndr import (
|
from .ndr import (
|
||||||
@ -700,12 +711,7 @@ from .nexx import (
|
|||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
from .nfl import NFLIE
|
from .nfl import NFLIE
|
||||||
from .nhk import NhkVodIE
|
from .nhk import NhkVodIE
|
||||||
from .nhl import (
|
from .nhl import NHLIE
|
||||||
NHLVideocenterIE,
|
|
||||||
NHLNewsIE,
|
|
||||||
NHLVideocenterCategoryIE,
|
|
||||||
NHLIE,
|
|
||||||
)
|
|
||||||
from .nick import (
|
from .nick import (
|
||||||
NickIE,
|
NickIE,
|
||||||
NickBrIE,
|
NickBrIE,
|
||||||
@ -714,10 +720,7 @@ from .nick import (
|
|||||||
NickRuIE,
|
NickRuIE,
|
||||||
)
|
)
|
||||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||||
from .ninecninemedia import (
|
from .ninecninemedia import NineCNineMediaIE
|
||||||
NineCNineMediaStackIE,
|
|
||||||
NineCNineMediaIE,
|
|
||||||
)
|
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
from .ninenow import NineNowIE
|
from .ninenow import NineNowIE
|
||||||
from .nintendo import NintendoIE
|
from .nintendo import NintendoIE
|
||||||
@ -805,6 +808,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
|
|||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .pearvideo import PearVideoIE
|
from .pearvideo import PearVideoIE
|
||||||
|
from .peertube import PeerTubeIE
|
||||||
from .people import PeopleIE
|
from .people import PeopleIE
|
||||||
from .performgroup import PerformGroupIE
|
from .performgroup import PerformGroupIE
|
||||||
from .periscope import (
|
from .periscope import (
|
||||||
@ -1010,7 +1014,10 @@ from .spankbang import SpankBangIE
|
|||||||
from .spankwire import SpankwireIE
|
from .spankwire import SpankwireIE
|
||||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
from .spiegeltv import SpiegeltvIE
|
||||||
from .spike import SpikeIE
|
from .spike import (
|
||||||
|
BellatorIE,
|
||||||
|
ParamountNetworkIE,
|
||||||
|
)
|
||||||
from .stitcher import StitcherIE
|
from .stitcher import StitcherIE
|
||||||
from .sport5 import Sport5IE
|
from .sport5 import Sport5IE
|
||||||
from .sportbox import SportBoxEmbedIE
|
from .sportbox import SportBoxEmbedIE
|
||||||
@ -1039,6 +1046,7 @@ from .stretchinternet import StretchInternetIE
|
|||||||
from .sunporno import SunPornoIE
|
from .sunporno import SunPornoIE
|
||||||
from .svt import (
|
from .svt import (
|
||||||
SVTIE,
|
SVTIE,
|
||||||
|
SVTPageIE,
|
||||||
SVTPlayIE,
|
SVTPlayIE,
|
||||||
SVTSeriesIE,
|
SVTSeriesIE,
|
||||||
)
|
)
|
||||||
@ -1142,6 +1150,7 @@ from .tvc import (
|
|||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvland import TVLandIE
|
from .tvland import TVLandIE
|
||||||
from .tvn24 import TVN24IE
|
from .tvn24 import TVN24IE
|
||||||
|
from .tvnet import TVNetIE
|
||||||
from .tvnoe import TVNoeIE
|
from .tvnoe import TVNoeIE
|
||||||
from .tvnow import (
|
from .tvnow import (
|
||||||
TVNowIE,
|
TVNowIE,
|
||||||
|
@ -56,6 +56,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||||
|
|
||||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||||
|
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||||
@ -208,6 +209,17 @@ class FacebookIE(InfoExtractor):
|
|||||||
# no title
|
# no title
|
||||||
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '359649331226507',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
|
||||||
|
'uploader': 'ESL One Dota 2',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -226,7 +238,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
return urls
|
return urls
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(useremail, password) = self._get_login_info()
|
useremail, password = self._get_login_info()
|
||||||
if useremail is None:
|
if useremail is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -312,16 +324,18 @@ class FacebookIE(InfoExtractor):
|
|||||||
if server_js_data:
|
if server_js_data:
|
||||||
video_data = extract_video_data(server_js_data.get('instances', []))
|
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||||
|
|
||||||
|
def extract_from_jsmods_instances(js_data):
|
||||||
|
if js_data:
|
||||||
|
return extract_video_data(try_get(
|
||||||
|
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(
|
server_js_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
|
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
|
||||||
webpage, 'js data', default='{}'),
|
webpage, 'js data', default='{}'),
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
if server_js_data:
|
video_data = extract_from_jsmods_instances(server_js_data)
|
||||||
video_data = extract_video_data(try_get(
|
|
||||||
server_js_data, lambda x: x['jsmods']['instances'],
|
|
||||||
list) or [])
|
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
if not fatal_if_no_video:
|
if not fatal_if_no_video:
|
||||||
@ -333,8 +347,33 @@ class FacebookIE(InfoExtractor):
|
|||||||
expected=True)
|
expected=True)
|
||||||
elif '>You must log in to continue' in webpage:
|
elif '>You must log in to continue' in webpage:
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
else:
|
|
||||||
raise ExtractorError('Cannot parse data')
|
# Video info not in first request, do a secondary request using
|
||||||
|
# tahoe player specific URL
|
||||||
|
tahoe_data = self._download_webpage(
|
||||||
|
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
||||||
|
data=urlencode_postdata({
|
||||||
|
'__user': 0,
|
||||||
|
'__a': 1,
|
||||||
|
'__pc': self._search_regex(
|
||||||
|
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||||
|
'pkg cohort', default='PHASED:DEFAULT'),
|
||||||
|
'__rev': self._search_regex(
|
||||||
|
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
||||||
|
'client revision', default='3944515'),
|
||||||
|
}),
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
})
|
||||||
|
tahoe_js_data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
|
||||||
|
'tahoe js data', default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
video_data = extract_from_jsmods_instances(tahoe_js_data)
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
|
raise ExtractorError('Cannot parse data')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in video_data:
|
for f in video_data:
|
||||||
@ -380,7 +419,8 @@ class FacebookIE(InfoExtractor):
|
|||||||
video_title = 'Facebook video #%s' % video_id
|
video_title = 'Facebook video #%s' % video_id
|
||||||
uploader = clean_html(get_element_by_id(
|
uploader = clean_html(get_element_by_id(
|
||||||
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
||||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
|
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
|
||||||
|
fatal=False) or self._og_search_title(webpage, fatal=False)
|
||||||
timestamp = int_or_none(self._search_regex(
|
timestamp = int_or_none(self._search_regex(
|
||||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||||
'timestamp', default=None))
|
'timestamp', default=None))
|
||||||
|
@ -46,7 +46,7 @@ class FC2IE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None or password is None:
|
if username is None or password is None:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return [
|
||||||
|
mobj.group('url')
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
|
||||||
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
host, video_id = re.match(self._VALID_URL, url).groups()
|
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
@ -68,21 +76,41 @@ class FoxNewsIE(AMPIE):
|
|||||||
|
|
||||||
|
|
||||||
class FoxNewsArticleIE(InfoExtractor):
|
class FoxNewsArticleIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||||
IE_NAME = 'foxnews:article'
|
IE_NAME = 'foxnews:article'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# data-video-id
|
||||||
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
|
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
|
||||||
'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
|
'md5': '83d44e1aff1433e7a29a7b537d1700b5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5116295019001',
|
'id': '5116295019001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Trump and Clinton asked to defend positions on Iraq War',
|
'title': 'Trump and Clinton asked to defend positions on Iraq War',
|
||||||
'description': 'Veterans react on \'The Kelly File\'',
|
'description': 'Veterans react on \'The Kelly File\'',
|
||||||
'timestamp': 1473299755,
|
'timestamp': 1473301045,
|
||||||
'upload_date': '20160908',
|
'upload_date': '20160908',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
# iframe embed
|
||||||
|
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5748266721001',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Kyle Kashuv has a positive message for the Trump White House',
|
||||||
|
'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 229,
|
||||||
|
'timestamp': 1520594670,
|
||||||
|
'upload_date': '20180309',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
@ -90,51 +118,10 @@ class FoxNewsArticleIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = self._html_search_regex(
|
video_id = self._html_search_regex(
|
||||||
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
|
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
|
||||||
webpage, 'video ID', group='id')
|
webpage, 'video ID', group='id', default=None)
|
||||||
|
if video_id:
|
||||||
|
return self.url_result(
|
||||||
|
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'http://video.foxnews.com/v/' + video_id,
|
FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
|
||||||
FoxNewsIE.ie_key())
|
|
||||||
|
|
||||||
|
|
||||||
class FoxNewsInsiderIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
|
|
||||||
IE_NAME = 'foxnews:insider'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
|
||||||
'md5': 'a10c755e582d28120c62749b4feb4c0c',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '5099377331001',
|
|
||||||
'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
|
|
||||||
'description': 'Is campus censorship getting out of control?',
|
|
||||||
'timestamp': 1472168725,
|
|
||||||
'upload_date': '20160825',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'add_ie': [FoxNewsIE.ie_key()],
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
|
||||||
description = self._og_search_description(webpage)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': FoxNewsIE.ie_key(),
|
|
||||||
'url': embed_url,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
}
|
|
||||||
|
@ -379,6 +379,31 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
return self._make_url_result(video_id, catalogue)
|
return self._make_url_result(video_id, catalogue)
|
||||||
|
|
||||||
|
|
||||||
|
class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
|
||||||
|
IE_NAME = 'sport.francetvinfo.fr'
|
||||||
|
_VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
|
||||||
|
'timestamp': 1523639962,
|
||||||
|
'upload_date': '20180413',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': [FranceTVIE.ie_key()],
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
|
||||||
|
return self._make_url_result(video_id, 'Sport-web')
|
||||||
|
|
||||||
|
|
||||||
class GenerationWhatIE(InfoExtractor):
|
class GenerationWhatIE(InfoExtractor):
|
||||||
IE_NAME = 'france2.fr:generation-what'
|
IE_NAME = 'france2.fr:generation-what'
|
||||||
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||||
|
@ -51,7 +51,7 @@ class FunimationIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
|
@ -91,7 +91,7 @@ class GDCVaultIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _login(self, webpage_url, display_id):
|
def _login(self, webpage_url, display_id):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None or password is None:
|
if username is None or password is None:
|
||||||
self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
|
self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
|
||||||
return None
|
return None
|
||||||
|
@ -107,6 +107,11 @@ from .springboardplatform import SpringboardPlatformIE
|
|||||||
from .yapfiles import YapFilesIE
|
from .yapfiles import YapFilesIE
|
||||||
from .vice import ViceIE
|
from .vice import ViceIE
|
||||||
from .xfileshare import XFileShareIE
|
from .xfileshare import XFileShareIE
|
||||||
|
from .cloudflarestream import CloudflareStreamIE
|
||||||
|
from .peertube import PeerTubeIE
|
||||||
|
from .indavideo import IndavideoEmbedIE
|
||||||
|
from .apa import APAIE
|
||||||
|
from .foxnews import FoxNewsIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -1390,17 +1395,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# SVT embed
|
|
||||||
{
|
|
||||||
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2900353',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
|
|
||||||
'duration': 27,
|
|
||||||
'age_limit': 0,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# Crooks and Liars embed
|
# Crooks and Liars embed
|
||||||
{
|
{
|
||||||
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
|
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
|
||||||
@ -1471,21 +1465,6 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'expected_warnings': ['Failed to parse JSON Expecting value'],
|
'expected_warnings': ['Failed to parse JSON Expecting value'],
|
||||||
},
|
},
|
||||||
# Ooyala embed
|
|
||||||
{
|
|
||||||
'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'Index/Match versus VLOOKUP.',
|
|
||||||
'title': 'This is what separates the Excel masters from the wannabes',
|
|
||||||
'duration': 191.933,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 downloads
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
# Brightcove URL in single quotes
|
# Brightcove URL in single quotes
|
||||||
{
|
{
|
||||||
'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
|
'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
|
||||||
@ -2013,6 +1992,63 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# CloudflareStream embed
|
||||||
|
'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '31c9291ab41fac05471db4e73aa11717',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '31c9291ab41fac05471db4e73aa11717',
|
||||||
|
},
|
||||||
|
'add_ie': [CloudflareStreamIE.ie_key()],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# PeerTube embed
|
||||||
|
'url': 'https://joinpeertube.org/fr/home/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'home',
|
||||||
|
'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Indavideo embed
|
||||||
|
'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1693903',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Így kell otthon hamburgert sütni',
|
||||||
|
'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
|
||||||
|
'timestamp': 1426330212,
|
||||||
|
'upload_date': '20150314',
|
||||||
|
'uploader': 'StreetKitchen',
|
||||||
|
'uploader_id': '546363',
|
||||||
|
},
|
||||||
|
'add_ie': [IndavideoEmbedIE.ie_key()],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# APA embed via JWPlatform embed
|
||||||
|
'url': 'http://www.vol.at/blue-man-group/5593454',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'jjv85FdZ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
|
||||||
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 254,
|
||||||
|
'timestamp': 1519211149,
|
||||||
|
'upload_date': '20180221',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://share-videos.se/auto/video/83645793?uid=13',
|
'url': 'http://share-videos.se/auto/video/83645793?uid=13',
|
||||||
'md5': 'b68d276de422ab07ee1d49388103f457',
|
'md5': 'b68d276de422ab07ee1d49388103f457',
|
||||||
@ -3025,6 +3061,31 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
|
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
|
||||||
|
|
||||||
|
cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
|
||||||
|
if cloudflarestream_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
|
||||||
|
|
||||||
|
peertube_urls = PeerTubeIE._extract_urls(webpage, url)
|
||||||
|
if peertube_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||||
|
|
||||||
|
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
||||||
|
if indavideo_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
|
||||||
|
|
||||||
|
apa_urls = APAIE._extract_urls(webpage)
|
||||||
|
if apa_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
apa_urls, video_id, video_title, ie=APAIE.ie_key())
|
||||||
|
|
||||||
|
foxnews_urls = FoxNewsIE._extract_urls(webpage)
|
||||||
|
if foxnews_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
|
||||||
|
|
||||||
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
|
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
|
||||||
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
|
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
|
||||||
webpage)]
|
webpage)]
|
||||||
|
@ -1,15 +1,16 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import math
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_chr,
|
|
||||||
compat_ord,
|
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -22,12 +23,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class GloboIE(InfoExtractor):
|
class GloboIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
||||||
|
_NETRC_MACHINE = 'globo'
|
||||||
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
|
||||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
|
|
||||||
|
|
||||||
_RESIGN_EXPIRATION = 86400
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
|
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
|
||||||
'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
|
'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
|
||||||
@ -70,287 +66,51 @@ class GloboIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
class MD5(object):
|
def _real_initialize(self):
|
||||||
HEX_FORMAT_LOWERCASE = 0
|
email, password = self._get_login_info()
|
||||||
HEX_FORMAT_UPPERCASE = 1
|
if email is None:
|
||||||
BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = ''
|
return
|
||||||
BASE64_PAD_CHARACTER_RFC_COMPLIANCE = '='
|
|
||||||
PADDING = '=0xFF01DD'
|
|
||||||
hexcase = 0
|
|
||||||
b64pad = ''
|
|
||||||
|
|
||||||
def __init__(self):
|
try:
|
||||||
pass
|
self._download_json(
|
||||||
|
'https://login.globo.com/api/authentication', None, data=json.dumps({
|
||||||
class JSArray(list):
|
'payload': {
|
||||||
def __getitem__(self, y):
|
'email': email,
|
||||||
try:
|
'password': password,
|
||||||
return list.__getitem__(self, y)
|
'serviceId': 4654,
|
||||||
except IndexError:
|
},
|
||||||
return 0
|
}).encode(), headers={
|
||||||
|
'Content-Type': 'application/json; charset=utf-8',
|
||||||
def __setitem__(self, i, y):
|
})
|
||||||
try:
|
except ExtractorError as e:
|
||||||
return list.__setitem__(self, i, y)
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||||
except IndexError:
|
resp = self._parse_json(e.cause.read(), None)
|
||||||
self.extend([0] * (i - len(self) + 1))
|
raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
|
||||||
self[-1] = y
|
raise
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def hex_md5(cls, param1):
|
|
||||||
return cls.rstr2hex(cls.rstr_md5(cls.str2rstr_utf8(param1)))
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def b64_md5(cls, param1, param2=None):
|
|
||||||
return cls.rstr2b64(cls.rstr_md5(cls.str2rstr_utf8(param1, param2)))
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def any_md5(cls, param1, param2):
|
|
||||||
return cls.rstr2any(cls.rstr_md5(cls.str2rstr_utf8(param1)), param2)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def rstr_md5(cls, param1):
|
|
||||||
return cls.binl2rstr(cls.binl_md5(cls.rstr2binl(param1), len(param1) * 8))
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def rstr2hex(cls, param1):
|
|
||||||
_loc_2 = '0123456789ABCDEF' if cls.hexcase else '0123456789abcdef'
|
|
||||||
_loc_3 = ''
|
|
||||||
for _loc_5 in range(0, len(param1)):
|
|
||||||
_loc_4 = compat_ord(param1[_loc_5])
|
|
||||||
_loc_3 += _loc_2[_loc_4 >> 4 & 15] + _loc_2[_loc_4 & 15]
|
|
||||||
return _loc_3
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def rstr2b64(cls, param1):
|
|
||||||
_loc_2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
|
|
||||||
_loc_3 = ''
|
|
||||||
_loc_4 = len(param1)
|
|
||||||
for _loc_5 in range(0, _loc_4, 3):
|
|
||||||
_loc_6_1 = compat_ord(param1[_loc_5]) << 16
|
|
||||||
_loc_6_2 = compat_ord(param1[_loc_5 + 1]) << 8 if _loc_5 + 1 < _loc_4 else 0
|
|
||||||
_loc_6_3 = compat_ord(param1[_loc_5 + 2]) if _loc_5 + 2 < _loc_4 else 0
|
|
||||||
_loc_6 = _loc_6_1 | _loc_6_2 | _loc_6_3
|
|
||||||
for _loc_7 in range(0, 4):
|
|
||||||
if _loc_5 * 8 + _loc_7 * 6 > len(param1) * 8:
|
|
||||||
_loc_3 += cls.b64pad
|
|
||||||
else:
|
|
||||||
_loc_3 += _loc_2[_loc_6 >> 6 * (3 - _loc_7) & 63]
|
|
||||||
return _loc_3
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def rstr2any(param1, param2):
|
|
||||||
_loc_3 = len(param2)
|
|
||||||
_loc_4 = []
|
|
||||||
_loc_9 = [0] * ((len(param1) >> 2) + 1)
|
|
||||||
for _loc_5 in range(0, len(_loc_9)):
|
|
||||||
_loc_9[_loc_5] = compat_ord(param1[_loc_5 * 2]) << 8 | compat_ord(param1[_loc_5 * 2 + 1])
|
|
||||||
|
|
||||||
while len(_loc_9) > 0:
|
|
||||||
_loc_8 = []
|
|
||||||
_loc_7 = 0
|
|
||||||
for _loc_5 in range(0, len(_loc_9)):
|
|
||||||
_loc_7 = (_loc_7 << 16) + _loc_9[_loc_5]
|
|
||||||
_loc_6 = math.floor(_loc_7 / _loc_3)
|
|
||||||
_loc_7 -= _loc_6 * _loc_3
|
|
||||||
if len(_loc_8) > 0 or _loc_6 > 0:
|
|
||||||
_loc_8[len(_loc_8)] = _loc_6
|
|
||||||
|
|
||||||
_loc_4[len(_loc_4)] = _loc_7
|
|
||||||
_loc_9 = _loc_8
|
|
||||||
|
|
||||||
_loc_10 = ''
|
|
||||||
_loc_5 = len(_loc_4) - 1
|
|
||||||
while _loc_5 >= 0:
|
|
||||||
_loc_10 += param2[_loc_4[_loc_5]]
|
|
||||||
_loc_5 -= 1
|
|
||||||
|
|
||||||
return _loc_10
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def str2rstr_utf8(cls, param1, param2=None):
|
|
||||||
_loc_3 = ''
|
|
||||||
_loc_4 = -1
|
|
||||||
if not param2:
|
|
||||||
param2 = cls.PADDING
|
|
||||||
param1 = param1 + param2[1:9]
|
|
||||||
while True:
|
|
||||||
_loc_4 += 1
|
|
||||||
if _loc_4 >= len(param1):
|
|
||||||
break
|
|
||||||
_loc_5 = compat_ord(param1[_loc_4])
|
|
||||||
_loc_6 = compat_ord(param1[_loc_4 + 1]) if _loc_4 + 1 < len(param1) else 0
|
|
||||||
if 55296 <= _loc_5 <= 56319 and 56320 <= _loc_6 <= 57343:
|
|
||||||
_loc_5 = 65536 + ((_loc_5 & 1023) << 10) + (_loc_6 & 1023)
|
|
||||||
_loc_4 += 1
|
|
||||||
if _loc_5 <= 127:
|
|
||||||
_loc_3 += compat_chr(_loc_5)
|
|
||||||
continue
|
|
||||||
if _loc_5 <= 2047:
|
|
||||||
_loc_3 += compat_chr(192 | _loc_5 >> 6 & 31) + compat_chr(128 | _loc_5 & 63)
|
|
||||||
continue
|
|
||||||
if _loc_5 <= 65535:
|
|
||||||
_loc_3 += compat_chr(224 | _loc_5 >> 12 & 15) + compat_chr(128 | _loc_5 >> 6 & 63) + compat_chr(
|
|
||||||
128 | _loc_5 & 63)
|
|
||||||
continue
|
|
||||||
if _loc_5 <= 2097151:
|
|
||||||
_loc_3 += compat_chr(240 | _loc_5 >> 18 & 7) + compat_chr(128 | _loc_5 >> 12 & 63) + compat_chr(
|
|
||||||
128 | _loc_5 >> 6 & 63) + compat_chr(128 | _loc_5 & 63)
|
|
||||||
return _loc_3
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def rstr2binl(param1):
|
|
||||||
_loc_2 = [0] * ((len(param1) >> 2) + 1)
|
|
||||||
for _loc_3 in range(0, len(_loc_2)):
|
|
||||||
_loc_2[_loc_3] = 0
|
|
||||||
for _loc_3 in range(0, len(param1) * 8, 8):
|
|
||||||
_loc_2[_loc_3 >> 5] |= (compat_ord(param1[_loc_3 // 8]) & 255) << _loc_3 % 32
|
|
||||||
return _loc_2
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def binl2rstr(param1):
|
|
||||||
_loc_2 = ''
|
|
||||||
for _loc_3 in range(0, len(param1) * 32, 8):
|
|
||||||
_loc_2 += compat_chr(param1[_loc_3 >> 5] >> _loc_3 % 32 & 255)
|
|
||||||
return _loc_2
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def binl_md5(cls, param1, param2):
|
|
||||||
param1 = cls.JSArray(param1)
|
|
||||||
param1[param2 >> 5] |= 128 << param2 % 32
|
|
||||||
param1[(param2 + 64 >> 9 << 4) + 14] = param2
|
|
||||||
_loc_3 = 1732584193
|
|
||||||
_loc_4 = -271733879
|
|
||||||
_loc_5 = -1732584194
|
|
||||||
_loc_6 = 271733878
|
|
||||||
for _loc_7 in range(0, len(param1), 16):
|
|
||||||
_loc_8 = _loc_3
|
|
||||||
_loc_9 = _loc_4
|
|
||||||
_loc_10 = _loc_5
|
|
||||||
_loc_11 = _loc_6
|
|
||||||
_loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 0], 7, -680876936)
|
|
||||||
_loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 1], 12, -389564586)
|
|
||||||
_loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 2], 17, 606105819)
|
|
||||||
_loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 3], 22, -1044525330)
|
|
||||||
_loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 4], 7, -176418897)
|
|
||||||
_loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 5], 12, 1200080426)
|
|
||||||
_loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 6], 17, -1473231341)
|
|
||||||
_loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 7], 22, -45705983)
|
|
||||||
_loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 8], 7, 1770035416)
|
|
||||||
_loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 9], 12, -1958414417)
|
|
||||||
_loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 10], 17, -42063)
|
|
||||||
_loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 11], 22, -1990404162)
|
|
||||||
_loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 12], 7, 1804603682)
|
|
||||||
_loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 13], 12, -40341101)
|
|
||||||
_loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 14], 17, -1502002290)
|
|
||||||
_loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 15], 22, 1236535329)
|
|
||||||
_loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 1], 5, -165796510)
|
|
||||||
_loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 6], 9, -1069501632)
|
|
||||||
_loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 11], 14, 643717713)
|
|
||||||
_loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 0], 20, -373897302)
|
|
||||||
_loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 5], 5, -701558691)
|
|
||||||
_loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 10], 9, 38016083)
|
|
||||||
_loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 15], 14, -660478335)
|
|
||||||
_loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 4], 20, -405537848)
|
|
||||||
_loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 9], 5, 568446438)
|
|
||||||
_loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 14], 9, -1019803690)
|
|
||||||
_loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 3], 14, -187363961)
|
|
||||||
_loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 8], 20, 1163531501)
|
|
||||||
_loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 13], 5, -1444681467)
|
|
||||||
_loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 2], 9, -51403784)
|
|
||||||
_loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 7], 14, 1735328473)
|
|
||||||
_loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 12], 20, -1926607734)
|
|
||||||
_loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 5], 4, -378558)
|
|
||||||
_loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 8], 11, -2022574463)
|
|
||||||
_loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 11], 16, 1839030562)
|
|
||||||
_loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 14], 23, -35309556)
|
|
||||||
_loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 1], 4, -1530992060)
|
|
||||||
_loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 4], 11, 1272893353)
|
|
||||||
_loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 7], 16, -155497632)
|
|
||||||
_loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 10], 23, -1094730640)
|
|
||||||
_loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 13], 4, 681279174)
|
|
||||||
_loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 0], 11, -358537222)
|
|
||||||
_loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 3], 16, -722521979)
|
|
||||||
_loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 6], 23, 76029189)
|
|
||||||
_loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 9], 4, -640364487)
|
|
||||||
_loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 12], 11, -421815835)
|
|
||||||
_loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 15], 16, 530742520)
|
|
||||||
_loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 2], 23, -995338651)
|
|
||||||
_loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 0], 6, -198630844)
|
|
||||||
_loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 7], 10, 1126891415)
|
|
||||||
_loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 14], 15, -1416354905)
|
|
||||||
_loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 5], 21, -57434055)
|
|
||||||
_loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 12], 6, 1700485571)
|
|
||||||
_loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 3], 10, -1894986606)
|
|
||||||
_loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 10], 15, -1051523)
|
|
||||||
_loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 1], 21, -2054922799)
|
|
||||||
_loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 8], 6, 1873313359)
|
|
||||||
_loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 15], 10, -30611744)
|
|
||||||
_loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 6], 15, -1560198380)
|
|
||||||
_loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 13], 21, 1309151649)
|
|
||||||
_loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 4], 6, -145523070)
|
|
||||||
_loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 11], 10, -1120210379)
|
|
||||||
_loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 2], 15, 718787259)
|
|
||||||
_loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 9], 21, -343485551)
|
|
||||||
_loc_3 = cls.safe_add(_loc_3, _loc_8)
|
|
||||||
_loc_4 = cls.safe_add(_loc_4, _loc_9)
|
|
||||||
_loc_5 = cls.safe_add(_loc_5, _loc_10)
|
|
||||||
_loc_6 = cls.safe_add(_loc_6, _loc_11)
|
|
||||||
return [_loc_3, _loc_4, _loc_5, _loc_6]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def md5_cmn(cls, param1, param2, param3, param4, param5, param6):
|
|
||||||
return cls.safe_add(
|
|
||||||
cls.bit_rol(cls.safe_add(cls.safe_add(param2, param1), cls.safe_add(param4, param6)), param5), param3)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def md5_ff(cls, param1, param2, param3, param4, param5, param6, param7):
|
|
||||||
return cls.md5_cmn(param2 & param3 | ~param2 & param4, param1, param2, param5, param6, param7)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def md5_gg(cls, param1, param2, param3, param4, param5, param6, param7):
|
|
||||||
return cls.md5_cmn(param2 & param4 | param3 & ~param4, param1, param2, param5, param6, param7)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def md5_hh(cls, param1, param2, param3, param4, param5, param6, param7):
|
|
||||||
return cls.md5_cmn(param2 ^ param3 ^ param4, param1, param2, param5, param6, param7)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def md5_ii(cls, param1, param2, param3, param4, param5, param6, param7):
|
|
||||||
return cls.md5_cmn(param3 ^ (param2 | ~param4), param1, param2, param5, param6, param7)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def safe_add(cls, param1, param2):
|
|
||||||
_loc_3 = (param1 & 65535) + (param2 & 65535)
|
|
||||||
_loc_4 = (param1 >> 16) + (param2 >> 16) + (_loc_3 >> 16)
|
|
||||||
return cls.lshift(_loc_4, 16) | _loc_3 & 65535
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def bit_rol(cls, param1, param2):
|
|
||||||
return cls.lshift(param1, param2) | (param1 & 0xFFFFFFFF) >> (32 - param2)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def lshift(value, count):
|
|
||||||
r = (0xFFFFFFFF & value) << count
|
|
||||||
return -(~(r - 1) & 0xFFFFFFFF) if r > 0x7FFFFFFF else r
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
|
'http://api.globovideos.com/videos/%s/playlist' % video_id,
|
||||||
|
video_id)['videos'][0]
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for resource in video['resources']:
|
for resource in video['resources']:
|
||||||
resource_id = resource.get('_id')
|
resource_id = resource.get('_id')
|
||||||
if not resource_id or resource_id.endswith('manifest'):
|
resource_url = resource.get('url')
|
||||||
|
if not resource_id or not resource_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
security = self._download_json(
|
security = self._download_json(
|
||||||
self._SECURITY_URL_TEMPLATE % (video_id, resource_id),
|
'http://security.video.globo.com/videos/%s/hash' % video_id,
|
||||||
video_id, 'Downloading security hash for %s' % resource_id)
|
video_id, 'Downloading security hash for %s' % resource_id, query={
|
||||||
|
'player': 'flash',
|
||||||
|
'version': '17.0.0.132',
|
||||||
|
'resource_id': resource_id,
|
||||||
|
})
|
||||||
|
|
||||||
security_hash = security.get('hash')
|
security_hash = security.get('hash')
|
||||||
if not security_hash:
|
if not security_hash:
|
||||||
@ -361,22 +121,28 @@ class GloboIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
hash_code = security_hash[:2]
|
hash_code = security_hash[:2]
|
||||||
received_time = int(security_hash[2:12])
|
received_time = security_hash[2:12]
|
||||||
received_random = security_hash[12:22]
|
received_random = security_hash[12:22]
|
||||||
received_md5 = security_hash[22:]
|
received_md5 = security_hash[22:]
|
||||||
|
|
||||||
sign_time = received_time + self._RESIGN_EXPIRATION
|
sign_time = compat_str(int(received_time) + 86400)
|
||||||
padding = '%010d' % random.randint(1, 10000000000)
|
padding = '%010d' % random.randint(1, 10000000000)
|
||||||
|
|
||||||
signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
|
md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode()
|
||||||
signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
|
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
|
||||||
|
signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5
|
||||||
|
|
||||||
resource_url = resource['url']
|
|
||||||
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
|
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
|
||||||
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
|
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif resource_id.endswith('mpd') or resource_url.endswith('.mpd'):
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
signed_url, resource_id, mpd_id='dash', fatal=False))
|
||||||
|
elif resource_id.endswith('manifest') or resource_url.endswith('/manifest'):
|
||||||
|
formats.extend(self._extract_ism_formats(
|
||||||
|
signed_url, resource_id, ism_id='mss', fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': signed_url,
|
'url': signed_url,
|
||||||
|
@ -123,7 +123,7 @@ class GoIE(AdobePassIE):
|
|||||||
'adobe_requestor_id': requestor_id,
|
'adobe_requestor_id': requestor_id,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
self._initialize_geo_bypass(['US'])
|
self._initialize_geo_bypass({'countries': ['US']})
|
||||||
entitlement = self._download_json(
|
entitlement = self._download_json(
|
||||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||||
video_id, data=urlencode_postdata(data))
|
video_id, data=urlencode_postdata(data))
|
||||||
|
@ -6,7 +6,9 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_age_limit,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -23,6 +25,7 @@ class Go90IE(InfoExtractor):
|
|||||||
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
|
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
|
||||||
'timestamp': 1491868800,
|
'timestamp': 1491868800,
|
||||||
'upload_date': '20170411',
|
'upload_date': '20170411',
|
||||||
|
'age_limit': 14,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -33,6 +36,8 @@ class Go90IE(InfoExtractor):
|
|||||||
video_id, headers={
|
video_id, headers={
|
||||||
'Content-Type': 'application/json; charset=utf-8',
|
'Content-Type': 'application/json; charset=utf-8',
|
||||||
}, data=b'{"client":"web","device_type":"pc"}')
|
}, data=b'{"client":"web","device_type":"pc"}')
|
||||||
|
if video_data.get('requires_drm'):
|
||||||
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
main_video_asset = video_data['main_video_asset']
|
main_video_asset = video_data['main_video_asset']
|
||||||
|
|
||||||
episode_number = int_or_none(video_data.get('episode_number'))
|
episode_number = int_or_none(video_data.get('episode_number'))
|
||||||
@ -123,4 +128,5 @@ class Go90IE(InfoExtractor):
|
|||||||
'season_number': season_number,
|
'season_number': season_number,
|
||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'age_limit': parse_age_limit(video_data.get('rating')),
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,8 @@ class HiDiveIE(InfoExtractor):
|
|||||||
# Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
|
# Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
|
||||||
# so disabling geo bypass completely
|
# so disabling geo bypass completely
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
_NETRC_MACHINE = 'hidive'
|
||||||
|
_LOGIN_URL = 'https://www.hidive.com/account/login'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
|
'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
|
||||||
@ -31,8 +33,26 @@ class HiDiveIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Requires Authentication',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
email, password = self._get_login_info()
|
||||||
|
if email is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
webpage = self._download_webpage(self._LOGIN_URL, None)
|
||||||
|
form = self._search_regex(
|
||||||
|
r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
|
||||||
|
webpage, 'login form')
|
||||||
|
data = self._hidden_inputs(form)
|
||||||
|
data.update({
|
||||||
|
'Email': email,
|
||||||
|
'Password': password,
|
||||||
|
})
|
||||||
|
self._download_webpage(
|
||||||
|
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
title, key = mobj.group('title', 'key')
|
title, key = mobj.group('title', 'key')
|
||||||
@ -43,6 +63,7 @@ class HiDiveIE(InfoExtractor):
|
|||||||
data=urlencode_postdata({
|
data=urlencode_postdata({
|
||||||
'Title': title,
|
'Title': title,
|
||||||
'Key': key,
|
'Key': key,
|
||||||
|
'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
|
||||||
}))
|
}))
|
||||||
|
|
||||||
restriction = settings.get('restrictionReason')
|
restriction = settings.get('restrictionReason')
|
||||||
@ -79,6 +100,7 @@ class HiDiveIE(InfoExtractor):
|
|||||||
subtitles.setdefault(cc_lang, []).append({
|
subtitles.setdefault(cc_lang, []).append({
|
||||||
'url': cc_url,
|
'url': cc_url,
|
||||||
})
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
season_number = int_or_none(self._search_regex(
|
season_number = int_or_none(self._search_regex(
|
||||||
r's(\d+)', key, 'season number', default=None))
|
r's(\d+)', key, 'season number', default=None))
|
||||||
|
@ -66,7 +66,7 @@ class HRTiBaseIE(InfoExtractor):
|
|||||||
self._logout_url = modules['user']['resources']['logout']['uri']
|
self._logout_url = modules['user']['resources']['logout']['uri']
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
# TODO: figure out authentication with cookies
|
# TODO: figure out authentication with cookies
|
||||||
if username is None or password is None:
|
if username is None or password is None:
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
|
@ -7,23 +7,23 @@ from ..compat import compat_str
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
parse_duration,
|
||||||
qualities,
|
qualities,
|
||||||
remove_end,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ImdbIE(InfoExtractor):
|
class ImdbIE(InfoExtractor):
|
||||||
IE_NAME = 'imdb'
|
IE_NAME = 'imdb'
|
||||||
IE_DESC = 'Internet Movie Database trailers'
|
IE_DESC = 'Internet Movie Database trailers'
|
||||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title).+?[/-]vi(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2524815897',
|
'id': '2524815897',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ice Age: Continental Drift Trailer (No. 2)',
|
'title': 'No. 2 from Ice Age: Continental Drift (2012)',
|
||||||
'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
|
'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
||||||
@ -40,82 +40,67 @@ class ImdbIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
|
'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.imdb.com/list/ls009921623/videoplayer/vi260482329',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
|
webpage = self._download_webpage(
|
||||||
descr = self._html_search_regex(
|
'https://www.imdb.com/videoplayer/vi' + video_id, video_id)
|
||||||
r'(?s)<span itemprop="description">(.*?)</span>',
|
video_metadata = self._parse_json(self._search_regex(
|
||||||
webpage, 'description', fatal=False)
|
r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage,
|
||||||
player_url = 'http://www.imdb.com/video/imdb/vi%s/imdb/single' % video_id
|
'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id]
|
||||||
player_page = self._download_webpage(
|
title = self._html_search_meta(
|
||||||
player_url, video_id, 'Downloading player page')
|
['og:title', 'twitter:title'], webpage) or self._html_search_regex(
|
||||||
# the player page contains the info for the default format, we have to
|
r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title']
|
||||||
# fetch other pages for the rest of the formats
|
|
||||||
extra_formats = re.findall(r'href="(?P<url>%s.*?)".*?>(?P<name>.*?)<' % re.escape(player_url), player_page)
|
|
||||||
format_pages = [
|
|
||||||
self._download_webpage(
|
|
||||||
f_url, video_id, 'Downloading info for %s format' % f_name)
|
|
||||||
for f_url, f_name in extra_formats]
|
|
||||||
format_pages.append(player_page)
|
|
||||||
|
|
||||||
quality = qualities(('SD', '480p', '720p', '1080p'))
|
quality = qualities(('SD', '480p', '720p', '1080p'))
|
||||||
formats = []
|
formats = []
|
||||||
for format_page in format_pages:
|
for encoding in video_metadata.get('encodings', []):
|
||||||
json_data = self._search_regex(
|
if not encoding or not isinstance(encoding, dict):
|
||||||
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
|
|
||||||
format_page, 'json data', flags=re.DOTALL)
|
|
||||||
info = self._parse_json(json_data, video_id, fatal=False)
|
|
||||||
if not info:
|
|
||||||
continue
|
continue
|
||||||
format_info = info.get('videoPlayerObject', {}).get('video', {})
|
video_url = encoding.get('videoUrl')
|
||||||
if not format_info:
|
if not video_url or not isinstance(video_url, compat_str):
|
||||||
continue
|
continue
|
||||||
video_info_list = format_info.get('videoInfoList')
|
ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
|
||||||
if not video_info_list or not isinstance(video_info_list, list):
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
continue
|
continue
|
||||||
for video_info in video_info_list:
|
format_id = encoding.get('definition')
|
||||||
if not video_info or not isinstance(video_info, dict):
|
formats.append({
|
||||||
continue
|
'format_id': format_id,
|
||||||
video_url = video_info.get('videoUrl')
|
'url': video_url,
|
||||||
if not video_url or not isinstance(video_url, compat_str):
|
'ext': ext,
|
||||||
continue
|
'quality': quality(format_id),
|
||||||
if (video_info.get('videoMimeType') == 'application/x-mpegURL' or
|
})
|
||||||
determine_ext(video_url) == 'm3u8'):
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
continue
|
|
||||||
format_id = format_info.get('ffname')
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': video_url,
|
|
||||||
'ext': mimetype2ext(video_info.get('videoMimeType')),
|
|
||||||
'quality': quality(format_id),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': remove_end(self._og_search_title(webpage), ' - IMDb'),
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': descr,
|
'description': video_metadata.get('description'),
|
||||||
'thumbnail': format_info.get('slate'),
|
'thumbnail': video_metadata.get('slate', {}).get('url'),
|
||||||
|
'duration': parse_duration(video_metadata.get('duration')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ImdbListIE(InfoExtractor):
|
class ImdbListIE(InfoExtractor):
|
||||||
IE_NAME = 'imdb:list'
|
IE_NAME = 'imdb:list'
|
||||||
IE_DESC = 'Internet Movie Database lists'
|
IE_DESC = 'Internet Movie Database lists'
|
||||||
_VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
|
_VALID_URL = r'https?://(?:www\.)?imdb\.com/list/ls(?P<id>\d{9})(?!/videoplayer/vi\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
|
'url': 'https://www.imdb.com/list/ls009921623/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'JFs9NWw6XI0',
|
'id': '009921623',
|
||||||
'title': 'March 23, 2012 Releases',
|
'title': 'The Bourne Legacy',
|
||||||
|
'description': 'A list of trailers, clips, and more from The Bourne Legacy, starring Jeremy Renner and Rachel Weisz.',
|
||||||
},
|
},
|
||||||
'playlist_count': 7,
|
'playlist_count': 8,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -123,9 +108,13 @@ class ImdbListIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, list_id)
|
webpage = self._download_webpage(url, list_id)
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result('http://www.imdb.com' + m, 'Imdb')
|
self.url_result('http://www.imdb.com' + m, 'Imdb')
|
||||||
for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]
|
for m in re.findall(r'href="(/list/ls%s/videoplayer/vi[^"]+)"' % list_id, webpage)]
|
||||||
|
|
||||||
list_title = self._html_search_regex(
|
list_title = self._html_search_regex(
|
||||||
r'<h1 class="header">(.*?)</h1>', webpage, 'list title')
|
r'<h1[^>]+class="[^"]*header[^"]*"[^>]*>(.*?)</h1>',
|
||||||
|
webpage, 'list title')
|
||||||
|
list_description = self._html_search_regex(
|
||||||
|
r'<div[^>]+class="[^"]*list-description[^"]*"[^>]*><p>(.*?)</p>',
|
||||||
|
webpage, 'list description')
|
||||||
|
|
||||||
return self.playlist_result(entries, list_id, list_title)
|
return self.playlist_result(entries, list_id, list_title, list_description)
|
||||||
|
@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
@ -21,7 +20,7 @@ class ImgurIE(InfoExtractor):
|
|||||||
'id': 'A61SaA1',
|
'id': 'A61SaA1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||||
'description': 'Imgur: The most awesome images on the Internet.',
|
'description': 'Imgur: The magic of the Internet',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://imgur.com/A61SaA1',
|
'url': 'https://imgur.com/A61SaA1',
|
||||||
@ -29,7 +28,7 @@ class ImgurIE(InfoExtractor):
|
|||||||
'id': 'A61SaA1',
|
'id': 'A61SaA1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||||
'description': 'Imgur: The most awesome images on the Internet.',
|
'description': 'Imgur: The magic of the Internet',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://imgur.com/gallery/YcAQlkx',
|
'url': 'https://imgur.com/gallery/YcAQlkx',
|
||||||
@ -37,8 +36,6 @@ class ImgurIE(InfoExtractor):
|
|||||||
'id': 'YcAQlkx',
|
'id': 'YcAQlkx',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||||
'description': 'Imgur: The most awesome images on the Internet.'
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||||
@ -50,8 +47,8 @@ class ImgurIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id)
|
||||||
compat_urlparse.urljoin(url, video_id), video_id)
|
webpage = self._download_webpage(gifv_url, video_id)
|
||||||
|
|
||||||
width = int_or_none(self._og_search_property(
|
width = int_or_none(self._og_search_property(
|
||||||
'video:width', webpage, default=None))
|
'video:width', webpage, default=None))
|
||||||
@ -107,7 +104,7 @@ class ImgurIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage, default=None),
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,6 +21,21 @@ class IncIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# div with id=kaltura_player_1_kqs38cgm
|
||||||
|
'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_kqs38cgm',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Branson: "In the end, you have to say, Screw it. Just do it."',
|
||||||
|
'description': 'md5:21b832d034f9af5191ca5959da5e9cb6',
|
||||||
|
'timestamp': 1364403232,
|
||||||
|
'upload_date': '20130327',
|
||||||
|
'uploader_id': 'incdigital@inc.com',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
|
'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -31,10 +46,13 @@ class IncIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
partner_id = self._search_regex(
|
partner_id = self._search_regex(
|
||||||
r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage, 'partner id')
|
r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage,
|
||||||
|
'partner id', default='1034971')
|
||||||
|
|
||||||
kaltura_id = self._parse_json(self._search_regex(
|
kaltura_id = self._search_regex(
|
||||||
r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
|
r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id',
|
||||||
|
default=None, group='id') or self._parse_json(self._search_regex(
|
||||||
|
r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
|
||||||
display_id)['vid_kaltura_id']
|
display_id)['vid_kaltura_id']
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
|
@ -1,11 +1,15 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -13,7 +17,7 @@ class IndavideoEmbedIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
|
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
|
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
|
||||||
'md5': 'f79b009c66194acacd40712a6778acfa',
|
'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1837039',
|
'id': '1837039',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -36,6 +40,20 @@ class IndavideoEmbedIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
# Some example URLs covered by generic extractor:
|
||||||
|
# http://indavideo.hu/video/Vicces_cica_1
|
||||||
|
# http://index.indavideo.hu/video/2015_0728_beregszasz
|
||||||
|
# http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
|
||||||
|
# http://erotika.indavideo.hu/video/Amator_tini_punci
|
||||||
|
# http://film.indavideo.hu/video/f_hrom_nagymamm_volt
|
||||||
|
# http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return re.findall(
|
||||||
|
r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
|
||||||
|
webpage)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
@ -45,7 +63,14 @@ class IndavideoEmbedIE(InfoExtractor):
|
|||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
|
|
||||||
video_urls = video.get('video_files', [])
|
video_urls = []
|
||||||
|
|
||||||
|
video_files = video.get('video_files')
|
||||||
|
if isinstance(video_files, list):
|
||||||
|
video_urls.extend(video_files)
|
||||||
|
elif isinstance(video_files, dict):
|
||||||
|
video_urls.extend(video_files.values())
|
||||||
|
|
||||||
video_file = video.get('video_file')
|
video_file = video.get('video_file')
|
||||||
if video:
|
if video:
|
||||||
video_urls.append(video_file)
|
video_urls.append(video_file)
|
||||||
@ -58,11 +83,23 @@ class IndavideoEmbedIE(InfoExtractor):
|
|||||||
if flv_url not in video_urls:
|
if flv_url not in video_urls:
|
||||||
video_urls.append(flv_url)
|
video_urls.append(flv_url)
|
||||||
|
|
||||||
formats = [{
|
filesh = video.get('filesh')
|
||||||
'url': video_url,
|
|
||||||
'height': int_or_none(self._search_regex(
|
formats = []
|
||||||
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)),
|
for video_url in video_urls:
|
||||||
} for video_url in video_urls]
|
height = int_or_none(self._search_regex(
|
||||||
|
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
|
||||||
|
if filesh:
|
||||||
|
if not height:
|
||||||
|
continue
|
||||||
|
token = filesh.get(compat_str(height))
|
||||||
|
if token is None:
|
||||||
|
continue
|
||||||
|
video_url = update_url_query(video_url, {'token': token})
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
timestamp = video.get('date')
|
timestamp = video.get('date')
|
||||||
@ -89,55 +126,3 @@ class IndavideoEmbedIE(InfoExtractor):
|
|||||||
'tags': tags,
|
'tags': tags,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class IndavideoIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P<id>[^/#?]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://indavideo.hu/video/Vicces_cica_1',
|
|
||||||
'md5': '8c82244ba85d2a2310275b318eb51eac',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1335611',
|
|
||||||
'display_id': 'Vicces_cica_1',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Vicces cica',
|
|
||||||
'description': 'Játszik a tablettel. :D',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'uploader': 'Jet_Pack',
|
|
||||||
'uploader_id': '491217',
|
|
||||||
'timestamp': 1390821212,
|
|
||||||
'upload_date': '20140127',
|
|
||||||
'duration': 7,
|
|
||||||
'age_limit': 0,
|
|
||||||
'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'],
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
embed_url = self._search_regex(
|
|
||||||
r'<link[^>]+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': 'IndavideoEmbed',
|
|
||||||
'url': embed_url,
|
|
||||||
'display_id': display_id,
|
|
||||||
}
|
|
||||||
|
@ -239,7 +239,7 @@ class IqiyiIE(InfoExtractor):
|
|||||||
return ohdave_rsa_encrypt(data, e, N)
|
return ohdave_rsa_encrypt(data, e, N)
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
|
|
||||||
# No authentication to be performed
|
# No authentication to be performed
|
||||||
if not username:
|
if not username:
|
||||||
|
@ -7,19 +7,22 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .brightcove import BrightcoveNewIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_etree_register_namespace,
|
compat_etree_register_namespace,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
parse_duration,
|
||||||
|
smuggle_url,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
int_or_none,
|
|
||||||
parse_duration,
|
|
||||||
ExtractorError,
|
|
||||||
determine_ext,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -41,6 +44,14 @@ class ITVIE(InfoExtractor):
|
|||||||
# unavailable via data-playlist-url
|
# unavailable via data-playlist-url
|
||||||
'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
|
'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# InvalidVodcrid
|
||||||
|
'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# ContentUnavailable
|
||||||
|
'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -119,63 +130,65 @@ class ITVIE(InfoExtractor):
|
|||||||
|
|
||||||
resp_env = self._download_xml(
|
resp_env = self._download_xml(
|
||||||
params['data-playlist-url'], video_id,
|
params['data-playlist-url'], video_id,
|
||||||
headers=headers, data=etree.tostring(req_env))
|
headers=headers, data=etree.tostring(req_env), fatal=False)
|
||||||
playlist = xpath_element(resp_env, './/Playlist')
|
if resp_env:
|
||||||
if playlist is None:
|
playlist = xpath_element(resp_env, './/Playlist')
|
||||||
fault_code = xpath_text(resp_env, './/faultcode')
|
if playlist is None:
|
||||||
fault_string = xpath_text(resp_env, './/faultstring')
|
fault_code = xpath_text(resp_env, './/faultcode')
|
||||||
if fault_code == 'InvalidGeoRegion':
|
fault_string = xpath_text(resp_env, './/faultstring')
|
||||||
self.raise_geo_restricted(
|
if fault_code == 'InvalidGeoRegion':
|
||||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
self.raise_geo_restricted(
|
||||||
elif fault_code != 'InvalidEntity':
|
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||||
raise ExtractorError(
|
elif fault_code not in (
|
||||||
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
|
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
|
||||||
info.update({
|
raise ExtractorError(
|
||||||
'title': self._og_search_title(webpage),
|
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
|
||||||
'episode_title': params.get('data-video-episode'),
|
info.update({
|
||||||
'series': params.get('data-video-title'),
|
'title': self._og_search_title(webpage),
|
||||||
})
|
'episode_title': params.get('data-video-episode'),
|
||||||
else:
|
'series': params.get('data-video-title'),
|
||||||
title = xpath_text(playlist, 'EpisodeTitle', default=None)
|
})
|
||||||
info.update({
|
else:
|
||||||
'title': title,
|
title = xpath_text(playlist, 'EpisodeTitle', default=None)
|
||||||
'episode_title': title,
|
info.update({
|
||||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
'title': title,
|
||||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
'episode_title': title,
|
||||||
'duration': parse_duration(xpath_text(playlist, 'Duration')),
|
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||||
})
|
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
'duration': parse_duration(xpath_text(playlist, 'Duration')),
|
||||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
})
|
||||||
rtmp_url = media_files.attrib['base']
|
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||||
|
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
||||||
|
rtmp_url = media_files.attrib['base']
|
||||||
|
|
||||||
for media_file in media_files.findall('MediaFile'):
|
for media_file in media_files.findall('MediaFile'):
|
||||||
play_path = xpath_text(media_file, 'URL')
|
play_path = xpath_text(media_file, 'URL')
|
||||||
if not play_path:
|
if not play_path:
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||||
f = {
|
f = {
|
||||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||||
'play_path': play_path,
|
'play_path': play_path,
|
||||||
# Providing this swfVfy allows to avoid truncated downloads
|
# Providing this swfVfy allows to avoid truncated downloads
|
||||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||||
'page_url': url,
|
'page_url': url,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
}
|
}
|
||||||
app = self._search_regex(
|
app = self._search_regex(
|
||||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||||
if app:
|
if app:
|
||||||
f.update({
|
f.update({
|
||||||
'url': rtmp_url.split('?', 1)[0],
|
'url': rtmp_url.split('?', 1)[0],
|
||||||
'app': app,
|
'app': app,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
f['url'] = rtmp_url
|
f['url'] = rtmp_url
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
||||||
if caption_url.text:
|
if caption_url.text:
|
||||||
extract_subtitle(caption_url.text)
|
extract_subtitle(caption_url.text)
|
||||||
|
|
||||||
ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
|
ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
|
||||||
hmac = params.get('data-video-hmac')
|
hmac = params.get('data-video-hmac')
|
||||||
@ -250,4 +263,49 @@ class ITVIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
})
|
})
|
||||||
return info
|
|
||||||
|
webpage_info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
if not webpage_info.get('title'):
|
||||||
|
webpage_info['title'] = self._html_search_regex(
|
||||||
|
r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
|
||||||
|
webpage, 'title', default=None) or self._og_search_title(
|
||||||
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
'twitter:title', webpage, 'title',
|
||||||
|
default=None) or webpage_info['episode']
|
||||||
|
|
||||||
|
return merge_dicts(info, webpage_info)
|
||||||
|
|
||||||
|
|
||||||
|
class ITVBTCCIE(InfoExtractor):
    """Playlist extractor for ``itv.com/btcc/...`` pages.

    Every ``data-video-id`` attribute found in the page becomes one playlist
    entry, which is handed over to the Brightcove extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
        'info_dict': {
            'id': 'btcc-2018-all-the-action-from-brands-hatch',
            'title': 'BTCC 2018: All the action from Brands Hatch',
        },
        'playlist_mincount': 9,
    }
    # Brightcove player page; %s is replaced with the numeric video id.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Build a playlist of Brightcove entries from the page at *url*.

        The playlist id is the last path component matched by ``_VALID_URL``;
        the title comes from the page's OpenGraph title and may be missing
        (the lookup is non-fatal).
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        entries = [
            self.url_result(
                smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
                    # ITV does not like some GB IP ranges, so here are some
                    # IP blocks it accepts
                    'geo_ip_blocks': [
                        '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
                    ],
                    'referrer': url,
                }),
                ie=BrightcoveNewIE.ie_key(), video_id=video_id)
            for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]

        title = self._og_search_title(webpage, fatal=False)

        return self.playlist_result(entries, playlist_id, title)
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
@ -57,12 +58,33 @@ class IzleseneIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
url = 'http://www.izlesene.com/video/%s' % video_id
|
webpage = self._download_webpage('http://www.izlesene.com/video/%s' % video_id, video_id)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
video = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'videoObj\s*=\s*({.+?})\s*;\s*\n', webpage, 'streams'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
title = video.get('videoTitle') or self._og_search_title(webpage)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for stream in video['media']['level']:
|
||||||
|
source_url = stream.get('source')
|
||||||
|
if not source_url or not isinstance(source_url, compat_str):
|
||||||
|
continue
|
||||||
|
ext = determine_ext(url, 'mp4')
|
||||||
|
quality = stream.get('value')
|
||||||
|
height = int_or_none(quality)
|
||||||
|
formats.append({
|
||||||
|
'format_id': '%sp' % quality if quality else 'sd',
|
||||||
|
'url': compat_urllib_parse_unquote(source_url),
|
||||||
|
'ext': ext,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
|
||||||
description = self._og_search_description(webpage, default=None)
|
description = self._og_search_description(webpage, default=None)
|
||||||
thumbnail = self._proto_relative_url(
|
thumbnail = video.get('posterURL') or self._proto_relative_url(
|
||||||
self._og_search_thumbnail(webpage), scheme='http:')
|
self._og_search_thumbnail(webpage), scheme='http:')
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
@ -71,41 +93,15 @@ class IzleseneIE(InfoExtractor):
|
|||||||
timestamp = parse_iso8601(self._html_search_meta(
|
timestamp = parse_iso8601(self._html_search_meta(
|
||||||
'uploadDate', webpage, 'upload date'))
|
'uploadDate', webpage, 'upload date'))
|
||||||
|
|
||||||
duration = float_or_none(self._html_search_regex(
|
duration = float_or_none(video.get('duration') or self._html_search_regex(
|
||||||
r'"videoduration"\s*:\s*"([^"]+)"',
|
r'videoduration["\']?\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||||
webpage, 'duration', fatal=False), scale=1000)
|
webpage, 'duration', fatal=False, group='value'), scale=1000)
|
||||||
|
|
||||||
view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
|
view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
|
||||||
comment_count = self._html_search_regex(
|
comment_count = self._html_search_regex(
|
||||||
r'comment_count\s*=\s*\'([^\']+)\';',
|
r'comment_count\s*=\s*\'([^\']+)\';',
|
||||||
webpage, 'comment_count', fatal=False)
|
webpage, 'comment_count', fatal=False)
|
||||||
|
|
||||||
content_url = self._html_search_meta(
|
|
||||||
'contentURL', webpage, 'content URL', fatal=False)
|
|
||||||
ext = determine_ext(content_url, 'mp4')
|
|
||||||
|
|
||||||
# Might be empty for some videos.
|
|
||||||
streams = self._html_search_regex(
|
|
||||||
r'"qualitylevel"\s*:\s*"([^"]+)"', webpage, 'streams', default='')
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
if streams:
|
|
||||||
for stream in streams.split('|'):
|
|
||||||
quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
|
|
||||||
formats.append({
|
|
||||||
'format_id': '%sp' % quality if quality else 'sd',
|
|
||||||
'url': compat_urllib_parse_unquote(url),
|
|
||||||
'ext': ext,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
stream_url = self._search_regex(
|
|
||||||
r'"streamurl"\s*:\s*"([^"]+)"', webpage, 'stream URL')
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'sd',
|
|
||||||
'url': compat_urllib_parse_unquote(stream_url),
|
|
||||||
'ext': ext,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -18,7 +18,7 @@ class JojIE(InfoExtractor):
|
|||||||
joj:|
|
joj:|
|
||||||
https?://media\.joj\.sk/embed/
|
https?://media\.joj\.sk/embed/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
|
(?P<id>[^/?#^]+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
|
'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
|
||||||
@ -29,16 +29,24 @@ class JojIE(InfoExtractor):
|
|||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 3118,
|
'duration': 3118,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://media.joj.sk/embed/9i1cxv',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
|
'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'joj:9i1cxv',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return [
|
||||||
r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
mobj.group('url')
|
||||||
webpage)
|
for mobj in re.finditer(
|
||||||
|
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
|
||||||
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -130,7 +130,7 @@ class LeIE(InfoExtractor):
|
|||||||
media_id, 'Downloading flash playJson data', query={
|
media_id, 'Downloading flash playJson data', query={
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'platid': 1,
|
'platid': 1,
|
||||||
'splatid': 101,
|
'splatid': 105,
|
||||||
'format': 1,
|
'format': 1,
|
||||||
'source': 1000,
|
'source': 1000,
|
||||||
'tkey': self.calc_time_key(int(time.time())),
|
'tkey': self.calc_time_key(int(time.time())),
|
||||||
|
@ -282,7 +282,9 @@ class LimelightMediaIE(LimelightBaseIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
self._initialize_geo_bypass({
|
||||||
|
'countries': smuggled_data.get('geo_countries'),
|
||||||
|
})
|
||||||
|
|
||||||
pc, mobile, metadata = self._extract(
|
pc, mobile, metadata = self._extract(
|
||||||
video_id, 'getPlaylistByMediaId',
|
video_id, 'getPlaylistByMediaId',
|
||||||
|
125
youtube_dl/extractor/markiza.py
Normal file
125
youtube_dl/extractor/markiza.py
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
orderedSet,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MarkizaIE(InfoExtractor):
    """Extractor for ``videoarchiv.markiza.sk`` video and embed URLs.

    The numeric id from the URL is sent to the site's JWPlayer JSON endpoint;
    the response describes either a single video or a playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
    _TESTS = [{
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139078',
            'ext': 'mp4',
            'title': 'Oteckovia 109',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2760,
        },
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
        'info_dict': {
            'id': '85430',
            'title': 'Televízne noviny',
        },
        'playlist_count': 23,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/embed/85295',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (single video or playlist) for *url*."""
        video_id = self._match_id(url)

        data = self._download_json(
            'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
            video_id, query={'id': video_id})

        info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')

        if info.get('_type') == 'playlist':
            # Playlist result: label it with the requested id and the
            # name given under data['details'] (None when absent).
            info.update({
                'id': video_id,
                'title': try_get(
                    data, lambda x: x['details']['name'], compat_str),
            })
        else:
            # Single video: take the duration from data['details'], if any.
            info['duration'] = parse_duration(
                try_get(data, lambda x: x['details']['duration'], compat_str))
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class MarkizaPageIE(InfoExtractor):
    """Extractor for article pages on ``*.markiza.sk`` and ``tvnoviny.sk``.

    The page is scanned for embedded player ids; each one becomes a playlist
    entry pointing at the matching ``videoarchiv.markiza.sk`` video URL.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
    _TESTS = [{
        'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139355',
            'ext': 'mp4',
            'title': 'Oteckovia 110',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2604,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
        'only_matching': True,
    }, {
        'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
        'only_matching': True,
    }, {
        'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
        'only_matching': True,
    }, {
        'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
        'only_matching': True,
    }, {
        'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that MarkizaIE already handles; otherwise defer to the base check."""
        return False if MarkizaIE.suitable(url) else super(MarkizaPageIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the videoarchiv videos embedded in the page at *url*."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(
            # Downloading for some hosts (e.g. dajto, doma) fails with 500
            # although everything seems to be OK, so considering 500
            # status code to be expected.
            url, playlist_id, expected_status=500)

        # orderedSet drops repeated ids while keeping first-seen order.
        # The target extractor is named explicitly since the generated URL
        # is always a videoarchiv.markiza.sk video URL.
        entries = [
            self.url_result(
                'http://videoarchiv.markiza.sk/video/%s' % video_id,
                ie=MarkizaIE.ie_key(), video_id=video_id)
            for video_id in orderedSet(re.findall(
                r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
                webpage))]

        return self.playlist_result(entries, playlist_id)
|
@ -42,6 +42,22 @@ class MediasetIE(InfoExtractor):
|
|||||||
'categories': ['reality'],
|
'categories': ['reality'],
|
||||||
},
|
},
|
||||||
'expected_warnings': ['is not a supported codec'],
|
'expected_warnings': ['is not a supported codec'],
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html',
|
||||||
|
'md5': '1276f966ac423d16ba255ce867de073e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '846685',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Puntata del 25 maggio',
|
||||||
|
'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 6565,
|
||||||
|
'creator': 'mediaset',
|
||||||
|
'upload_date': '20180525',
|
||||||
|
'series': 'Matrix',
|
||||||
|
'categories': ['infotainment'],
|
||||||
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||||
}, {
|
}, {
|
||||||
# clip
|
# clip
|
||||||
'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
|
'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
|
||||||
@ -70,16 +86,33 @@ class MediasetIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
video = self._download_json(
|
||||||
|
'https://www.video.mediaset.it/html/metainfo.sjson',
|
||||||
|
video_id, 'Downloading media info', query={
|
||||||
|
'id': video_id
|
||||||
|
})['video']
|
||||||
|
|
||||||
|
title = video['title']
|
||||||
|
media_id = video.get('guid') or video_id
|
||||||
|
|
||||||
video_list = self._download_json(
|
video_list = self._download_json(
|
||||||
'http://cdnsel01.mediaset.net/GetCdn.aspx',
|
'http://cdnsel01.mediaset.net/GetCdn2018.aspx',
|
||||||
video_id, 'Downloading video CDN JSON', query={
|
video_id, 'Downloading video CDN JSON', query={
|
||||||
'streamid': video_id,
|
'streamid': media_id,
|
||||||
'format': 'json',
|
'format': 'json',
|
||||||
})['videoList']
|
})['videoList']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_url in video_list:
|
for format_url in video_list:
|
||||||
if '.ism' in format_url:
|
ext = determine_ext(format_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
elif ext == 'ism' or '.ism' in format_url:
|
||||||
formats.extend(self._extract_ism_formats(
|
formats.extend(self._extract_ism_formats(
|
||||||
format_url, video_id, ism_id='mss', fatal=False))
|
format_url, video_id, ism_id='mss', fatal=False))
|
||||||
else:
|
else:
|
||||||
@ -89,30 +122,23 @@ class MediasetIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
mediainfo = self._download_json(
|
|
||||||
'http://plr.video.mediaset.it/html/metainfo.sjson',
|
|
||||||
video_id, 'Downloading video info JSON', query={
|
|
||||||
'id': video_id,
|
|
||||||
})['video']
|
|
||||||
|
|
||||||
title = mediainfo['title']
|
|
||||||
|
|
||||||
creator = try_get(
|
creator = try_get(
|
||||||
mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
|
video, lambda x: x['brand-info']['publisher'], compat_str)
|
||||||
category = try_get(
|
category = try_get(
|
||||||
mediainfo, lambda x: x['brand-info']['category'], compat_str)
|
video, lambda x: x['brand-info']['category'], compat_str)
|
||||||
categories = [category] if category else None
|
categories = [category] if category else None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': mediainfo.get('short-description'),
|
'description': video.get('short-description'),
|
||||||
'thumbnail': mediainfo.get('thumbnail'),
|
'thumbnail': video.get('thumbnail'),
|
||||||
'duration': parse_duration(mediainfo.get('duration')),
|
'duration': parse_duration(video.get('duration')),
|
||||||
'creator': creator,
|
'creator': creator,
|
||||||
'upload_date': unified_strdate(mediainfo.get('production-date')),
|
'upload_date': unified_strdate(video.get('production-date')),
|
||||||
'webpage_url': mediainfo.get('url'),
|
'webpage_url': video.get('url'),
|
||||||
'series': mediainfo.get('brand-value'),
|
'series': video.get('brand-value'),
|
||||||
|
'season': video.get('season'),
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_codecs,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MinotoIE(InfoExtractor):
|
class MinotoIE(InfoExtractor):
|
||||||
@ -26,7 +29,7 @@ class MinotoIE(InfoExtractor):
|
|||||||
formats.extend(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
formats.extend(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||||
else:
|
else:
|
||||||
fmt_profile = fmt.get('profile') or {}
|
fmt_profile = fmt.get('profile') or {}
|
||||||
f = {
|
formats.append({
|
||||||
'format_id': fmt_profile.get('name-short'),
|
'format_id': fmt_profile.get('name-short'),
|
||||||
'format_note': fmt_profile.get('name'),
|
'format_note': fmt_profile.get('name'),
|
||||||
'url': fmt_url,
|
'url': fmt_url,
|
||||||
@ -35,16 +38,8 @@ class MinotoIE(InfoExtractor):
|
|||||||
'filesize': int_or_none(fmt.get('filesize')),
|
'filesize': int_or_none(fmt.get('filesize')),
|
||||||
'width': int_or_none(fmt.get('width')),
|
'width': int_or_none(fmt.get('width')),
|
||||||
'height': int_or_none(fmt.get('height')),
|
'height': int_or_none(fmt.get('height')),
|
||||||
}
|
'codecs': parse_codecs(fmt.get('codecs')),
|
||||||
codecs = fmt.get('codecs')
|
})
|
||||||
if codecs:
|
|
||||||
codecs = codecs.split(',')
|
|
||||||
if len(codecs) == 2:
|
|
||||||
f.update({
|
|
||||||
'vcodec': codecs[0],
|
|
||||||
'acodec': codecs[1],
|
|
||||||
})
|
|
||||||
formats.append(f)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -179,6 +179,10 @@ class MixcloudIE(InfoExtractor):
|
|||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'http',
|
'format_id': 'http',
|
||||||
'url': decrypted,
|
'url': decrypted,
|
||||||
|
'downloader_options': {
|
||||||
|
# Mixcloud starts throttling at >~5M
|
||||||
|
'http_chunk_size': 5242880,
|
||||||
|
},
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -1,96 +1,90 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
from .nhl import NHLBaseIE
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
parse_duration,
|
|
||||||
parse_iso8601,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MLBIE(InfoExtractor):
|
class MLBIE(NHLBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:[\da-z_-]+\.)*mlb\.com/
|
(?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
(?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
|
(?:[^/]+/)*c-|
|
||||||
(?:
|
(?:
|
||||||
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
||||||
(?:[^/]+/)+(?:play|index)\.jsp|
|
(?:[^/]+/)+(?:play|index)\.jsp|
|
||||||
)\?.*?\bcontent_id=
|
)\?.*?\bcontent_id=
|
||||||
)
|
)
|
||||||
(?P<id>n?\d+)|
|
(?P<id>\d+)
|
||||||
(?:[^/]+/)*(?P<path>[^/]+)
|
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
|
_CONTENT_DOMAIN = 'content.mlb.com'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
|
||||||
'md5': 'ff56a598c2cf411a9a38a69709e97079',
|
'md5': '632358dacfceec06bad823b83d21df2d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34698933',
|
'id': '34698933',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Ackley's spectacular catch",
|
'title': "Ackley's spectacular catch",
|
||||||
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
|
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
|
||||||
'duration': 66,
|
'duration': 66,
|
||||||
'timestamp': 1405980600,
|
'timestamp': 1405995000,
|
||||||
'upload_date': '20140721',
|
'upload_date': '20140722',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
|
'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
|
||||||
'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
|
'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34496663',
|
'id': '34496663',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Stanton prepares for Derby',
|
'title': 'Stanton prepares for Derby',
|
||||||
'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
|
'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
|
||||||
'duration': 46,
|
'duration': 46,
|
||||||
'timestamp': 1405105800,
|
'timestamp': 1405120200,
|
||||||
'upload_date': '20140711',
|
'upload_date': '20140711',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
|
'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
|
||||||
'md5': '0e6e73d509321e142409b695eadd541f',
|
'md5': '99bb9176531adc600b90880fb8be9328',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34578115',
|
'id': '34578115',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Cespedes repeats as Derby champ',
|
'title': 'Cespedes repeats as Derby champ',
|
||||||
'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
|
'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
|
||||||
'duration': 488,
|
'duration': 488,
|
||||||
'timestamp': 1405399936,
|
'timestamp': 1405414336,
|
||||||
'upload_date': '20140715',
|
'upload_date': '20140715',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
|
'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
|
||||||
'md5': 'b8fd237347b844365d74ea61d4245967',
|
'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34577915',
|
'id': '34577915',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Bautista on Home Run Derby',
|
'title': 'Bautista on Home Run Derby',
|
||||||
'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
|
'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
|
||||||
'duration': 52,
|
'duration': 52,
|
||||||
'timestamp': 1405390722,
|
'timestamp': 1405405122,
|
||||||
'upload_date': '20140715',
|
'upload_date': '20140715',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
|
'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
|
||||||
'md5': 'aafaf5b0186fee8f32f20508092f8111',
|
'md5': 'e09e37b552351fddbf4d9e699c924d68',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '75609783',
|
'id': '75609783',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Must C: Pillar climbs for catch',
|
'title': 'Must C: Pillar climbs for catch',
|
||||||
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
||||||
'timestamp': 1429124820,
|
'timestamp': 1429139220,
|
||||||
'upload_date': '20150415',
|
'upload_date': '20150415',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -111,7 +105,7 @@ class MLBIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
|
'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -120,58 +114,7 @@ class MLBIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
|
'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
if not video_id:
|
|
||||||
video_path = mobj.group('path')
|
|
||||||
webpage = self._download_webpage(url, video_path)
|
|
||||||
video_id = self._search_regex(
|
|
||||||
[r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')
|
|
||||||
|
|
||||||
detail = self._download_xml(
|
|
||||||
'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
|
|
||||||
% (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
|
|
||||||
|
|
||||||
title = detail.find('./headline').text
|
|
||||||
description = detail.find('./big-blurb').text
|
|
||||||
duration = parse_duration(detail.find('./duration').text)
|
|
||||||
timestamp = parse_iso8601(detail.attrib['date'][:-5])
|
|
||||||
|
|
||||||
thumbnails = [{
|
|
||||||
'url': thumbnail.text,
|
|
||||||
} for thumbnail in detail.findall('./thumbnailScenarios/thumbnailScenario')]
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for media_url in detail.findall('./url'):
|
|
||||||
playback_scenario = media_url.attrib['playback_scenario']
|
|
||||||
fmt = {
|
|
||||||
'url': media_url.text,
|
|
||||||
'format_id': playback_scenario,
|
|
||||||
}
|
|
||||||
m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)
|
|
||||||
if m:
|
|
||||||
fmt.update({
|
|
||||||
'vbr': int(m.group('vbr')) * 1000,
|
|
||||||
'width': int(m.group('width')),
|
|
||||||
'height': int(m.group('height')),
|
|
||||||
})
|
|
||||||
formats.append(fmt)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
}
|
|
||||||
|
@ -1,116 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import os.path
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
remove_start,
|
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MonikerIE(InfoExtractor):
    # Extractor for videos hosted on allmyvideos.net and vidspot.net.
    IE_DESC = 'allmyvideos.net and vidspot.net'
    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'

    _TESTS = [{
        'url': 'http://allmyvideos.net/jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://vidspot.net/l2ngsmhs8ci5',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'l2ngsmhs8ci5',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
        'only_matching': True,
    }, {
        'url': 'http://vidspot.net/2/v-ywDf99',
        'md5': '5f8254ce12df30479428b0152fb8e7ba',
        'info_dict': {
            'id': 'ywDf99',
            'ext': 'mp4',
            'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
            'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
        },
    }, {
        'url': 'http://allmyvideos.net/v/v-HXZm5t',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Normalise "embed-<id>" URLs to the plain "<id>" form before
        # downloading; the rewritten URL must still match _VALID_URL.
        orig_video_id = self._match_id(url)
        video_id = remove_start(orig_video_id, 'embed-')
        url = url.replace(orig_video_id, video_id)
        assert re.match(self._VALID_URL, url) is not None
        orig_webpage = self._download_webpage(url, video_id)

        if '>File Not Found<' in orig_webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        # Surface any error message the site renders in an "err" element.
        error = self._search_regex(
            r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

        builtin_url = self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
            orig_webpage, 'builtin URL', default=None, group='url')

        if builtin_url:
            # The player lives in an iframe; fetch it with the original page
            # as Referer and take the metadata from the outer page.
            req = sanitized_Request(builtin_url)
            req.add_header('Referer', url)
            webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
            title = self._og_search_title(orig_webpage).strip()
            # NOTE(review): the description lookup is presumably non-fatal and
            # may yield None when the page has no og:description -- verify
            # against InfoExtractor. Only strip when a value was found so a
            # missing description does not crash the extraction.
            description = self._og_search_description(orig_webpage)
            if description:
                description = description.strip()
        else:
            # No iframe: re-submit the page's hidden form fields as a POST to
            # obtain the page that carries the media links.
            fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
            data = dict(fields)

            post = urlencode_postdata(data)
            headers = {
                b'Content-Type': b'application/x-www-form-urlencoded',
            }
            req = sanitized_Request(url, post, headers)
            webpage = self._download_webpage(
                req, video_id, note='Downloading video page ...')

            # The title is the hidden "fname" field without its extension.
            title = os.path.splitext(data['fname'])[0]
            description = None

        # Could be several links with different quality
        links = re.findall(r'"file" : "?(.+?)",', webpage)
        # Assume the links are ordered in quality
        formats = [{
            'url': link_url,
            'quality': rank,
        } for rank, link_url in enumerate(links)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }
|
|
@ -77,8 +77,11 @@ class MotherlessIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
||||||
video_url = self._html_search_regex(
|
video_url = (self._html_search_regex(
|
||||||
r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')
|
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
|
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||||
|
webpage, 'video URL', default=None, group='url') or
|
||||||
|
'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
view_count = str_to_int(self._html_search_regex(
|
view_count = str_to_int(self._html_search_regex(
|
||||||
r'<strong>Views</strong>\s+([^<]+)<',
|
r'<strong>Views</strong>\s+([^<]+)<',
|
||||||
@ -120,7 +123,7 @@ class MotherlessIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class MotherlessGroupIE(InfoExtractor):
|
class MotherlessGroupIE(InfoExtractor):
|
||||||
_VALID_URL = 'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
|
_VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://motherless.com/g/movie_scenes',
|
'url': 'http://motherless.com/g/movie_scenes',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -6,17 +6,17 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class MakersChannelIE(InfoExtractor):
|
class MyChannelsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?makerschannel\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?mychannels\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://makerschannel.com/en/zoomin/community-highlights?video_id=849',
|
'url': 'https://mychannels.com/missholland/miss-holland?production_id=3416',
|
||||||
'md5': '624a512c6969236b5967bf9286345ad1',
|
'md5': 'b8993daad4262dd68d89d651c0c52c45',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '849',
|
'id': 'wUUDZZep6vQD',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Landing a bus on a plane is an epic win',
|
'title': 'Miss Holland joins VOTE LEAVE',
|
||||||
'uploader': 'ZoomIn',
|
'description': 'Miss Holland | #13 Not a potato',
|
||||||
'description': 'md5:cd9cca2ea7b69b78be81d07020c97139',
|
'uploader': 'Miss Holland',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -27,12 +27,12 @@ class MakersChannelIE(InfoExtractor):
|
|||||||
|
|
||||||
def extract_data_val(attr, fatal=False):
|
def extract_data_val(attr, fatal=False):
|
||||||
return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal)
|
return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal)
|
||||||
minoto_id = self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
|
minoto_id = extract_data_val('minoto-id') or self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'minoto:%s' % minoto_id,
|
'url': 'minoto:%s' % minoto_id,
|
||||||
'id': extract_data_val('video-id', True),
|
'id': url_id,
|
||||||
'title': extract_data_val('title', True),
|
'title': extract_data_val('title', True),
|
||||||
'description': extract_data_val('description'),
|
'description': extract_data_val('description'),
|
||||||
'thumbnail': extract_data_val('image'),
|
'thumbnail': extract_data_val('image'),
|
@ -1,7 +1,8 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import base64
|
import base64
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
@ -9,6 +10,7 @@ from .adobepass import AdobePassIE
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -78,10 +80,14 @@ class NBCIE(AdobePassIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
permalink, video_id = re.match(self._VALID_URL, url).groups()
|
permalink, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
permalink = 'http' + permalink
|
permalink = 'http' + permalink
|
||||||
video_data = self._download_json(
|
response = self._download_json(
|
||||||
'https://api.nbc.com/v3/videos', video_id, query={
|
'https://api.nbc.com/v3/videos', video_id, query={
|
||||||
'filter[permalink]': permalink,
|
'filter[permalink]': permalink,
|
||||||
})['data'][0]['attributes']
|
'fields[videos]': 'description,entitlement,episodeNumber,guid,keywords,seasonNumber,title,vChipRating',
|
||||||
|
'fields[shows]': 'shortTitle',
|
||||||
|
'include': 'show.shortTitle',
|
||||||
|
})
|
||||||
|
video_data = response['data'][0]['attributes']
|
||||||
query = {
|
query = {
|
||||||
'mbr': 'true',
|
'mbr': 'true',
|
||||||
'manifest': 'm3u',
|
'manifest': 'm3u',
|
||||||
@ -103,10 +109,11 @@ class NBCIE(AdobePassIE):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'url': theplatform_url,
|
'url': theplatform_url,
|
||||||
'description': video_data.get('description'),
|
'description': video_data.get('description'),
|
||||||
'keywords': video_data.get('keywords'),
|
'tags': video_data.get('keywords'),
|
||||||
'season_number': int_or_none(video_data.get('seasonNumber')),
|
'season_number': int_or_none(video_data.get('seasonNumber')),
|
||||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||||
'series': video_data.get('showName'),
|
'episode': title,
|
||||||
|
'series': try_get(response, lambda x: x['included'][0]['attributes']['shortTitle']),
|
||||||
'ie_key': 'ThePlatform',
|
'ie_key': 'ThePlatform',
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -169,6 +176,65 @@ class NBCSportsIE(InfoExtractor):
|
|||||||
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
||||||
|
|
||||||
|
|
||||||
|
class NBCSportsStreamIE(AdobePassIE):
    # Extractor for stream.nbcsports.com pages identified by a numeric "pid"
    # query parameter. The stream URL is exchanged for a tokenized HLS URL
    # using an MVPD authorization token.
    _VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
        'info_dict': {
            'id': '206559',
            'ext': 'mp4',
            'title': 'Amgen Tour of California Women\'s Recap',
            'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Requires Adobe Pass Authentication',
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        live_source = self._download_json(
            'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id,
            video_id)
        video_source = live_source['videoSources'][0]
        title = video_source['title']

        # Pick the first non-empty stream URL among the known keys (each with
        # an optional "...Alt" variant); if none is set, 'ottStreamUrl' is
        # mandatory and its absence raises KeyError.
        source_url = None
        for k in ('source', 'msl4source', 'iossource', 'hlsv4'):
            sk = k + 'Url'
            source_url = video_source.get(sk) or video_source.get(sk + 'Alt')
            if source_url:
                break
        else:
            source_url = video_source['ottStreamUrl']

        is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
        resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
        token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
        tokenized_url = self._download_json(
            'https://token.playmakerservices.com/cdn',
            video_id, data=json.dumps({
                'requestorId': 'nbcsports',
                'pid': video_id,
                'application': 'NBCSports',
                'version': 'v1',
                'platform': 'desktop',
                'cdn': 'akamai',
                # Use the URL selected above. 'sourceUrl' is the first key
                # tried, so this is unchanged whenever it is present, and the
                # fallbacks now apply instead of raising KeyError.
                'url': source_url,
                'token': base64.b64encode(token.encode()).decode(),
                'resourceId': base64.b64encode(resource.encode()).decode(),
            }).encode())['tokenizedUrl']
        formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'description': live_source.get('description'),
            'formats': formats,
            'is_live': is_live,
        }
|
||||||
|
|
||||||
|
|
||||||
class CSNNEIE(InfoExtractor):
|
class CSNNEIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
|
_VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
|
||||||
|
|
||||||
|
@ -29,14 +29,13 @@ class NexxIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# movie
|
# movie
|
||||||
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
|
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
|
||||||
'md5': '828cea195be04e66057b846288295ba1',
|
'md5': '31899fd683de49ad46f4ee67e53e83fe',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '128907',
|
'id': '128907',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Stiftung Warentest',
|
'title': 'Stiftung Warentest',
|
||||||
'alt_title': 'Wie ein Test abläuft',
|
'alt_title': 'Wie ein Test abläuft',
|
||||||
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
|
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
|
||||||
'release_year': 2013,
|
|
||||||
'creator': 'SPIEGEL TV',
|
'creator': 'SPIEGEL TV',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 2509,
|
'duration': 2509,
|
||||||
@ -62,6 +61,7 @@ class NexxIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
# does not work via arc
|
# does not work via arc
|
||||||
'url': 'nexx:741:1269984',
|
'url': 'nexx:741:1269984',
|
||||||
@ -71,12 +71,26 @@ class NexxIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||||
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||||
'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 607,
|
'duration': 607,
|
||||||
'timestamp': 1518614955,
|
'timestamp': 1518614955,
|
||||||
'upload_date': '20180214',
|
'upload_date': '20180214',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
|
||||||
|
'url': 'nexx:747:1533779',
|
||||||
|
'md5': '6bf6883912b82b7069fb86c2297e9893',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1533779',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Aufregung um ausgebrochene Raubtiere',
|
||||||
|
'alt_title': 'Eifel-Zoo',
|
||||||
|
'description': 'md5:f21375c91c74ad741dcb164c427999d2',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 111,
|
||||||
|
'timestamp': 1527874460,
|
||||||
|
'upload_date': '20180601',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -141,6 +155,139 @@ class NexxIE(InfoExtractor):
|
|||||||
self._handle_error(result)
|
self._handle_error(result)
|
||||||
return result['result']
|
return result['result']
|
||||||
|
|
||||||
|
def _extract_free_formats(self, video, video_id):
|
||||||
|
stream_data = video['streamdata']
|
||||||
|
cdn = stream_data['cdnType']
|
||||||
|
assert cdn == 'free'
|
||||||
|
|
||||||
|
hash = video['general']['hash']
|
||||||
|
|
||||||
|
ps = compat_str(stream_data['originalDomain'])
|
||||||
|
if stream_data['applyFolderHierarchy'] == 1:
|
||||||
|
s = ('%04d' % int(video_id))[::-1]
|
||||||
|
ps += '/%s/%s' % (s[0:2], s[2:4])
|
||||||
|
ps += '/%s/%s_' % (video_id, hash)
|
||||||
|
|
||||||
|
t = 'http://%s' + ps
|
||||||
|
fd = stream_data['azureFileDistribution'].split(',')
|
||||||
|
cdn_provider = stream_data['cdnProvider']
|
||||||
|
|
||||||
|
def p0(p):
|
||||||
|
return '_%s' % p if stream_data['applyAzureStructure'] == 1 else ''
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
if cdn_provider == 'ak':
|
||||||
|
t += ','
|
||||||
|
for i in fd:
|
||||||
|
p = i.split(':')
|
||||||
|
t += p[1] + p0(int(p[0])) + ','
|
||||||
|
t += '.mp4.csmil/master.%s'
|
||||||
|
elif cdn_provider == 'ce':
|
||||||
|
k = t.split('/')
|
||||||
|
h = k.pop()
|
||||||
|
http_base = t = '/'.join(k)
|
||||||
|
http_base = http_base % stream_data['cdnPathHTTP']
|
||||||
|
t += '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
|
||||||
|
for i in fd:
|
||||||
|
p = i.split(':')
|
||||||
|
tbr = int(p[0])
|
||||||
|
filename = '%s%s%s.mp4' % (h, p[1], p0(tbr))
|
||||||
|
f = {
|
||||||
|
'url': http_base + '/' + filename,
|
||||||
|
'format_id': '%s-http-%d' % (cdn, tbr),
|
||||||
|
'tbr': tbr,
|
||||||
|
}
|
||||||
|
width_height = p[1].split('x')
|
||||||
|
if len(width_height) == 2:
|
||||||
|
f.update({
|
||||||
|
'width': int_or_none(width_height[0]),
|
||||||
|
'height': int_or_none(width_height[1]),
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
a = filename + ':%s' % (tbr * 1000)
|
||||||
|
t += a + ','
|
||||||
|
t = t[:-1] + '&audiostream=' + a.split(':')[0]
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
|
||||||
|
if cdn_provider == 'ce':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
t % (stream_data['cdnPathDASH'], 'mpd'), video_id,
|
||||||
|
mpd_id='%s-dash' % cdn, fatal=False))
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))
|
||||||
|
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_azure_formats(self, video, video_id):
|
||||||
|
stream_data = video['streamdata']
|
||||||
|
cdn = stream_data['cdnType']
|
||||||
|
assert cdn == 'azure'
|
||||||
|
|
||||||
|
azure_locator = stream_data['azureLocator']
|
||||||
|
|
||||||
|
def get_cdn_shield_base(shield_type='', static=False):
|
||||||
|
for secure in ('', 's'):
|
||||||
|
cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
|
||||||
|
if cdn_shield:
|
||||||
|
return 'http%s://%s' % (secure, cdn_shield)
|
||||||
|
else:
|
||||||
|
if 'fb' in stream_data['azureAccount']:
|
||||||
|
prefix = 'df' if static else 'f'
|
||||||
|
else:
|
||||||
|
prefix = 'd' if static else 'p'
|
||||||
|
account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
|
||||||
|
return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
|
||||||
|
|
||||||
|
language = video['general'].get('language_raw') or ''
|
||||||
|
|
||||||
|
azure_stream_base = get_cdn_shield_base()
|
||||||
|
is_ml = ',' in language
|
||||||
|
azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
|
||||||
|
azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
|
||||||
|
|
||||||
|
protection_token = try_get(
|
||||||
|
video, lambda x: x['protectiondata']['token'], compat_str)
|
||||||
|
if protection_token:
|
||||||
|
azure_manifest_url += '?hdnts=%s' % protection_token
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
azure_manifest_url % '(format=m3u8-aapl)',
|
||||||
|
video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='%s-hls' % cdn, fatal=False)
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
azure_manifest_url % '(format=mpd-time-csf)',
|
||||||
|
video_id, mpd_id='%s-dash' % cdn, fatal=False))
|
||||||
|
formats.extend(self._extract_ism_formats(
|
||||||
|
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
|
||||||
|
|
||||||
|
azure_progressive_base = get_cdn_shield_base('Prog', True)
|
||||||
|
azure_file_distribution = stream_data.get('azureFileDistribution')
|
||||||
|
if azure_file_distribution:
|
||||||
|
fds = azure_file_distribution.split(',')
|
||||||
|
if fds:
|
||||||
|
for fd in fds:
|
||||||
|
ss = fd.split(':')
|
||||||
|
if len(ss) == 2:
|
||||||
|
tbr = int_or_none(ss[0])
|
||||||
|
if tbr:
|
||||||
|
f = {
|
||||||
|
'url': '%s%s/%s_src_%s_%d.mp4' % (
|
||||||
|
azure_progressive_base, azure_locator, video_id, ss[1], tbr),
|
||||||
|
'format_id': '%s-http-%d' % (cdn, tbr),
|
||||||
|
'tbr': tbr,
|
||||||
|
}
|
||||||
|
width_height = ss[1].split('x')
|
||||||
|
if len(width_height) == 2:
|
||||||
|
f.update({
|
||||||
|
'width': int_or_none(width_height[0]),
|
||||||
|
'height': int_or_none(width_height[1]),
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
|
||||||
|
return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
||||||
@ -220,72 +367,15 @@ class NexxIE(InfoExtractor):
|
|||||||
general = video['general']
|
general = video['general']
|
||||||
title = general['title']
|
title = general['title']
|
||||||
|
|
||||||
stream_data = video['streamdata']
|
cdn = video['streamdata']['cdnType']
|
||||||
language = general.get('language_raw') or ''
|
|
||||||
|
|
||||||
# TODO: reverse more cdns
|
if cdn == 'azure':
|
||||||
|
formats = self._extract_azure_formats(video, video_id)
|
||||||
cdn = stream_data['cdnType']
|
elif cdn == 'free':
|
||||||
assert cdn == 'azure'
|
formats = self._extract_free_formats(video, video_id)
|
||||||
|
else:
|
||||||
azure_locator = stream_data['azureLocator']
|
# TODO: reverse more cdns
|
||||||
|
assert False
|
||||||
def get_cdn_shield_base(shield_type='', static=False):
|
|
||||||
for secure in ('', 's'):
|
|
||||||
cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
|
|
||||||
if cdn_shield:
|
|
||||||
return 'http%s://%s' % (secure, cdn_shield)
|
|
||||||
else:
|
|
||||||
if 'fb' in stream_data['azureAccount']:
|
|
||||||
prefix = 'df' if static else 'f'
|
|
||||||
else:
|
|
||||||
prefix = 'd' if static else 'p'
|
|
||||||
account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
|
|
||||||
return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
|
|
||||||
|
|
||||||
azure_stream_base = get_cdn_shield_base()
|
|
||||||
is_ml = ',' in language
|
|
||||||
azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
|
|
||||||
azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
|
|
||||||
|
|
||||||
protection_token = try_get(
|
|
||||||
video, lambda x: x['protectiondata']['token'], compat_str)
|
|
||||||
if protection_token:
|
|
||||||
azure_manifest_url += '?hdnts=%s' % protection_token
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
azure_manifest_url % '(format=m3u8-aapl)',
|
|
||||||
video_id, 'mp4', 'm3u8_native',
|
|
||||||
m3u8_id='%s-hls' % cdn, fatal=False)
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
azure_manifest_url % '(format=mpd-time-csf)',
|
|
||||||
video_id, mpd_id='%s-dash' % cdn, fatal=False))
|
|
||||||
formats.extend(self._extract_ism_formats(
|
|
||||||
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
|
|
||||||
|
|
||||||
azure_progressive_base = get_cdn_shield_base('Prog', True)
|
|
||||||
azure_file_distribution = stream_data.get('azureFileDistribution')
|
|
||||||
if azure_file_distribution:
|
|
||||||
fds = azure_file_distribution.split(',')
|
|
||||||
if fds:
|
|
||||||
for fd in fds:
|
|
||||||
ss = fd.split(':')
|
|
||||||
if len(ss) == 2:
|
|
||||||
tbr = int_or_none(ss[0])
|
|
||||||
if tbr:
|
|
||||||
f = {
|
|
||||||
'url': '%s%s/%s_src_%s_%d.mp4' % (
|
|
||||||
azure_progressive_base, azure_locator, video_id, ss[1], tbr),
|
|
||||||
'format_id': '%s-http-%d' % (cdn, tbr),
|
|
||||||
'tbr': tbr,
|
|
||||||
}
|
|
||||||
width_height = ss[1].split('x')
|
|
||||||
if len(width_height) == 2:
|
|
||||||
f.update({
|
|
||||||
'width': int_or_none(width_height[0]),
|
|
||||||
'height': int_or_none(width_height[1]),
|
|
||||||
})
|
|
||||||
formats.append(f)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -1,18 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import os
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_urlparse,
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
compat_str,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
@ -20,236 +12,77 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NHLBaseInfoExtractor(InfoExtractor):
|
class NHLBaseIE(InfoExtractor):
|
||||||
@staticmethod
|
def _real_extract(self, url):
|
||||||
def _fix_json(json_string):
|
site, tmp_id = re.match(self._VALID_URL, url).groups()
|
||||||
return json_string.replace('\\\'', '\'')
|
video_data = self._download_json(
|
||||||
|
'https://%s/%s/%sid/v1/%s/details/web-v1.json'
|
||||||
|
% (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id)
|
||||||
|
if video_data.get('type') != 'video':
|
||||||
|
video_data = video_data['media']
|
||||||
|
video = video_data.get('video')
|
||||||
|
if video:
|
||||||
|
video_data = video
|
||||||
|
else:
|
||||||
|
videos = video_data.get('videos')
|
||||||
|
if videos:
|
||||||
|
video_data = videos[0]
|
||||||
|
|
||||||
def _real_extract_video(self, video_id):
|
video_id = compat_str(video_data['id'])
|
||||||
vid_parts = video_id.split(',')
|
title = video_data['title']
|
||||||
if len(vid_parts) == 3:
|
|
||||||
video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
|
|
||||||
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
|
|
||||||
data = self._download_json(
|
|
||||||
json_url, video_id, transform_source=self._fix_json)
|
|
||||||
return self._extract_video(data[0])
|
|
||||||
|
|
||||||
def _extract_video(self, info):
|
formats = []
|
||||||
video_id = info['id']
|
for playback in video_data.get('playbacks', []):
|
||||||
self.report_extraction(video_id)
|
playback_url = playback.get('url')
|
||||||
|
if not playback_url:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(playback_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
playback_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id=playback.get('name', 'hls'), fatal=False)
|
||||||
|
self._check_formats(m3u8_formats, video_id)
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
else:
|
||||||
|
height = int_or_none(playback.get('height'))
|
||||||
|
formats.append({
|
||||||
|
'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
|
||||||
|
'url': playback_url,
|
||||||
|
'width': int_or_none(playback.get('width')),
|
||||||
|
'height': height,
|
||||||
|
'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
initial_video_url = info['publishPoint']
|
thumbnails = []
|
||||||
if info['formats'] == '1':
|
cuts = video_data.get('image', {}).get('cuts') or []
|
||||||
parsed_url = compat_urllib_parse_urlparse(initial_video_url)
|
if isinstance(cuts, dict):
|
||||||
filename, ext = os.path.splitext(parsed_url.path)
|
cuts = cuts.values()
|
||||||
path = '%s_sd%s' % (filename, ext)
|
for thumbnail_data in cuts:
|
||||||
data = compat_urllib_parse_urlencode({
|
thumbnail_url = thumbnail_data.get('src')
|
||||||
'type': 'fvod',
|
if not thumbnail_url:
|
||||||
'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'width': int_or_none(thumbnail_data.get('width')),
|
||||||
|
'height': int_or_none(thumbnail_data.get('height')),
|
||||||
})
|
})
|
||||||
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
|
||||||
path_doc = self._download_xml(
|
|
||||||
path_url, video_id, 'Downloading final video url')
|
|
||||||
video_url = path_doc.find('path').text
|
|
||||||
else:
|
|
||||||
video_url = initial_video_url
|
|
||||||
|
|
||||||
join = compat_urlparse.urljoin
|
|
||||||
ret = {
|
|
||||||
'id': video_id,
|
|
||||||
'title': info['name'],
|
|
||||||
'url': video_url,
|
|
||||||
'description': info['description'],
|
|
||||||
'duration': int(info['duration']),
|
|
||||||
'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
|
|
||||||
'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
|
|
||||||
}
|
|
||||||
if video_url.startswith('rtmp:'):
|
|
||||||
mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
|
|
||||||
ret.update({
|
|
||||||
'tc_url': mobj.group('tc_url'),
|
|
||||||
'play_path': mobj.group('play_path'),
|
|
||||||
'app': mobj.group('app'),
|
|
||||||
'no_resume': True,
|
|
||||||
})
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
|
||||||
IE_NAME = 'nhl.com:videocenter'
|
|
||||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
|
||||||
'md5': 'db704a4ea09e8d3988c85e36cc892d09',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '453614',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Quick clip: Weise 4-3 goal vs Flames',
|
|
||||||
'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
|
|
||||||
'duration': 18,
|
|
||||||
'upload_date': '20131006',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
|
|
||||||
'md5': 'd22e82bc592f52d37d24b03531ee9696',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2014020024-628-h',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
|
|
||||||
'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
|
|
||||||
'duration': 0,
|
|
||||||
'upload_date': '20141011',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
|
|
||||||
'md5': 'c78fc64ea01777e426cfc202b746c825',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '58665',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Classic Game In Six - April 22, 1979',
|
|
||||||
'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
|
|
||||||
'duration': 400,
|
|
||||||
'upload_date': '20100129'
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://video.nhl.com/videocenter/?id=736722',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
|
|
||||||
'md5': '076fcb88c255154aacbf0a7accc3f340',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2014020299-X-h',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Penguins at Islanders / Game Highlights',
|
|
||||||
'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
|
|
||||||
'duration': 268,
|
|
||||||
'upload_date': '20141122',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '691469',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'RAW | Craig MacTavish Full Press Conference',
|
|
||||||
'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
|
|
||||||
'upload_date': '20141205',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True, # Requires rtmpdump
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
return self._real_extract_video(video_id)
|
|
||||||
|
|
||||||
|
|
||||||
class NHLNewsIE(NHLBaseInfoExtractor):
|
|
||||||
IE_NAME = 'nhl.com:news'
|
|
||||||
IE_DESC = 'NHL news'
|
|
||||||
_VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.nhl.com/ice/news.htm?id=750727',
|
|
||||||
'md5': '4b3d1262e177687a3009937bd9ec0be8',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '736722',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Cal Clutterbuck has been fined $2,000',
|
|
||||||
'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
|
|
||||||
'duration': 37,
|
|
||||||
'upload_date': '20150128',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# iframe embed
|
|
||||||
'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
|
|
||||||
'md5': '9f663d1c006c90ac9fb82777d4294e12',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '836127',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
|
|
||||||
'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
|
|
||||||
'duration': 93,
|
|
||||||
'upload_date': '20150923',
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
news_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, news_id)
|
|
||||||
video_id = self._search_regex(
|
|
||||||
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
|
|
||||||
r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
|
|
||||||
webpage, 'video id')
|
|
||||||
return self._real_extract_video(video_id)
|
|
||||||
|
|
||||||
|
|
||||||
class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
|
|
||||||
IE_NAME = 'nhl.com:videocenter:category'
|
|
||||||
IE_DESC = 'NHL videocenter category'
|
|
||||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '999',
|
|
||||||
'title': 'Highlights',
|
|
||||||
},
|
|
||||||
'playlist_count': 12,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
team = mobj.group('team')
|
|
||||||
webpage = self._download_webpage(url, team)
|
|
||||||
cat_id = self._search_regex(
|
|
||||||
[r'var defaultCatId = "(.+?)";',
|
|
||||||
r'{statusIndex:0,index:0,.*?id:(.*?),'],
|
|
||||||
webpage, 'category id')
|
|
||||||
playlist_title = self._html_search_regex(
|
|
||||||
r'tab0"[^>]*?>(.*?)</td>',
|
|
||||||
webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
|
|
||||||
|
|
||||||
data = compat_urllib_parse_urlencode({
|
|
||||||
'cid': cat_id,
|
|
||||||
# This is the default value
|
|
||||||
'count': 12,
|
|
||||||
'ptrs': 3,
|
|
||||||
'format': 'json',
|
|
||||||
})
|
|
||||||
path = '/videocenter/servlets/browse?' + data
|
|
||||||
request_url = compat_urlparse.urljoin(url, path)
|
|
||||||
response = self._download_webpage(request_url, playlist_title)
|
|
||||||
response = self._fix_json(response)
|
|
||||||
if not response.strip():
|
|
||||||
self._downloader.report_warning('Got an empty response, trying '
|
|
||||||
'adding the "newvideos" parameter')
|
|
||||||
response = self._download_webpage(request_url + '&newvideos=true',
|
|
||||||
playlist_title)
|
|
||||||
response = self._fix_json(response)
|
|
||||||
videos = json.loads(response)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'id': video_id,
|
||||||
'title': playlist_title,
|
'title': title,
|
||||||
'id': cat_id,
|
'description': video_data.get('description'),
|
||||||
'entries': [self._extract_video(v) for v in videos],
|
'timestamp': parse_iso8601(video_data.get('date')),
|
||||||
|
'duration': parse_duration(video_data.get('duration')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class NHLIE(InfoExtractor):
|
class NHLIE(NHLBaseIE):
|
||||||
IE_NAME = 'nhl.com'
|
IE_NAME = 'nhl.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
|
||||||
_SITES_MAP = {
|
_CONTENT_DOMAIN = 'nhl.bamcontent.com'
|
||||||
'nhl': 'nhl',
|
|
||||||
'wch2016': 'wch',
|
|
||||||
}
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# type=video
|
# type=video
|
||||||
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
|
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
|
||||||
@ -293,59 +126,3 @@ class NHLIE(InfoExtractor):
|
|||||||
'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
|
'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
tmp_id, site = mobj.group('id'), mobj.group('site')
|
|
||||||
video_data = self._download_json(
|
|
||||||
'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json'
|
|
||||||
% (self._SITES_MAP[site], tmp_id), tmp_id)
|
|
||||||
if video_data.get('type') == 'article':
|
|
||||||
video_data = video_data['media']
|
|
||||||
|
|
||||||
video_id = compat_str(video_data['id'])
|
|
||||||
title = video_data['title']
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for playback in video_data.get('playbacks', []):
|
|
||||||
playback_url = playback.get('url')
|
|
||||||
if not playback_url:
|
|
||||||
continue
|
|
||||||
ext = determine_ext(playback_url)
|
|
||||||
if ext == 'm3u8':
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
|
||||||
playback_url, video_id, 'mp4', 'm3u8_native',
|
|
||||||
m3u8_id=playback.get('name', 'hls'), fatal=False)
|
|
||||||
self._check_formats(m3u8_formats, video_id)
|
|
||||||
formats.extend(m3u8_formats)
|
|
||||||
else:
|
|
||||||
height = int_or_none(playback.get('height'))
|
|
||||||
formats.append({
|
|
||||||
'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
|
|
||||||
'url': playback_url,
|
|
||||||
'width': int_or_none(playback.get('width')),
|
|
||||||
'height': height,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id'))
|
|
||||||
|
|
||||||
thumbnails = []
|
|
||||||
for thumbnail_id, thumbnail_data in video_data.get('image', {}).get('cuts', {}).items():
|
|
||||||
thumbnail_url = thumbnail_data.get('src')
|
|
||||||
if not thumbnail_url:
|
|
||||||
continue
|
|
||||||
thumbnails.append({
|
|
||||||
'id': thumbnail_id,
|
|
||||||
'url': thumbnail_url,
|
|
||||||
'width': int_or_none(thumbnail_data.get('width')),
|
|
||||||
'height': int_or_none(thumbnail_data.get('height')),
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': video_data.get('description'),
|
|
||||||
'timestamp': parse_iso8601(video_data.get('date')),
|
|
||||||
'duration': parse_duration(video_data.get('duration')),
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
@ -85,7 +85,7 @@ class NickBrIE(MTVServicesInfoExtractor):
|
|||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
|
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
|
||||||
(?:www\.)?nickjr\.nl
|
(?:www\.)?nickjr\.[a-z]{2}
|
||||||
)
|
)
|
||||||
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
|
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
|
||||||
'''
|
'''
|
||||||
@ -98,6 +98,9 @@ class NickBrIE(MTVServicesInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
|
'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -163,7 +163,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
# No authentication to be performed
|
# No authentication to be performed
|
||||||
if not username:
|
if not username:
|
||||||
return True
|
return True
|
||||||
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
@ -13,38 +12,11 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NineCNineMediaBaseIE(InfoExtractor):
|
class NineCNineMediaIE(InfoExtractor):
|
||||||
_API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
|
|
||||||
|
|
||||||
|
|
||||||
class NineCNineMediaStackIE(NineCNineMediaBaseIE):
|
|
||||||
IE_NAME = '9c9media:stack'
|
|
||||||
_GEO_COUNTRIES = ['CA']
|
|
||||||
_VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups()
|
|
||||||
stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
|
|
||||||
stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
stack_base_url + 'm3u8', stack_id, 'mp4',
|
|
||||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
stack_base_url + 'f4m', stack_id,
|
|
||||||
f4m_id='hds', fatal=False))
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': stack_id,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class NineCNineMediaIE(NineCNineMediaBaseIE):
|
|
||||||
IE_NAME = '9c9media'
|
IE_NAME = '9c9media'
|
||||||
|
_GEO_COUNTRIES = ['CA']
|
||||||
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
|
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
|
||||||
|
_API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
destination_code, content_id = re.match(self._VALID_URL, url).groups()
|
destination_code, content_id = re.match(self._VALID_URL, url).groups()
|
||||||
@ -58,13 +30,26 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
|
|||||||
content_package = content['ContentPackages'][0]
|
content_package = content['ContentPackages'][0]
|
||||||
package_id = content_package['Id']
|
package_id = content_package['Id']
|
||||||
content_package_url = api_base_url + 'contentpackages/%s/' % package_id
|
content_package_url = api_base_url + 'contentpackages/%s/' % package_id
|
||||||
content_package = self._download_json(content_package_url, content_id)
|
content_package = self._download_json(
|
||||||
|
content_package_url, content_id, query={
|
||||||
|
'$include': '[HasClosedCaptions]',
|
||||||
|
})
|
||||||
|
|
||||||
if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm':
|
if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
|
manifest_base_url = content_package_url + 'manifest.'
|
||||||
multistacks = len(stacks) > 1
|
formats = []
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
manifest_base_url + 'm3u8', content_id, 'mp4',
|
||||||
|
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
manifest_base_url + 'f4m', content_id,
|
||||||
|
f4m_id='hds', fatal=False))
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
manifest_base_url + 'mpd', content_id,
|
||||||
|
mpd_id='dash', fatal=False))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for image in content.get('Images', []):
|
for image in content.get('Images', []):
|
||||||
@ -85,10 +70,12 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
|
|||||||
continue
|
continue
|
||||||
container.append(e_name)
|
container.append(e_name)
|
||||||
|
|
||||||
description = content.get('Desc') or content.get('ShortDesc')
|
|
||||||
season = content.get('Season', {})
|
season = content.get('Season', {})
|
||||||
base_info = {
|
|
||||||
'description': description,
|
info = {
|
||||||
|
'id': content_id,
|
||||||
|
'title': title,
|
||||||
|
'description': content.get('Desc') or content.get('ShortDesc'),
|
||||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||||
'episode_number': int_or_none(content.get('Episode')),
|
'episode_number': int_or_none(content.get('Episode')),
|
||||||
'season': season.get('Name'),
|
'season': season.get('Name'),
|
||||||
@ -97,26 +84,19 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
|
|||||||
'series': content.get('Media', {}).get('Name'),
|
'series': content.get('Media', {}).get('Name'),
|
||||||
'tags': tags,
|
'tags': tags,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
|
'duration': float_or_none(content_package.get('Duration')),
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
entries = []
|
if content_package.get('HasClosedCaptions'):
|
||||||
for stack in stacks:
|
info['subtitles'] = {
|
||||||
stack_id = compat_str(stack['Id'])
|
'en': [{
|
||||||
entry = {
|
'url': manifest_base_url + 'vtt',
|
||||||
'_type': 'url_transparent',
|
'ext': 'vtt',
|
||||||
'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
|
}, {
|
||||||
'id': stack_id,
|
'url': manifest_base_url + 'srt',
|
||||||
'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
|
'ext': 'srt',
|
||||||
'duration': float_or_none(stack.get('Duration')),
|
}]
|
||||||
'ie_key': 'NineCNineMediaStack',
|
|
||||||
}
|
}
|
||||||
entry.update(base_info)
|
|
||||||
entries.append(entry)
|
|
||||||
|
|
||||||
return {
|
return info
|
||||||
'_type': 'multi_video',
|
|
||||||
'id': content_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'entries': entries,
|
|
||||||
}
|
|
||||||
|
@ -65,7 +65,7 @@ class NocoIE(InfoExtractor):
|
|||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -36,8 +36,8 @@ class NPOIE(NPOBaseIE):
|
|||||||
https?://
|
https?://
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
|
npo\.nl/(?:[^/]+/)*|
|
||||||
ntr\.nl/(?:[^/]+/){2,}|
|
(?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
|
||||||
omroepwnl\.nl/video/fragment/[^/]+__|
|
omroepwnl\.nl/video/fragment/[^/]+__|
|
||||||
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
|
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
|
||||||
)
|
)
|
||||||
@ -160,8 +160,20 @@ class NPOIE(NPOBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
|
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://npo.nl/KN_1698996',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    The more specific NPO extractors are consulted first: if any of
    NPOLiveIE, NPORadioIE or NPORadioFragmentIE accepts the URL, this
    extractor declines it. Otherwise the decision is left to the
    inherited ``suitable`` implementation.
    """
    more_specific = (NPOLiveIE, NPORadioIE, NPORadioFragmentIE)
    for extractor in more_specific:
        if extractor.suitable(url):
            return False
    return super(NPOIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
return self._get_info(video_id)
|
return self._get_info(video_id)
|
||||||
@ -270,7 +282,7 @@ class NPOIE(NPOBaseIE):
|
|||||||
video_url = stream_info.get('url')
|
video_url = stream_info.get('url')
|
||||||
if not video_url or video_url in urls:
|
if not video_url or video_url in urls:
|
||||||
continue
|
continue
|
||||||
urls.add(item_url)
|
urls.add(video_url)
|
||||||
if determine_ext(video_url) == 'm3u8':
|
if determine_ext(video_url) == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_url, video_id, ext='mp4',
|
video_url, video_id, ext='mp4',
|
||||||
@ -389,7 +401,7 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
|
|
||||||
class NPORadioIE(InfoExtractor):
|
class NPORadioIE(InfoExtractor):
|
||||||
IE_NAME = 'npo.nl:radio'
|
IE_NAME = 'npo.nl:radio'
|
||||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
|
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.npo.nl/radio/radio-1',
|
'url': 'http://www.npo.nl/radio/radio-1',
|
||||||
@ -404,6 +416,10 @@ class NPORadioIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs accepted by NPORadioFragmentIE are declined here; everything
    else is decided by the inherited ``suitable`` implementation.
    """
    if NPORadioFragmentIE.suitable(url):
        return False
    return super(NPORadioIE, cls).suitable(url)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _html_get_attribute_regex(attribute):
|
def _html_get_attribute_regex(attribute):
|
||||||
return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
|
return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
|
||||||
|
@ -16,12 +16,22 @@ from ..utils import (
|
|||||||
class NRKBaseIE(InfoExtractor):
|
class NRKBaseIE(InfoExtractor):
|
||||||
_GEO_COUNTRIES = ['NO']
|
_GEO_COUNTRIES = ['NO']
|
||||||
|
|
||||||
|
_api_host = None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
data = self._download_json(
|
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||||
'http://%s/mediaelement/%s' % (self._API_HOST, video_id),
|
|
||||||
video_id, 'Downloading mediaelement JSON')
|
for api_host in api_hosts:
|
||||||
|
data = self._download_json(
|
||||||
|
'http://%s/mediaelement/%s' % (api_host, video_id),
|
||||||
|
video_id, 'Downloading mediaelement JSON',
|
||||||
|
fatal=api_host == api_hosts[-1])
|
||||||
|
if not data:
|
||||||
|
continue
|
||||||
|
self._api_host = api_host
|
||||||
|
break
|
||||||
|
|
||||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||||
video_id = data.get('id') or video_id
|
video_id = data.get('id') or video_id
|
||||||
@ -191,7 +201,7 @@ class NRKIE(NRKBaseIE):
|
|||||||
)
|
)
|
||||||
(?P<id>[^?#&]+)
|
(?P<id>[^?#&]+)
|
||||||
'''
|
'''
|
||||||
_API_HOST = 'v8-psapi.nrk.no'
|
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# video
|
# video
|
||||||
'url': 'http://www.nrk.no/video/PS*150533',
|
'url': 'http://www.nrk.no/video/PS*150533',
|
||||||
@ -237,8 +247,7 @@ class NRKTVIE(NRKBaseIE):
|
|||||||
(?:/\d{2}-\d{2}-\d{4})?
|
(?:/\d{2}-\d{2}-\d{4})?
|
||||||
(?:\#del=(?P<part_id>\d+))?
|
(?:\#del=(?P<part_id>\d+))?
|
||||||
''' % _EPISODE_RE
|
''' % _EPISODE_RE
|
||||||
_API_HOST = 'psapi-we.nrk.no'
|
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||||
'md5': '4e9ca6629f09e588ed240fb11619922a',
|
'md5': '4e9ca6629f09e588ed240fb11619922a',
|
||||||
|
@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
|
|||||||
|
|
||||||
|
|
||||||
class OpenloadIE(InfoExtractor):
|
class OpenloadIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||||
@ -301,6 +301,16 @@ class OpenloadIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
|
'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.win/f/kUEfGclsU9o',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.download/f/kUEfGclsU9o',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# The title lacks a file extension, but the URL has one
|
||||||
|
'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||||
@ -362,8 +372,7 @@ class OpenloadIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
# Seems all videos have extensions in their titles
|
'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
|
||||||
'ext': determine_ext(title, 'mp4'),
|
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': headers,
|
'http_headers': headers,
|
||||||
}
|
}
|
||||||
|
@ -42,7 +42,7 @@ class PacktPubIE(PacktPubBaseIE):
|
|||||||
_TOKEN = None
|
_TOKEN = None
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
|
@ -53,7 +53,7 @@ class PatreonIE(InfoExtractor):
|
|||||||
# needed. Keeping this commented for when this inevitably changes.
|
# needed. Keeping this commented for when this inevitably changes.
|
||||||
'''
|
'''
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@ -360,6 +361,50 @@ class PBSIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2365936247',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Antiques Roadshow - Indianapolis, Hour 2',
|
||||||
|
'description': 'md5:524b32249db55663e7231b6b8d1671a2',
|
||||||
|
'duration': 3180,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3007193718',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster",
|
||||||
|
'description': 'md5:37efbac85e0c09b009586523ec143652',
|
||||||
|
'duration': 6292,
|
||||||
|
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3011407934',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Stories from the Stage - Road Trip',
|
||||||
|
'duration': 1619,
|
||||||
|
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -422,6 +467,8 @@ class PBSIE(InfoExtractor):
|
|||||||
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
|
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
|
||||||
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||||
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
||||||
|
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
||||||
|
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
|
||||||
]
|
]
|
||||||
|
|
||||||
media_id = self._search_regex(
|
media_id = self._search_regex(
|
||||||
@ -456,7 +503,8 @@ class PBSIE(InfoExtractor):
|
|||||||
if not url:
|
if not url:
|
||||||
url = self._og_search_url(webpage)
|
url = self._og_search_url(webpage)
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(
|
||||||
|
self._VALID_URL, self._proto_relative_url(url.strip()))
|
||||||
|
|
||||||
player_id = mobj.group('player_id')
|
player_id = mobj.group('player_id')
|
||||||
if not display_id:
|
if not display_id:
|
||||||
@ -466,13 +514,27 @@ class PBSIE(InfoExtractor):
|
|||||||
url, display_id, note='Downloading player page',
|
url, display_id, note='Downloading player page',
|
||||||
errnote='Could not download player page')
|
errnote='Could not download player page')
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
|
r'<div\s+id=["\']video_(\d+)', player_page, 'video ID',
|
||||||
|
default=None)
|
||||||
|
if not video_id:
|
||||||
|
video_info = self._extract_video_data(
|
||||||
|
player_page, 'video data', display_id)
|
||||||
|
video_id = compat_str(
|
||||||
|
video_info.get('id') or video_info['contentID'])
|
||||||
else:
|
else:
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
display_id = video_id
|
display_id = video_id
|
||||||
|
|
||||||
return video_id, display_id, None, description
|
return video_id, display_id, None, description
|
||||||
|
|
||||||
|
def _extract_video_data(self, string, name, video_id, fatal=True):
    """Find the player's video-data object in ``string`` and parse it.

    Two JavaScript assignments are searched for, in order:
    ``PBS.videoData = {...};`` and ``window.videoBridge = {...};``.

    :param string: text to search (a downloaded player page).
    :param name: label passed to ``_search_regex`` for the searched field.
    :param video_id: identifier passed to ``_parse_json``.
    :param fatal: forwarded to ``_parse_json``.
    :return: whatever ``_parse_json`` yields for the matched object after
        ``js_to_json`` conversion.
    """
    patterns = [
        r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
        r'window\.videoBridge\s*=\s*({.+?});',
    ]
    # When neither assignment is present, fall back to an empty object
    # literal so the parse step still receives valid input.
    raw_data = self._search_regex(patterns, string, name, default='{}')
    return self._parse_json(
        raw_data, video_id, transform_source=js_to_json, fatal=fatal)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, display_id, upload_date, description = self._extract_webpage(url)
|
video_id, display_id, upload_date, description = self._extract_webpage(url)
|
||||||
|
|
||||||
@ -503,20 +565,21 @@ class PBSIE(InfoExtractor):
|
|||||||
'http://player.pbs.org/%s/%s' % (page, video_id),
|
'http://player.pbs.org/%s/%s' % (page, video_id),
|
||||||
display_id, 'Downloading %s page' % page, fatal=False)
|
display_id, 'Downloading %s page' % page, fatal=False)
|
||||||
if player:
|
if player:
|
||||||
video_info = self._parse_json(
|
video_info = self._extract_video_data(
|
||||||
self._search_regex(
|
player, '%s video data' % page, display_id, fatal=False)
|
||||||
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
|
||||||
player, '%s video data' % page, default='{}'),
|
|
||||||
display_id, transform_source=js_to_json, fatal=False)
|
|
||||||
if video_info:
|
if video_info:
|
||||||
extract_redirect_urls(video_info)
|
extract_redirect_urls(video_info)
|
||||||
if not info:
|
if not info:
|
||||||
info = video_info
|
info = video_info
|
||||||
if not chapters:
|
if not chapters:
|
||||||
for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
|
raw_chapters = video_info.get('chapters') or []
|
||||||
chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
|
if not raw_chapters:
|
||||||
if not chapter:
|
for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
|
||||||
continue
|
chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
|
||||||
|
if not chapter:
|
||||||
|
continue
|
||||||
|
raw_chapters.append(chapter)
|
||||||
|
for chapter in raw_chapters:
|
||||||
start_time = float_or_none(chapter.get('start_time'), 1000)
|
start_time = float_or_none(chapter.get('start_time'), 1000)
|
||||||
duration = float_or_none(chapter.get('duration'), 1000)
|
duration = float_or_none(chapter.get('duration'), 1000)
|
||||||
if start_time is None or duration is None:
|
if start_time is None or duration is None:
|
||||||
|
249
youtube_dl/extractor/peertube.py
Normal file
249
youtube_dl/extractor/peertube.py
Normal file
@ -0,0 +1,249 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_resolution,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PeerTubeIE(InfoExtractor):
    """Extractor for videos served by PeerTube instances.

    Two URL forms are accepted: regular watch/embed/API URLs on one of the
    hosts listed in ``_INSTANCES_RE``, and the internal
    ``peertube:<host>:<uuid>`` form, which works for any host.
    Metadata and file URLs are read from ``/api/v1/videos/<uuid>``.
    """

    # NOTE: this fragment contains layout whitespace and a ``#`` comment, so
    # it is only valid inside a pattern compiled in verbose mode ((?x)).
    _INSTANCES_RE = r'''(?:
                            # Taken from https://instances.joinpeertube.org/instances
                            tube\.openalgeria\.org|
                            peertube\.pointsecu\.fr|
                            peertube\.nogafa\.org|
                            peertube\.pl|
                            megatube\.lilomoino\.fr|
                            peertube\.tamanoir\.foucry\.net|
                            peertube\.inapurna\.org|
                            peertube\.netzspielplatz\.de|
                            video\.deadsuperhero\.com|
                            peertube\.devosi\.org|
                            peertube\.1312\.media|
                            tube\.worldofhauru\.xyz|
                            tube\.bootlicker\.party|
                            skeptikon\.fr|
                            peertube\.geekshell\.fr|
                            tube\.opportunis\.me|
                            peertube\.peshane\.net|
                            video\.blueline\.mg|
                            tube\.homecomputing\.fr|
                            videos\.cloudfrancois\.fr|
                            peertube\.viviers-fibre\.net|
                            tube\.ouahpiti\.info|
                            video\.tedomum\.net|
                            video\.g3l\.org|
                            fontube\.fr|
                            peertube\.gaialabs\.ch|
                            peertube\.extremely\.online|
                            peertube\.public-infrastructure\.eu|
                            tube\.kher\.nl|
                            peertube\.qtg\.fr|
                            tube\.22decembre\.eu|
                            facegirl\.me|
                            video\.migennes\.net|
                            janny\.moe|
                            tube\.p2p\.legal|
                            video\.atlanti\.se|
                            troll\.tv|
                            peertube\.geekael\.fr|
                            vid\.leotindall\.com|
                            video\.anormallostpod\.ovh|
                            p-tube\.h3z\.jp|
                            tube\.darfweb\.eu|
                            videos\.iut-orsay\.fr|
                            peertube\.solidev\.net|
                            videos\.symphonie-of-code\.fr|
                            testtube\.ortg\.de|
                            videos\.cemea\.org|
                            peertube\.gwendalavir\.eu|
                            video\.passageenseine\.fr|
                            videos\.festivalparminous\.org|
                            peertube\.touhoppai\.moe|
                            peertube\.duckdns\.org|
                            sikke\.fi|
                            peertube\.mastodon\.host|
                            firedragonvideos\.com|
                            vidz\.dou\.bet|
                            peertube\.koehn\.com|
                            peer\.hostux\.social|
                            share\.tube|
                            peertube\.walkingmountains\.fr|
                            medias\.libox\.fr|
                            peertube\.moe|
                            peertube\.xyz|
                            jp\.peertube\.network|
                            videos\.benpro\.fr|
                            tube\.otter\.sh|
                            peertube\.angristan\.xyz|
                            peertube\.parleur\.net|
                            peer\.ecutsa\.fr|
                            peertube\.heraut\.eu|
                            peertube\.tifox\.fr|
                            peertube\.maly\.io|
                            vod\.mochi\.academy|
                            exode\.me|
                            coste\.video|
                            tube\.aquilenet\.fr|
                            peertube\.gegeweb\.eu|
                            framatube\.org|
                            thinkerview\.video|
                            tube\.conferences-gesticulees\.net|
                            peertube\.datagueule\.tv|
                            video\.lqdn\.fr|
                            meilleurtube\.delire\.party|
                            tube\.mochi\.academy|
                            peertube\.dav\.li|
                            media\.zat\.im|
                            pytu\.be|
                            peertube\.valvin\.fr|
                            peertube\.nsa\.ovh|
                            video\.colibris-outilslibres\.org|
                            video\.hispagatos\.org|
                            tube\.svnet\.fr|
                            peertube\.video|
                            videos\.lecygnenoir\.info|
                            peertube3\.cpy\.re|
                            peertube2\.cpy\.re|
                            videos\.tcit\.fr|
                            peertube\.cpy\.re
                        )'''
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
    # Either the internal "peertube:<host>:" prefix (any host) or an http(s)
    # URL on a known instance, followed by the video UUID.
    _VALID_URL = r'''(?x)
                    (?:
                        peertube:(?P<host>[^:]+):|
                        https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
                    )
                    (?P<id>%s)
                    ''' % (_INSTANCES_RE, _UUID_RE)
    _TESTS = [{
        'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
        'md5': '80f24ff364cc9d333529506a263e7feb',
        'info_dict': {
            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
            'ext': 'mp4',
            'title': 'wow',
            'description': 'wow such video, so gif',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'timestamp': 1519297480,
            'upload_date': '20180222',
            'uploader': 'Luclu7',
            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
            'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7',
            'license': 'Unknown',
            'duration': 3,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': list,
            'categories': list,
        }
    }, {
        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
        'only_matching': True,
    }, {
        # nsfw
        'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
        'only_matching': True,
    }, {
        'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
        'only_matching': True,
    }, {
        'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
        'only_matching': True,
    }, {
        'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_peertube_url(webpage, source_url):
        """Return a ``peertube:<host>:<id>`` URL for a PeerTube watch page.

        ``source_url`` must look like ``http(s)://<host>/videos/watch/<uuid>``
        and ``webpage`` must contain one of the known PeerTube marker
        strings; otherwise ``None`` is returned. The host does not have to
        be listed in ``_INSTANCES_RE``.
        """
        mobj = re.match(
            r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
            % PeerTubeIE._UUID_RE, source_url)
        if mobj and any(p in webpage for p in (
                '<title>PeerTube<',
                'There will be other non JS-based clients to access PeerTube',
                '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
            return 'peertube:%s:%s' % mobj.group('host', 'id')

    @staticmethod
    def _extract_urls(webpage, source_url):
        """Return the PeerTube video URLs found for ``webpage``.

        Embed iframes pointing at known instances are collected first. If
        there are none, the page itself is checked with
        ``_extract_peertube_url`` and its result, when any, becomes the only
        entry. An empty list means nothing was found.
        """
        entries = re.findall(
            r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
            % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
        if not entries:
            peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
            if peertube_url:
                entries = [peertube_url]
        return entries

    def _real_extract(self, url):
        """Download the video's API description and build its info dict.

        Raises whatever ``_download_json`` raises when the API request
        fails, and ``KeyError`` when the response lacks ``name`` or
        ``files``.
        """
        mobj = re.match(self._VALID_URL, url)
        # Exactly one of the two host groups is set, depending on the URL form.
        host = mobj.group('host') or mobj.group('host_2')
        video_id = mobj.group('id')

        base_url = 'https://%s' % host

        video = self._download_json(
            '%s/api/v1/videos/%s' % (base_url, video_id), video_id)

        title = video['name']

        formats = []
        for file_ in video['files']:
            if not isinstance(file_, dict):
                continue
            file_url = file_.get('fileUrl')
            if not file_url or not isinstance(file_url, compat_str):
                continue
            file_size = int_or_none(file_.get('size'))
            # The resolution label doubles as the format id and is parsed
            # for the format's dimensions.
            format_id = try_get(
                file_, lambda x: x['resolution']['label'], compat_str)
            f = parse_resolution(format_id)
            f.update({
                'url': file_url,
                'format_id': format_id,
                'filesize': file_size,
            })
            formats.append(f)
        self._sort_formats(formats)

        def account_data(field):
            """Return string ``field`` of the video's account, if present."""
            return try_get(video, lambda x: x['account'][field], compat_str)

        category = try_get(video, lambda x: x['category']['label'], compat_str)
        categories = [category] if category else None

        nsfw = video.get('nsfw')
        # Check the value's type; comparing it to the ``bool`` type object
        # with ``is`` can never succeed and left age_limit always unset.
        if isinstance(nsfw, bool):
            age_limit = 18 if nsfw else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            # Resolve against the instance host rather than the input URL,
            # which is not an http(s) base in the peertube:<host>:<id> form.
            'thumbnail': urljoin(base_url, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'uploader': account_data('displayName'),
            'uploader_id': account_data('uuid'),
            'uploader_url': account_data('url'),
            'license': try_get(
                video, lambda x: x['licence']['label'], compat_str),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likes')),
            'dislike_count': int_or_none(video.get('dislikes')),
            'age_limit': age_limit,
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
            'formats': formats,
        }
|
@ -94,7 +94,7 @@ class PluralsightIE(PluralsightBaseIE):
|
|||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -140,10 +140,10 @@ class PluralsightIE(PluralsightBaseIE):
|
|||||||
|
|
||||||
raise ExtractorError('Unable to log in')
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
|
def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id):
|
||||||
captions_post = {
|
captions_post = {
|
||||||
'a': author,
|
'a': author,
|
||||||
'cn': clip_id,
|
'cn': clip_idx,
|
||||||
'lc': lang,
|
'lc': lang,
|
||||||
'm': name,
|
'm': name,
|
||||||
}
|
}
|
||||||
@ -195,13 +195,13 @@ class PluralsightIE(PluralsightBaseIE):
|
|||||||
|
|
||||||
author = qs.get('author', [None])[0]
|
author = qs.get('author', [None])[0]
|
||||||
name = qs.get('name', [None])[0]
|
name = qs.get('name', [None])[0]
|
||||||
clip_id = qs.get('clip', [None])[0]
|
clip_idx = qs.get('clip', [None])[0]
|
||||||
course_name = qs.get('course', [None])[0]
|
course_name = qs.get('course', [None])[0]
|
||||||
|
|
||||||
if any(not f for f in (author, name, clip_id, course_name,)):
|
if any(not f for f in (author, name, clip_idx, course_name,)):
|
||||||
raise ExtractorError('Invalid URL', expected=True)
|
raise ExtractorError('Invalid URL', expected=True)
|
||||||
|
|
||||||
display_id = '%s-%s' % (name, clip_id)
|
display_id = '%s-%s' % (name, clip_idx)
|
||||||
|
|
||||||
course = self._download_course(course_name, url, display_id)
|
course = self._download_course(course_name, url, display_id)
|
||||||
|
|
||||||
@ -217,7 +217,7 @@ class PluralsightIE(PluralsightBaseIE):
|
|||||||
clip_index = clip_.get('index')
|
clip_index = clip_.get('index')
|
||||||
if clip_index is None:
|
if clip_index is None:
|
||||||
continue
|
continue
|
||||||
if compat_str(clip_index) == clip_id:
|
if compat_str(clip_index) == clip_idx:
|
||||||
clip = clip_
|
clip = clip_
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -225,6 +225,7 @@ class PluralsightIE(PluralsightBaseIE):
|
|||||||
raise ExtractorError('Unable to resolve clip')
|
raise ExtractorError('Unable to resolve clip')
|
||||||
|
|
||||||
title = clip['title']
|
title = clip['title']
|
||||||
|
clip_id = clip.get('clipName') or clip.get('name') or clip['clipId']
|
||||||
|
|
||||||
QUALITIES = {
|
QUALITIES = {
|
||||||
'low': {'width': 640, 'height': 480},
|
'low': {'width': 640, 'height': 480},
|
||||||
@ -277,7 +278,7 @@ class PluralsightIE(PluralsightBaseIE):
|
|||||||
clip_post = {
|
clip_post = {
|
||||||
'author': author,
|
'author': author,
|
||||||
'includeCaptions': False,
|
'includeCaptions': False,
|
||||||
'clipIndex': int(clip_id),
|
'clipIndex': int(clip_idx),
|
||||||
'courseName': course_name,
|
'courseName': course_name,
|
||||||
'locale': 'en',
|
'locale': 'en',
|
||||||
'moduleName': name,
|
'moduleName': name,
|
||||||
@ -330,10 +331,10 @@ class PluralsightIE(PluralsightBaseIE):
|
|||||||
|
|
||||||
# TODO: other languages?
|
# TODO: other languages?
|
||||||
subtitles = self.extract_subtitles(
|
subtitles = self.extract_subtitles(
|
||||||
author, clip_id, 'en', name, duration, display_id)
|
author, clip_idx, 'en', name, duration, display_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': clip.get('clipName') or clip['name'],
|
'id': clip_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'creator': author,
|
'creator': author,
|
||||||
|
@ -43,7 +43,8 @@ class PornComIE(InfoExtractor):
|
|||||||
|
|
||||||
config = self._parse_json(
|
config = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
|
(r'=\s*({.+?})\s*;\s*v1ar\b',
|
||||||
|
r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='),
|
||||||
webpage, 'config', default='{}'),
|
webpage, 'config', default='{}'),
|
||||||
display_id, transform_source=js_to_json, fatal=False)
|
display_id, transform_source=js_to_json, fatal=False)
|
||||||
|
|
||||||
@ -69,7 +70,7 @@ class PornComIE(InfoExtractor):
|
|||||||
'height': int(height),
|
'height': int(height),
|
||||||
'filesize_approx': parse_filesize(filesize),
|
'filesize_approx': parse_filesize(filesize),
|
||||||
} for format_url, height, filesize in re.findall(
|
} for format_url, height, filesize in re.findall(
|
||||||
r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
|
r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<',
|
||||||
webpage)]
|
webpage)]
|
||||||
thumbnail = None
|
thumbnail = None
|
||||||
duration = None
|
duration = None
|
||||||
|
@ -53,7 +53,8 @@ class RBMARadioIE(InfoExtractor):
|
|||||||
'format_id': compat_str(abr),
|
'format_id': compat_str(abr),
|
||||||
'abr': abr,
|
'abr': abr,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
} for abr in (96, 128, 256)]
|
} for abr in (96, 128, 192, 256)]
|
||||||
|
self._check_formats(formats, episode_id)
|
||||||
|
|
||||||
description = clean_html(episode.get('longTeaser'))
|
description = clean_html(episode.get('longTeaser'))
|
||||||
thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
|
thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
|
||||||
|
@ -19,7 +19,7 @@ class RDSIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '604333',
|
'id': '604333',
|
||||||
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
|
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Fowler Jr. prend la direction de Jacksonville',
|
'title': 'Fowler Jr. prend la direction de Jacksonville',
|
||||||
'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
|
'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
|
||||||
'timestamp': 1430397346,
|
'timestamp': 1430397346,
|
||||||
|
@ -47,7 +47,7 @@ class RedditIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class RedditRIE(InfoExtractor):
|
class RedditRIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:(?:www|old)\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -86,6 +86,10 @@ class RedditRIE(InfoExtractor):
|
|||||||
# youtube
|
# youtube
|
||||||
'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
|
'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# reddit video @ nm reddit
|
||||||
|
'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -50,7 +50,7 @@ class RoosterTeethIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -1,10 +1,14 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
strip_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -14,20 +18,19 @@ class RTBFIE(InfoExtractor):
|
|||||||
(?:
|
(?:
|
||||||
video/[^?]+\?.*\bid=|
|
video/[^?]+\?.*\bid=|
|
||||||
ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
|
ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
|
||||||
auvio/[^/]+\?.*id=
|
auvio/[^/]+\?.*\b(?P<live>l)?id=
|
||||||
)(?P<id>\d+)'''
|
)(?P<id>\d+)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
|
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
|
||||||
'md5': '799f334ddf2c0a582ba80c44655be570',
|
'md5': '8c876a1cceeb6cf31b476461ade72384',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1921274',
|
'id': '1921274',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Les Diables au coeur (épisode 2)',
|
'title': 'Les Diables au coeur (épisode 2)',
|
||||||
'description': 'Football - Diables Rouges',
|
'description': '(du 25/04/2014)',
|
||||||
'duration': 3099,
|
'duration': 3099.54,
|
||||||
'upload_date': '20140425',
|
'upload_date': '20140425',
|
||||||
'timestamp': 1398456336,
|
'timestamp': 1398456300,
|
||||||
'uploader': 'rtbfsport',
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# geo restricted
|
# geo restricted
|
||||||
@ -39,6 +42,18 @@ class RTBFIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
|
'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Live
|
||||||
|
'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Audio
|
||||||
|
'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# With Subtitle
|
||||||
|
'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
|
_IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
|
||||||
_PROVIDERS = {
|
_PROVIDERS = {
|
||||||
@ -53,46 +68,94 @@ class RTBFIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
live, media_id = re.match(self._VALID_URL, url).groups()
|
||||||
data = self._download_json(
|
embed_page = self._download_webpage(
|
||||||
'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id)
|
'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
|
||||||
|
media_id, query={'id': media_id})
|
||||||
|
data = self._parse_json(self._html_search_regex(
|
||||||
|
r'data-media="([^"]+)"', embed_page, 'media data'), media_id)
|
||||||
|
|
||||||
error = data.get('error')
|
error = data.get('error')
|
||||||
if error:
|
if error:
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||||
|
|
||||||
data = data['data']
|
|
||||||
|
|
||||||
provider = data.get('provider')
|
provider = data.get('provider')
|
||||||
if provider in self._PROVIDERS:
|
if provider in self._PROVIDERS:
|
||||||
return self.url_result(data['url'], self._PROVIDERS[provider])
|
return self.url_result(data['url'], self._PROVIDERS[provider])
|
||||||
|
|
||||||
|
title = data['title']
|
||||||
|
is_live = data.get('isLive')
|
||||||
|
if is_live:
|
||||||
|
title = self._live_title(title)
|
||||||
|
height_re = r'-(\d+)p\.'
|
||||||
formats = []
|
formats = []
|
||||||
for key, format_id in self._QUALITIES:
|
|
||||||
format_url = data.get(key + 'Url')
|
m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
|
||||||
if format_url:
|
if m3u8_url:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
|
||||||
|
http_url = data.get('url')
|
||||||
|
if formats and http_url and re.search(height_re, http_url):
|
||||||
|
http_url = fix_url(http_url)
|
||||||
|
for m3u8_f in formats[:]:
|
||||||
|
height = m3u8_f.get('height')
|
||||||
|
if not height:
|
||||||
|
continue
|
||||||
|
f = m3u8_f.copy()
|
||||||
|
del f['protocol']
|
||||||
|
f.update({
|
||||||
|
'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
|
||||||
|
'url': re.sub(height_re, '-%dp.' % height, http_url),
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
else:
|
||||||
|
sources = data.get('sources') or {}
|
||||||
|
for key, format_id in self._QUALITIES:
|
||||||
|
format_url = sources.get(key)
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
height_re, format_url, 'height', default=None))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': format_url,
|
'url': fix_url(format_url),
|
||||||
|
'height': height,
|
||||||
})
|
})
|
||||||
|
|
||||||
thumbnails = []
|
mpd_url = data.get('urlDash')
|
||||||
for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items():
|
if not data.get('drm') and mpd_url:
|
||||||
if thumbnail_id != 'default':
|
formats.extend(self._extract_mpd_formats(
|
||||||
thumbnails.append({
|
mpd_url, media_id, mpd_id='dash', fatal=False))
|
||||||
'url': self._IMAGE_HOST + thumbnail_url,
|
|
||||||
'id': thumbnail_id,
|
audio_url = data.get('urlAudio')
|
||||||
})
|
if audio_url:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'audio',
|
||||||
|
'url': audio_url,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for track in (data.get('tracks') or {}).values():
|
||||||
|
sub_url = track.get('url')
|
||||||
|
if not sub_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(track.get('lang') or 'fr', []).append({
|
||||||
|
'url': sub_url,
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': media_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': data['title'],
|
'title': title,
|
||||||
'description': data.get('description') or data.get('subtitle'),
|
'description': strip_or_none(data.get('description')),
|
||||||
'thumbnails': thumbnails,
|
'thumbnail': data.get('thumbnail'),
|
||||||
'duration': data.get('duration') or data.get('realDuration'),
|
'duration': float_or_none(data.get('realDuration')),
|
||||||
'timestamp': int_or_none(data.get('created')),
|
'timestamp': int_or_none(data.get('liveFrom')),
|
||||||
'view_count': int_or_none(data.get('viewCount')),
|
'series': data.get('programLabel'),
|
||||||
'uploader': data.get('channel'),
|
'subtitles': subtitles,
|
||||||
'tags': data.get('tags'),
|
'is_live': is_live,
|
||||||
}
|
}
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user