mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-26 10:24:33 +01:00
Merge branch 'master' of https://github.com/ytdl-org/youtube-dl
This commit is contained in:
commit
f377e2e7f5
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.05.11
|
||||
[debug] youtube-dl version 2019.08.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.05.11
|
||||
[debug] youtube-dl version 2019.08.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.08.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.08.02**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
@ -9,6 +9,7 @@ python:
|
||||
- "3.6"
|
||||
- "pypy"
|
||||
- "pypy3"
|
||||
dist: trusty
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
|
205
ChangeLog
205
ChangeLog
@ -1,3 +1,208 @@
|
||||
version 2019.08.02
|
||||
|
||||
Extractors
|
||||
+ [tvigle] Add support for HLS and DASH formats (#21967)
|
||||
* [tvigle] Fix extraction (#21967)
|
||||
+ [yandexvideo] Add support for DASH formats (#21971)
|
||||
* [discovery] Use API call for video data extraction (#21808)
|
||||
+ [mgtv] Extract format_note (#21881)
|
||||
* [tvn24] Fix metadata extraction (#21833, #21834)
|
||||
* [dlive] Relax URL regular expression (#21909)
|
||||
+ [openload] Add support for oload.best (#21913)
|
||||
* [youtube] Improve metadata extraction for age gate content (#21943)
|
||||
|
||||
|
||||
version 2019.07.30
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix and improve title and description extraction (#21934)
|
||||
|
||||
|
||||
version 2019.07.27
|
||||
|
||||
Extractors
|
||||
+ [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265)
|
||||
+ [discovery] Add support for go.discovery.com URLs
|
||||
* [youtube:playlist] Relax video regular expression (#21844)
|
||||
* [generic] Restrict --default-search schemeless URLs detection pattern
|
||||
(#21842)
|
||||
* [vrv] Fix CMS signing query extraction (#21809)
|
||||
|
||||
|
||||
version 2019.07.16
|
||||
|
||||
Extractors
|
||||
+ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv
|
||||
(#21281, #21290)
|
||||
* [kaltura] Check source format URL (#21290)
|
||||
* [ctsnews] Fix YouTube embeds extraction (#21678)
|
||||
+ [einthusan] Add support for einthusan.com (#21748, #21775)
|
||||
+ [youtube] Add support for invidious.mastodon.host (#21777)
|
||||
+ [gfycat] Extend URL regular expression (#21779, #21780)
|
||||
* [youtube] Restrict is_live extraction (#21782)
|
||||
|
||||
|
||||
version 2019.07.14
|
||||
|
||||
Extractors
|
||||
* [porn91] Fix extraction (#21312)
|
||||
+ [yandexmusic] Extract track number and disk number (#21421)
|
||||
+ [yandexmusic] Add support for multi disk albums (#21420, #21421)
|
||||
* [lynda] Handle missing subtitles (#20490, #20513)
|
||||
+ [youtube] Add more invidious instances to URL regular expression (#21694)
|
||||
* [twitter] Improve uploader id extraction (#21705)
|
||||
* [spankbang] Fix and improve metadata extraction
|
||||
* [spankbang] Fix extraction (#21763, #21764)
|
||||
+ [dlive] Add support for dlive.tv (#18080)
|
||||
+ [livejournal] Add support for livejournal.com (#21526)
|
||||
* [roosterteeth] Fix free episode extraction (#16094)
|
||||
* [dbtv] Fix extraction
|
||||
* [bellator] Fix extraction
|
||||
- [rudo] Remove extractor (#18430, #18474)
|
||||
* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224)
|
||||
* [bleacherreport] Fix Bleacher Report CMS extraction
|
||||
* [espn] Fix fivethirtyeight.com extraction
|
||||
* [5tv] Relax video URL regular expression and support https URLs
|
||||
* [youtube] Fix is_live extraction (#21734)
|
||||
* [youtube] Fix authentication (#11270)
|
||||
|
||||
|
||||
version 2019.07.12
|
||||
|
||||
Core
|
||||
+ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016)
|
||||
|
||||
Extractors
|
||||
+ [mgtv] Pass Referer HTTP header for format URLs (#21726)
|
||||
+ [beeg] Add support for api/v6 v2 URLs without t argument (#21701)
|
||||
* [voxmedia:volume] Improve vox embed extraction (#16846)
|
||||
* [funnyordie] Move extraction to VoxMedia extractor (#16846)
|
||||
* [gameinformer] Fix extraction (#8895, #15363, #17206)
|
||||
* [funk] Fix extraction (#17915)
|
||||
* [packtpub] Relax lesson URL regular expression (#21695)
|
||||
* [packtpub] Fix extraction (#21268)
|
||||
* [philharmoniedeparis] Relax URL regular expression (#21672)
|
||||
* [peertube] Detect embed URLs in generic extraction (#21666)
|
||||
* [mixer:vod] Relax URL regular expression (#21657, #21658)
|
||||
+ [lecturio] Add support for id based URLs (#21630)
|
||||
+ [go] Add site info for disneynow (#21613)
|
||||
* [ted] Restrict info regular expression (#21631)
|
||||
* [twitch:vod] Actualize m3u8 URL (#21538, #21607)
|
||||
* [vzaar] Fix videos with empty title (#21606)
|
||||
* [tvland] Fix extraction (#21384)
|
||||
* [arte] Clean extractor (#15583, #21614)
|
||||
|
||||
|
||||
version 2019.07.02
|
||||
|
||||
Core
|
||||
+ [utils] Introduce random_user_agent and use as default User-Agent (#21546)
|
||||
|
||||
Extractors
|
||||
+ [vevo] Add support for embed.vevo.com URLs (#21565)
|
||||
+ [openload] Add support for oload.biz (#21574)
|
||||
* [xiami] Update API base URL (#21575)
|
||||
* [yourporn] Fix extraction (#21585)
|
||||
+ [acast] Add support for URLs with episode id (#21444)
|
||||
+ [dailymotion] Add support for DM.player embeds
|
||||
* [soundcloud] Update client id
|
||||
|
||||
|
||||
version 2019.06.27
|
||||
|
||||
Extractors
|
||||
+ [go] Add support for disneynow.com (#21528)
|
||||
* [mixer:vod] Relax URL regular expression (#21531, #21536)
|
||||
* [drtv] Relax URL regular expression
|
||||
* [fusion] Fix extraction (#17775, #21269)
|
||||
- [nfb] Remove extractor (#21518)
|
||||
+ [beeg] Add support for api/v6 v2 URLs (#21511)
|
||||
+ [brightcove:new] Add support for playlists (#21331)
|
||||
+ [openload] Add support for oload.life (#21495)
|
||||
* [vimeo:channel,group] Make title extraction non fatal
|
||||
* [vimeo:likes] Implement extractor in terms of channel extractor (#21493)
|
||||
+ [pornhub] Add support for more paged video sources
|
||||
+ [pornhub] Add support for downloading single pages and search pages (#15570)
|
||||
* [pornhub] Rework extractors (#11922, #16078, #17454, #17936)
|
||||
+ [youtube] Add another signature function pattern
|
||||
* [tf1] Fix extraction (#21365, #21372)
|
||||
* [crunchyroll] Move Accept-Language workaround to video extractor since
|
||||
it causes playlists not to list any videos
|
||||
* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443)
|
||||
|
||||
|
||||
version 2019.06.21
|
||||
|
||||
Core
|
||||
* [utils] Restrict parse_codecs and add theora as known vcodec (#21381)
|
||||
|
||||
Extractors
|
||||
* [youtube] Update signature function patterns (#21469, #21476)
|
||||
* [youtube] Make --write-annotations non fatal (#21452)
|
||||
+ [sixplay] Add support for rtlmost.hu (#21405)
|
||||
* [youtube] Hardcode codec metadata for av01 video only formats (#21381)
|
||||
* [toutv] Update client key (#21370)
|
||||
+ [biqle] Add support for new embed domain
|
||||
* [cbs] Improve DRM protected videos detection (#21339)
|
||||
|
||||
|
||||
version 2019.06.08
|
||||
|
||||
Core
|
||||
* [downloader/common] Improve rate limit (#21301)
|
||||
* [utils] Improve strip_or_none
|
||||
* [extractor/common] Strip src attribute for HTML5 entries code (#18485,
|
||||
#21169)
|
||||
|
||||
Extractors
|
||||
* [ted] Fix playlist extraction (#20844, #21032)
|
||||
* [vlive:playlist] Fix video extraction when no playlist is found (#20590)
|
||||
+ [vlive] Add CH+ support (#16887, #21209)
|
||||
+ [openload] Add support for oload.website (#21329)
|
||||
+ [tvnow] Extract HD formats (#21201)
|
||||
+ [redbulltv] Add support for rrn:content URLs (#21297)
|
||||
* [youtube] Fix average rating extraction (#21304)
|
||||
+ [bitchute] Extract HTML5 formats (#21306)
|
||||
* [cbsnews] Fix extraction (#9659, #15397)
|
||||
* [vvvvid] Relax URL regular expression (#21299)
|
||||
+ [prosiebensat1] Add support for new API (#21272)
|
||||
+ [vrv] Extract adaptive_hls formats (#21243)
|
||||
* [viki] Switch to HTTPS (#21001)
|
||||
* [LiveLeak] Check if the original videos exist (#21206, #21208)
|
||||
* [rtp] Fix extraction (#15099)
|
||||
* [youtube] Improve DRM protected videos detection (#1774)
|
||||
+ [srgssrplay] Add support for popupvideoplayer URLs (#21155)
|
||||
+ [24video] Add support for porno.24video.net (#21194)
|
||||
+ [24video] Add support for 24video.site (#21193)
|
||||
- [pornflip] Remove extractor
|
||||
- [criterion] Remove extractor (#21195)
|
||||
* [pornhub] Use HTTPS (#21061)
|
||||
* [bitchute] Fix uploader extraction (#21076)
|
||||
* [streamcloud] Reduce waiting time to 6 seconds (#21092)
|
||||
- [novamov] Remove extractors (#21077)
|
||||
+ [openload] Add support for oload.press (#21135)
|
||||
* [vivo] Fix extraction (#18906, #19217)
|
||||
|
||||
|
||||
version 2019.05.20
|
||||
|
||||
Core
|
||||
+ [extractor/common] Move workaround for applying first Set-Cookie header
|
||||
into a separate _apply_first_set_cookie_header method
|
||||
|
||||
Extractors
|
||||
* [safari] Fix authentication (#21090)
|
||||
* [vk] Use _apply_first_set_cookie_header
|
||||
* [vrt] Fix extraction (#20527)
|
||||
+ [canvas] Add support for vrtnieuws and sporza site ids and extract
|
||||
AES HLS formats
|
||||
+ [vrv] Extract captions (#19238)
|
||||
* [tele5] Improve video id extraction
|
||||
* [tele5] Relax URL regular expression (#21020, #21063)
|
||||
* [svtplay] Update API URL (#21075)
|
||||
+ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071)
|
||||
|
||||
|
||||
version 2019.05.11
|
||||
|
||||
Core
|
||||
|
66
README.md
66
README.md
@ -1216,6 +1216,72 @@ Incorrect:
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Inline values
|
||||
|
||||
Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
TITLE_RE = r'<title>([^<]+)</title>'
|
||||
# ...some lines of code...
|
||||
title = self._html_search_regex(TITLE_RE, webpage, 'title')
|
||||
```
|
||||
|
||||
### Collapse fallbacks
|
||||
|
||||
Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
|
||||
|
||||
#### Example
|
||||
|
||||
Good:
|
||||
|
||||
```python
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
Unwieldy:
|
||||
|
||||
```python
|
||||
description = (
|
||||
self._og_search_description(webpage, default=None)
|
||||
or self._html_search_meta('description', webpage, default=None)
|
||||
or self._html_search_meta('twitter:description', webpage, default=None))
|
||||
```
|
||||
|
||||
Methods supporting a list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
|
||||
|
||||
### Trailing parentheses
|
||||
|
||||
Always move trailing parentheses after the last argument.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list)
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list,
|
||||
)
|
||||
```
|
||||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
@ -58,16 +58,8 @@
|
||||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
- **arte.tv:concert**
|
||||
- **arte.tv:creative**
|
||||
- **arte.tv:ddc**
|
||||
- **arte.tv:embed**
|
||||
- **arte.tv:future**
|
||||
- **arte.tv:info**
|
||||
- **arte.tv:magazine**
|
||||
- **arte.tv:playlist**
|
||||
- **AsianCrush**
|
||||
- **AsianCrushPlaylist**
|
||||
@ -78,7 +70,6 @@
|
||||
- **AudioBoom**
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **auroravid**: AuroraVid
|
||||
- **AWAAN**
|
||||
- **awaan:live**
|
||||
- **awaan:season**
|
||||
@ -150,6 +141,7 @@
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:embed**
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CCMA**
|
||||
@ -174,7 +166,6 @@
|
||||
- **Clipsyndicate**
|
||||
- **CloserToTruth**
|
||||
- **CloudflareStream**
|
||||
- **cloudtime**: CloudTime
|
||||
- **Cloudy**
|
||||
- **Clubic**
|
||||
- **Clyp**
|
||||
@ -194,7 +185,6 @@
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
@ -233,6 +223,8 @@
|
||||
- **DiscoveryNetworksDe**
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **dlive:stream**
|
||||
- **dlive:vod**
|
||||
- **Dotsub**
|
||||
- **DouyuShow**
|
||||
- **DouyuTV**: 斗鱼
|
||||
@ -315,9 +307,7 @@
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
- **Funimation**
|
||||
- **FunkChannel**
|
||||
- **FunkMix**
|
||||
- **FunnyOrDie**
|
||||
- **Funk**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
- **FXNetworks**
|
||||
@ -460,6 +450,7 @@
|
||||
- **linkedin:learning:course**
|
||||
- **LinuxAcademy**
|
||||
- **LiTV**
|
||||
- **LiveJournal**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
- **livestream**
|
||||
@ -583,7 +574,6 @@
|
||||
- **NextTV**: 壹電視
|
||||
- **Nexx**
|
||||
- **NexxEmbed**
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **NhkVod**
|
||||
- **nhl.com**
|
||||
@ -609,7 +599,6 @@
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
- **nowvideo**: NowVideo
|
||||
- **Noz**
|
||||
- **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **npo.nl:live**
|
||||
@ -693,11 +682,11 @@
|
||||
- **PopcornTV**
|
||||
- **PornCom**
|
||||
- **PornerBros**
|
||||
- **PornFlip**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
- **PornHubPlaylist**
|
||||
- **PornHubUserVideos**
|
||||
- **PornHubPagedVideoList**
|
||||
- **PornHubUser**
|
||||
- **PornHubUserVideosUpload**
|
||||
- **Pornotube**
|
||||
- **PornoVoisines**
|
||||
- **PornoXO**
|
||||
@ -734,6 +723,7 @@
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
- **RedBullTVRrnContent**
|
||||
- **Reddit**
|
||||
- **RedditR**
|
||||
- **RedTube**
|
||||
@ -767,7 +757,6 @@
|
||||
- **rtve.es:television**
|
||||
- **RTVNH**
|
||||
- **RTVS**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channels
|
||||
@ -899,7 +888,6 @@
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TheIntercept**
|
||||
- **theoperaplatform**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheScene**
|
||||
@ -1024,7 +1012,6 @@
|
||||
- **videomore:video**
|
||||
- **VideoPremium**
|
||||
- **VideoPress**
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **VidLii**
|
||||
- **vidme**
|
||||
@ -1071,7 +1058,7 @@
|
||||
- **VoxMediaVolume**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **Vrak**
|
||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||
- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
|
||||
- **VrtNU**: VrtNU.be
|
||||
- **vrv**
|
||||
- **vrv:series**
|
||||
@ -1101,7 +1088,6 @@
|
||||
- **Weibo**
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
@ -1131,6 +1117,7 @@
|
||||
- **Yahoo**: Yahoo screen and movies
|
||||
- **yahoo:gyao**
|
||||
- **yahoo:gyao:player**
|
||||
- **yahoo:japannews**: Yahoo! Japan News
|
||||
- **YandexDisk**
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
|
@ -73,6 +73,7 @@ from youtube_dl.utils import (
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@ -752,6 +753,18 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_strip_or_none(self):
|
||||
self.assertEqual(strip_or_none(' abc'), 'abc')
|
||||
self.assertEqual(strip_or_none('abc '), 'abc')
|
||||
self.assertEqual(strip_or_none(' abc '), 'abc')
|
||||
self.assertEqual(strip_or_none('\tabc\t'), 'abc')
|
||||
self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
|
||||
self.assertEqual(strip_or_none('abc'), 'abc')
|
||||
self.assertEqual(strip_or_none(''), '')
|
||||
self.assertEqual(strip_or_none(None), None)
|
||||
self.assertEqual(strip_or_none(42), None)
|
||||
self.assertEqual(strip_or_none([]), None)
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
@ -809,6 +822,15 @@ class TestUtil(unittest.TestCase):
|
||||
'vcodec': 'av01.0.05M.08',
|
||||
'acodec': 'none',
|
||||
})
|
||||
self.assertEqual(parse_codecs('theora, vorbis'), {
|
||||
'vcodec': 'theora',
|
||||
'acodec': 'vorbis',
|
||||
})
|
||||
self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), {
|
||||
'vcodec': 'unknownvcodec',
|
||||
'acodec': 'unknownacodec',
|
||||
})
|
||||
self.assertEqual(parse_codecs('unknown'), {})
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
|
@ -94,7 +94,7 @@ def _real_main(argv=None):
|
||||
if opts.verbose:
|
||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||
except IOError:
|
||||
sys.exit('ERROR: batch file could not be read')
|
||||
sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
|
||||
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
|
||||
_enc = preferredencoding()
|
||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||
|
@ -176,7 +176,9 @@ class FileDownloader(object):
|
||||
return
|
||||
speed = float(byte_counter) / elapsed
|
||||
if speed > rate_limit:
|
||||
time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
|
||||
sleep_time = float(byte_counter) / rate_limit - elapsed
|
||||
if sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
|
@ -53,7 +53,7 @@ class DashSegmentsFD(FragmentFD):
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attemps
|
||||
# retried with the same request data this usually succeeds (1-2 attempts
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
|
@ -146,7 +146,7 @@ def write_piff_header(stream, params):
|
||||
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
||||
avcc_payload = u8.pack(1) # configuration version
|
||||
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
|
||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
|
||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
|
||||
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
|
||||
avcc_payload += u16.pack(len(sps))
|
||||
avcc_payload += sps
|
||||
|
@ -15,10 +15,13 @@ class AbcNewsVideoIE(AMPIE):
|
||||
IE_NAME = 'abcnews:video'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
abcnews\.go\.com/
|
||||
(?:
|
||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||
video/embed\?.*?\bid=
|
||||
)|
|
||||
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
@ -7,6 +7,7 @@ import functools
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
@ -27,7 +28,7 @@ class ACastIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
||||
'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
@ -46,28 +47,37 @@ class ACastIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
s = self._download_json(
|
||||
'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
|
||||
display_id)
|
||||
media_url = s['url']
|
||||
if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
|
||||
episode_url = s.get('episodeUrl')
|
||||
if episode_url:
|
||||
display_id = episode_url
|
||||
else:
|
||||
channel, display_id = re.match(self._VALID_URL, s['link']).groups()
|
||||
cast_data = self._download_json(
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
e = cast_data['episode']
|
||||
title = e['name']
|
||||
title = e.get('name') or s['title']
|
||||
return {
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
'description': e.get('description') or e.get('summary'),
|
||||
'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate')),
|
||||
'duration': float_or_none(s.get('duration') or e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
|
||||
'duration': float_or_none(e.get('duration') or s.get('duration')),
|
||||
'filesize': int_or_none(e.get('contentLength')),
|
||||
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
|
||||
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
|
||||
|
@ -25,6 +25,11 @@ MSO_INFO = {
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
},
|
||||
'ATT': {
|
||||
'name': 'AT&T U-verse',
|
||||
'username_field': 'userid',
|
||||
'password_field': 'password',
|
||||
},
|
||||
'ATTOTT': {
|
||||
'name': 'DIRECTV NOW',
|
||||
'username_field': 'email',
|
||||
|
@ -4,17 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
qualities,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
@ -25,59 +18,7 @@ from ..utils import (
|
||||
# add tests.
|
||||
|
||||
|
||||
class ArteTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
|
||||
IE_NAME = 'arte.tv'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
lang = mobj.group('lang')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||
ref_xml_doc = self._download_xml(
|
||||
ref_xml_url, video_id, note='Downloading metadata')
|
||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||
config_xml_url = config_node.attrib['ref']
|
||||
config = self._download_xml(
|
||||
config_xml_url, video_id, note='Downloading configuration')
|
||||
|
||||
formats = [{
|
||||
'format_id': q.attrib['quality'],
|
||||
# The playpath starts at 'mp4:', if we don't manually
|
||||
# split the url, rtmpdump will incorrectly parse them
|
||||
'url': q.text.split('mp4:', 1)[0],
|
||||
'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],
|
||||
'ext': 'flv',
|
||||
'quality': 2 if q.attrib['quality'] == 'hd' else 1,
|
||||
} for q in config.findall('./urls/url')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = config.find('.//name').text
|
||||
thumbnail = config.find('.//firstThumbnailUrl').text
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ArteTVBaseIE(InfoExtractor):
|
||||
@classmethod
|
||||
def _extract_url_info(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url)
|
||||
lang = mobj.group('lang')
|
||||
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
if 'vid' in query:
|
||||
video_id = query['vid'][0]
|
||||
else:
|
||||
# This is not a real id, it can be for example AJT for the news
|
||||
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
||||
video_id = mobj.group('id')
|
||||
return video_id, lang
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
info = self._download_json(json_url, video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
@ -108,13 +49,15 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
'upload_date': unified_strdate(upload_date_str),
|
||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||
}
|
||||
qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
|
||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||
|
||||
LANGS = {
|
||||
'fr': 'F',
|
||||
'de': 'A',
|
||||
'en': 'E[ANG]',
|
||||
'es': 'E[ESP]',
|
||||
'it': 'E[ITA]',
|
||||
'pl': 'E[POL]',
|
||||
}
|
||||
|
||||
langcode = LANGS.get(lang, lang)
|
||||
@ -126,8 +69,8 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
l = re.escape(langcode)
|
||||
|
||||
# Language preference from most to least priority
|
||||
# Reference: section 5.6.3 of
|
||||
# http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
|
||||
# Reference: section 6.8 of
|
||||
# https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
|
||||
PREFERENCES = (
|
||||
# original version in requested language, without subtitles
|
||||
r'VO{0}$'.format(l),
|
||||
@ -193,274 +136,59 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?P<lang>fr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn',
|
||||
'only_matching': True,
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'info_dict': {
|
||||
'id': '088501-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mexico: Stealing Petrol to Survive',
|
||||
'upload_date': '20190628',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self._extract_from_webpage(webpage, video_id, lang)
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
|
||||
ids = (video_id, '')
|
||||
# some pages contain multiple videos (like
|
||||
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
|
||||
# so we first try to look for json URLs that contain the video id from
|
||||
# the 'vid' parameter.
|
||||
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
|
||||
if embed_url:
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
# en and es URLs produce react-based pages with different layout (e.g.
|
||||
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
|
||||
if not iframe_url:
|
||||
program = self._search_regex(
|
||||
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
|
||||
webpage, 'program', default=None)
|
||||
if program:
|
||||
embed_html = self._parse_json(program, video_id)
|
||||
if embed_html:
|
||||
iframe_url = find_iframe_url(embed_html['embed_html'])
|
||||
if iframe_url:
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
if json_url:
|
||||
title = self._search_regex(
|
||||
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
webpage, 'title', default=None, group='title')
|
||||
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||
# Different kind of embed URL (e.g.
|
||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||
entries = [
|
||||
self.url_result(url)
|
||||
for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:creative'
|
||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1',
|
||||
'info_dict': {
|
||||
'id': '057405-001-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)',
|
||||
'upload_date': '20150716',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
|
||||
'playlist_count': 11,
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVInfoIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:info'
|
||||
_VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
|
||||
'info_dict': {
|
||||
'id': '067528-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Service civique, un cache misère ?',
|
||||
'upload_date': '20160403',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:future'
|
||||
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses',
|
||||
'info_dict': {
|
||||
'id': '050940-028-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les écrevisses aussi peuvent être anxieuses',
|
||||
'upload_date': '20140902',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:ddc'
|
||||
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
if lang == 'folge':
|
||||
lang = 'de'
|
||||
elif lang == 'emission':
|
||||
lang = 'fr'
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage)
|
||||
script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url')
|
||||
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
|
||||
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:concert'
|
||||
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
||||
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
|
||||
'info_dict': {
|
||||
'id': '186',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
|
||||
'upload_date': '20140128',
|
||||
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:cinema'
|
||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck',
|
||||
'md5': 'a5b9dd5575a11d93daf0e3f404f45438',
|
||||
'info_dict': {
|
||||
'id': '062494-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck',
|
||||
'upload_date': '20150807',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVMagazineIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:magazine'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Embedded via <iframe src="http://www.arte.tv/arte_vp/index.php?json_url=..."
|
||||
'url': 'http://www.arte.tv/magazine/trepalium/fr/entretien-avec-le-realisateur-vincent-lannoo-trepalium',
|
||||
'md5': '2a9369bcccf847d1c741e51416299f25',
|
||||
'info_dict': {
|
||||
'id': '065965-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trepalium - Extrait Ep.01',
|
||||
'upload_date': '20160121',
|
||||
},
|
||||
}, {
|
||||
# Embedded via <iframe src="http://www.arte.tv/guide/fr/embed/054813-004-A/medium"
|
||||
'url': 'http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium',
|
||||
'md5': 'fedc64fc7a946110fe311634e79782ca',
|
||||
'info_dict': {
|
||||
'id': '054813-004_PLUS7-F',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trepalium (4/6)',
|
||||
'description': 'md5:10057003c34d54e95350be4f9b05cb40',
|
||||
'upload_date': '20160218',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/magazine/metropolis/de/frank-woeste-german-paris-metropolis',
|
||||
'only_matching': True,
|
||||
}]
|
||||
lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(
|
||||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
|
||||
video_id, lang)
|
||||
|
||||
|
||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:embed'
|
||||
_VALID_URL = r'''(?x)
|
||||
http://www\.arte\.tv
|
||||
/(?:playerv2/embed|arte_vp/index)\.php\?json_url=
|
||||
https://www\.arte\.tv
|
||||
/player/v3/index\.php\?json_url=
|
||||
(?P<json_url>
|
||||
http://arte\.tv/papi/tvguide/videos/stream/player/
|
||||
(?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
|
||||
https?://api\.arte\.tv/api/player/v1/config/
|
||||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang')
|
||||
json_url = mobj.group('json_url')
|
||||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class TheOperaPlatformIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'theoperaplatform'
|
||||
_VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello',
|
||||
'md5': '970655901fa2e82e04c00b955e9afe7b',
|
||||
'info_dict': {
|
||||
'id': '060338-009-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Verdi - OTELLO',
|
||||
'upload_date': '20160927',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
|
||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||
'info_dict': {
|
||||
'id': 'PL-013263',
|
||||
'title': 'Areva & Uramin',
|
||||
'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
|
||||
'id': 'RC-016954',
|
||||
'title': 'Earn a Living',
|
||||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, lang = self._extract_url_info(url)
|
||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
collection = self._download_json(
|
||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
||||
% (lang, playlist_id), playlist_id)
|
||||
|
@ -5,14 +5,12 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
remove_end,
|
||||
)
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class AsianCrushIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
@ -20,7 +18,7 @@ class AsianCrushIE(InfoExtractor):
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:3db14e9186197857e7063522cb89a805',
|
||||
'description': 'md5:7e986615808bcfb11756eb503a751487',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
@ -28,10 +26,27 @@ class AsianCrushIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@ -51,7 +66,7 @@ class AsianCrushIE(InfoExtractor):
|
||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
||||
|
||||
player = self._download_webpage(
|
||||
'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
|
||||
'https://api.%s/embeddedVideoPlayer' % host, video_id,
|
||||
query={'id': entry_id})
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
@ -63,15 +78,23 @@ class AsianCrushIE(InfoExtractor):
|
||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
||||
default='513551')
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
ie=KalturaIE.ie_key(), video_id=kaltura_id,
|
||||
video_title=title)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
|
||||
_TEST = {
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
||||
'info_dict': {
|
||||
'id': '12481',
|
||||
@ -79,7 +102,16 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
||||
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
@ -96,15 +128,15 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
|
||||
' | AsianCrush')
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
|
@ -99,8 +99,8 @@ class BeamProLiveIE(BeamProBaseIE):
|
||||
|
||||
class BeamProVodIE(BeamProBaseIE):
|
||||
IE_NAME = 'Mixer:vod'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mixer.com/willow8714?vod=2259830',
|
||||
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
|
||||
'info_dict': {
|
||||
@ -119,7 +119,13 @@ class BeamProVodIE(BeamProBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_format(vod, vod_type):
|
||||
|
@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
@ -11,6 +14,7 @@ from ..utils import (
|
||||
class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# api/v6 v1
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
|
||||
'info_dict': {
|
||||
@ -24,6 +28,14 @@ class BeegIE(InfoExtractor):
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
# api/v6 v2
|
||||
'url': 'https://beeg.com/1941093077?t=911-1391',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# api/v6 v2 w/o t
|
||||
'url': 'https://beeg.com/1277207756',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beeg.porn/video/5416503',
|
||||
'only_matching': True,
|
||||
@ -41,11 +53,25 @@ class BeegIE(InfoExtractor):
|
||||
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||
default='1546225636701')
|
||||
|
||||
if len(video_id) >= 10:
|
||||
query = {
|
||||
'v': 2,
|
||||
}
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
t = qs.get('t', [''])[0].split('-')
|
||||
if len(t) > 1:
|
||||
query.update({
|
||||
's': t[0],
|
||||
'e': t[1],
|
||||
})
|
||||
else:
|
||||
query = {'v': 1}
|
||||
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
'https://%sbeeg.com/api/v6/%s/video/%s'
|
||||
% (api_path, beeg_version, video_id), video_id,
|
||||
fatal=api_path == 'api.')
|
||||
fatal=api_path == 'api.', query=query)
|
||||
if video:
|
||||
break
|
||||
|
||||
|
@ -6,7 +6,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
from .rudo import RudoIE
|
||||
|
||||
|
||||
class BioBioChileTVIE(InfoExtractor):
|
||||
@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||
'info_dict': {
|
||||
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
||||
'id': 'b4xd0LK3SK',
|
||||
'ext': 'mp4',
|
||||
'uploader': '(none)',
|
||||
'upload_date': '20160708',
|
||||
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
||||
# TODO: fix url_transparent information overriding
|
||||
# 'uploader': 'Juan Pablo Echenique',
|
||||
'title': 'Comentario Oscar Cáceres',
|
||||
},
|
||||
'params': {
|
||||
# empty m3u8 manifest
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||
@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
rudo_url = RudoIE._extract_url(webpage)
|
||||
rudo_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
|
||||
webpage, 'embed URL', None, group='url')
|
||||
if not rudo_url:
|
||||
raise ExtractorError('No videos found')
|
||||
|
||||
@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||
r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
|
@ -42,7 +42,7 @@ class BIQLEIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_url = self._proto_relative_url(self._search_regex(
|
||||
r'<iframe.+?src="((?:https?:)?//daxab\.com/[^"]+)".*?></iframe>',
|
||||
r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
|
||||
webpage, 'embed url'))
|
||||
if VKIE.suitable(embed_url):
|
||||
return self.url_result(embed_url, VKIE.ie_key(), video_id)
|
||||
|
@ -55,6 +55,11 @@ class BitChuteIE(InfoExtractor):
|
||||
formats = [
|
||||
{'url': format_url}
|
||||
for format_url in orderedSet(format_urls)]
|
||||
|
||||
if not formats:
|
||||
formats = self._parse_html5_media_entries(
|
||||
url, webpage, video_id)[0]['formats']
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
@ -65,8 +70,9 @@ class BitChuteIE(InfoExtractor):
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:image:src', webpage, 'thumbnail')
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
|
||||
'uploader', fatal=False)
|
||||
(r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
|
||||
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor):
|
||||
video = article_data.get('video')
|
||||
if video:
|
||||
video_type = video['type']
|
||||
if video_type == 'cms.bleacherreport.com':
|
||||
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
|
||||
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
|
||||
elif video_type == 'ooyala.com':
|
||||
info['url'] = 'ooyala:%s' % video['id']
|
||||
@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor):
|
||||
|
||||
|
||||
class BleacherReportCMSIE(AMPIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
||||
'info_dict': {
|
||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
|
||||
info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
|
||||
info['id'] = video_id
|
||||
return info
|
||||
|
@ -483,7 +483,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
class BrightcoveNewIE(AdobePassIE):
|
||||
IE_NAME = 'brightcove:new'
|
||||
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
|
||||
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
|
||||
'md5': 'c8100925723840d4b0d243f7025703be',
|
||||
@ -516,6 +516,21 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# playlist stream
|
||||
'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
|
||||
'info_dict': {
|
||||
'id': '5718313430001',
|
||||
'title': 'No Audio Playlist',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ref: prefixed video id
|
||||
'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
|
||||
@ -715,7 +730,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'ip_blocks': smuggled_data.get('geo_ip_blocks'),
|
||||
})
|
||||
|
||||
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
||||
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
@ -736,7 +751,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
|
||||
headers = {
|
||||
'Accept': 'application/json;pk=%s' % policy_key,
|
||||
}
|
||||
@ -771,5 +786,12 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'tveToken': tve_token,
|
||||
})
|
||||
|
||||
if content_type == 'playlist':
|
||||
return self.playlist_result(
|
||||
[self._parse_brightcove_metadata(vid, vid.get('id'), headers)
|
||||
for vid in json_data.get('videos', []) if vid.get('id')],
|
||||
json_data.get('id'), json_data.get('name'),
|
||||
json_data.get('description'))
|
||||
|
||||
return self._parse_brightcove_metadata(
|
||||
json_data, video_id, headers=headers)
|
||||
|
@ -69,7 +69,7 @@ class CBSIE(CBSBaseIE):
|
||||
last_e = None
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
if not asset_type or asset_type in asset_types or asset_type in ('HLS_FPS', 'DASH_CENC'):
|
||||
if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
|
||||
continue
|
||||
asset_types.append(asset_type)
|
||||
query = {
|
||||
|
@ -1,40 +1,62 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import zlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .cbs import CBSIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class CBSNewsEmbedIE(CBSIE):
|
||||
IE_NAME = 'cbsnews:embed'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4
xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract the video referenced by a cbsnews.com embed URL.

    The ``id`` group of ``_VALID_URL`` (everything after ``#``) carries the
    player state: it is percent-decoded, base64-decoded and inflated, and
    the result is parsed as JSON. The first entry of ``video.items``
    supplies the ``mpxRefId`` that is handed to ``_extract_video_info``.
    """
    encoded_state = compat_urllib_parse_unquote(self._match_id(url))
    # Negative wbits tells zlib to expect a raw DEFLATE stream, i.e. one
    # without the zlib header and trailer.
    inflated = zlib.decompress(
        compat_b64decode(encoded_state), -zlib.MAX_WBITS)
    # zlib.decompress() returns bytes, but json.loads() only accepts bytes
    # on Python >= 3.6, so decode to text before parsing.
    item = self._parse_json(
        inflated.decode('utf-8'), None)['video']['items'][0]
    return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
|
||||
|
||||
class CBSNewsIE(CBSIE):
|
||||
IE_NAME = 'cbsnews'
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
# 60 minutes
|
||||
'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
|
||||
'info_dict': {
|
||||
'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_',
|
||||
'ext': 'mp4',
|
||||
'title': 'Artificial Intelligence',
|
||||
'description': 'md5:8818145f9974431e0fb58a1b8d69613c',
|
||||
'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4',
|
||||
'ext': 'flv',
|
||||
'title': 'Artificial Intelligence, real-life applications',
|
||||
'description': 'md5:a7aaf27f1b4777244de8b0b442289304',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1606,
|
||||
'duration': 317,
|
||||
'uploader': 'CBSI-NEW',
|
||||
'timestamp': 1498431900,
|
||||
'upload_date': '20170625',
|
||||
'timestamp': 1476046464,
|
||||
'upload_date': '20161009',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
'info_dict': {
|
||||
'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
|
||||
'ext': 'mp4',
|
||||
@ -60,37 +82,29 @@ class CBSNewsIE(CBSIE):
|
||||
# 48 hours
|
||||
'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
|
||||
'info_dict': {
|
||||
'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cold as Ice',
|
||||
'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
|
||||
'upload_date': '20170604',
|
||||
'timestamp': 1496538000,
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||
webpage, 'video JSON info', default='{}'), video_id, fatal=False)
|
||||
|
||||
if video_info:
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
else:
|
||||
state = self._parse_json(self._search_regex(
|
||||
r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
|
||||
'playlist JSON info', group='json'), video_id)['state']
|
||||
item = state['playlist'][state['pid']]
|
||||
entries = []
|
||||
for embed_url in re.findall(r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage):
|
||||
entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key()))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
|
||||
|
||||
item = self._parse_json(self._html_search_regex(
|
||||
r'CBSNEWS\.defaultPayload\s*=\s*({.+})',
|
||||
webpage, 'video JSON info'), display_id)['items'][0]
|
||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
|
||||
|
||||
|
@ -67,6 +67,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
sanitize_filename,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
@ -219,7 +220,7 @@ class InfoExtractor(object):
|
||||
* "preference" (optional, int) - quality of the image
|
||||
* "width" (optional, int)
|
||||
* "height" (optional, int)
|
||||
* "resolution" (optional, string "{width}x{height"},
|
||||
* "resolution" (optional, string "{width}x{height}",
|
||||
deprecated)
|
||||
* "filesize" (optional, int)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
@ -2480,7 +2481,7 @@ class InfoExtractor(object):
|
||||
'subtitles': {},
|
||||
}
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = media_attributes.get('src')
|
||||
src = strip_or_none(media_attributes.get('src'))
|
||||
if src:
|
||||
_, formats = _media_formats(src, media_type)
|
||||
media_info['formats'].extend(formats)
|
||||
@ -2490,7 +2491,7 @@ class InfoExtractor(object):
|
||||
s_attr = extract_attributes(source_tag)
|
||||
# data-video-src and data-src are non standard but seen
|
||||
# several times in the wild
|
||||
src = dict_get(s_attr, ('src', 'data-video-src', 'data-src'))
|
||||
src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
|
||||
if not src:
|
||||
continue
|
||||
f = parse_content_type(s_attr.get('type'))
|
||||
@ -2533,7 +2534,7 @@ class InfoExtractor(object):
|
||||
track_attributes = extract_attributes(track_tag)
|
||||
kind = track_attributes.get('kind')
|
||||
if not kind or kind in ('subtitles', 'captions'):
|
||||
src = track_attributes.get('src')
|
||||
src = strip_or_none(track_attributes.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
|
||||
@ -2818,15 +2819,19 @@ class InfoExtractor(object):
|
||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
# Some sites (e.g. [1-3]) may serve two cookies under the same name
|
||||
# in Set-Cookie header and expect the first (old) one to be set rather
|
||||
# than second (new). However, as of RFC6265 the newer one cookie
|
||||
# should be set into cookie store what actually happens.
|
||||
# We will workaround this issue by resetting the cookie to
|
||||
# the first one manually.
|
||||
# 1. https://new.vk.com/
|
||||
# 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
|
||||
# 3. https://learning.oreilly.com/
|
||||
"""
|
||||
Apply first Set-Cookie header instead of the last. Experimental.
|
||||
|
||||
Some sites (e.g. [1-3]) may serve two cookies under the same name
|
||||
in Set-Cookie header and expect the first (old) one to be set rather
|
||||
than second (new). However, as of RFC6265 the newer one cookie
|
||||
should be set into cookie store what actually happens.
|
||||
We will workaround this issue by resetting the cookie to
|
||||
the first one manually.
|
||||
1. https://new.vk.com/
|
||||
2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
|
||||
3. https://learning.oreilly.com/
|
||||
"""
|
||||
for header, cookies in url_handle.headers.items():
|
||||
if header.lower() != 'set-cookie':
|
||||
continue
|
||||
|
@ -1,39 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CriterionIE(InfoExtractor):
    """Extractor for film pages on criterion.com.

    The numeric prefix of the film slug is used as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
    _TEST = {
        'url': 'http://www.criterion.com/films/184-le-samourai',
        'md5': 'bc51beba55685509883a9a7830919ec3',
        'info_dict': {
            'id': '184',
            'ext': 'mp4',
            'title': 'Le Samouraï',
            'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the film page at ``url``.

        The media and thumbnail URLs are read from the page's
        ``so.addVariable(...)`` calls; the title comes from the Open Graph
        metadata and the description from the ``description`` meta tag.
        """
        film_id = self._match_id(url)
        page = self._download_webpage(url, film_id)

        # Fields are filled in the same order as the lookups were originally
        # performed, so the first failing lookup is unchanged.
        info = {'id': film_id}
        info['url'] = self._search_regex(
            r'so\.addVariable\("videoURL", "(.+?)"\)\;', page, 'video url')
        info['title'] = self._og_search_title(page)
        info['description'] = self._html_search_meta('description', page)
        info['thumbnail'] = self._search_regex(
            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
            page, 'thumbnail url')
        return info
|
@ -103,19 +103,6 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _download_webpage(self, url_or_request, *args, **kwargs):
    """Download a page with an ``Accept-Language: *`` request header.

    ``url_or_request`` may be a URL or an existing request object; anything
    that is not already a request is wrapped with ``sanitized_Request``.
    All other arguments are forwarded unchanged to the parent
    implementation, whose result is returned.
    """
    if isinstance(url_or_request, compat_urllib_request.Request):
        page_request = url_or_request
    else:
        page_request = sanitized_Request(url_or_request)
    # Accept-Language has to be set explicitly to "any language" to avoid
    # problems like https://github.com/ytdl-org/youtube-dl/issues/6797.
    # Besides the IP address, Crunchyroll uses Accept-Language to decide
    # whether to impose georestriction: from observation it takes only the
    # first language (ignoring priorities) and requires it to correspond to
    # the IP. This also breaks Crunchyroll in georestricted cases for
    # browsers that do not put the locale language first in the header.
    # Allowing any language appears to work around the issue.
    page_request.add_header('Accept-Language', '*')
    parent = super(CrunchyrollBaseIE, self)
    return parent._download_webpage(page_request, *args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _add_skip_wall(url):
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
@ -269,6 +256,19 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
'1080': ('80', '108'),
|
||||
}
|
||||
|
||||
def _download_webpage(self, url_or_request, *args, **kwargs):
    """Download a page with an ``Accept-Language: *`` request header.

    ``url_or_request`` may be a URL or an existing request object; anything
    that is not already a request is wrapped with ``sanitized_Request``.
    All other arguments are forwarded unchanged to the next
    ``_download_webpage`` in the MRO, whose result is returned.
    """
    request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
               else sanitized_Request(url_or_request))
    # Accept-Language must be set explicitly to accept any language to avoid
    # issues similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
    # Along with the IP address, Crunchyroll uses Accept-Language to guess
    # whether georestriction should be imposed (from observation it takes
    # only the first language, ignoring priorities, and requires it to
    # correspond to the IP). This also makes Crunchyroll fail in
    # georestricted cases for browsers that do not place the locale language
    # first in the header. Allowing any language seems to work around it.
    request.add_header('Accept-Language', '*')
    # This method lives on CrunchyrollIE, so super() must be anchored on
    # CrunchyrollIE; anchoring on CrunchyrollBaseIE would skip that class in
    # the MRO lookup.
    return super(CrunchyrollIE, self)._download_webpage(request, *args, **kwargs)
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(compat_b64decode(data))
|
||||
iv = bytes_to_intlist(compat_b64decode(iv))
|
||||
@ -661,9 +661,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
webpage = self._download_webpage(
|
||||
self._add_skip_wall(url), show_id,
|
||||
headers=self.geo_verification_headers())
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
|
||||
webpage, 'title')
|
||||
title = self._html_search_meta('name', webpage, default=None)
|
||||
|
||||
episode_paths = re.findall(
|
||||
r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
|
||||
webpage)
|
||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
@ -14,8 +15,8 @@ class CtsNewsIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '201501291578109',
|
||||
'ext': 'mp4',
|
||||
'title': '以色列.真主黨交火 3人死亡',
|
||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
|
||||
'title': '以色列.真主黨交火 3人死亡 - 華視新聞網',
|
||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...',
|
||||
'timestamp': 1422528540,
|
||||
'upload_date': '20150129',
|
||||
}
|
||||
@ -26,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '201309031304098',
|
||||
'ext': 'mp4',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網',
|
||||
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1378205880,
|
||||
@ -62,8 +63,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
video_url = mp4_feed['source_url']
|
||||
else:
|
||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||
youtube_url = self._search_regex(
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
|
||||
youtube_url = YoutubeIE._extract_url(page)
|
||||
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
|
@ -137,10 +137,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
urls = []
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
return list(map(lambda m: unescapeHTML(m[1]), matches))
|
||||
# https://developer.dailymotion.com/player#player-parameters
|
||||
for mobj in re.finditer(
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
|
||||
urls.append(unescapeHTML(mobj.group('url')))
|
||||
for mobj in re.finditer(
|
||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||
urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
|
||||
return urls
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -7,50 +7,51 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
|
||||
_TESTS = [{
|
||||
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'md5': '2e24f67936517b143a234b4cadf792ec',
|
||||
'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
|
||||
'md5': 'b8f850ba1860adbda668d367f9b77699',
|
||||
'info_dict': {
|
||||
'id': '3649835190001',
|
||||
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'id': 'PynxJnNWChE',
|
||||
'ext': 'mp4',
|
||||
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
|
||||
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
|
||||
'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'timestamp': 1404039863,
|
||||
'upload_date': '20140629',
|
||||
'duration': 69.544,
|
||||
'uploader_id': '1027729757001',
|
||||
'upload_date': '20160916',
|
||||
'duration': 69,
|
||||
'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
|
||||
'uploader': 'Dagbladet',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew']
|
||||
'add_ie': ['Youtube']
|
||||
}, {
|
||||
'url': 'http://dbtv.no/3649835190001',
|
||||
'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/vice/5000634109001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/filmtrailer/3359293614001',
|
||||
'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1',
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
return {
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
if len(video_id) == 11:
|
||||
info.update({
|
||||
'url': video_id,
|
||||
'ie_key': 'Youtube',
|
||||
})
|
||||
else:
|
||||
info.update({
|
||||
'url': 'jwplatform:' + video_id,
|
||||
'ie_key': 'JWPlatform',
|
||||
})
|
||||
return info
|
||||
|
@ -5,23 +5,17 @@ import re
|
||||
import string
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<site>
|
||||
(?:(?:www|go)\.)?discovery|
|
||||
(?:www\.)?
|
||||
(?:
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
cookingchanneltv|
|
||||
motortrend
|
||||
)
|
||||
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
||||
)\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
||||
'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
|
||||
'info_dict': {
|
||||
'id': '5a2d9b4d6b66d17a5026e1fd',
|
||||
'id': '5a2f35ce6b66d17a5026e29e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dave Foley',
|
||||
'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
|
||||
'duration': 608,
|
||||
'title': 'Riding with Matthew Perry',
|
||||
'description': 'md5:a34333153e79bc4526019a5129e7f878',
|
||||
'duration': 84,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
@ -56,20 +50,20 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# using `show_slug` is important to get the correct video data
|
||||
'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_GEO_BYPASS = False
|
||||
_API_BASE_URL = 'https://api.discovery.com/v1/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
react_data = self._parse_json(self._search_regex(
|
||||
r'window\.__reactTransmitPacket\s*=\s*({.+?});',
|
||||
webpage, 'react data'), display_id)
|
||||
content_blocks = react_data['layout'][path]['contentBlocks']
|
||||
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
|
||||
video_id = video['id']
|
||||
site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
access_token = None
|
||||
cookies = self._get_cookies(url)
|
||||
@ -79,27 +73,34 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
if auth_storage_cookie and auth_storage_cookie.value:
|
||||
auth_storage = self._parse_json(compat_urllib_parse_unquote(
|
||||
compat_urllib_parse_unquote(auth_storage_cookie.value)),
|
||||
video_id, fatal=False) or {}
|
||||
display_id, fatal=False) or {}
|
||||
access_token = auth_storage.get('a') or auth_storage.get('access_token')
|
||||
|
||||
if not access_token:
|
||||
access_token = self._download_json(
|
||||
'https://%s.com/anonymous' % site, display_id, query={
|
||||
'https://%s.com/anonymous' % site, display_id,
|
||||
'Downloading token JSON metadata', query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
compat_str) or '3020a40c2356a645b4b4',
|
||||
'client_id': '3020a40c2356a645b4b4',
|
||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
||||
})['access_token']
|
||||
|
||||
try:
|
||||
headers = self.geo_verification_headers()
|
||||
headers['Authorization'] = 'Bearer ' + access_token
|
||||
|
||||
try:
|
||||
video = self._download_json(
|
||||
self._API_BASE_URL + 'content/videos',
|
||||
display_id, 'Downloading content JSON metadata',
|
||||
headers=headers, query={
|
||||
'slug': display_id,
|
||||
'show_slug': show_slug,
|
||||
})[0]
|
||||
video_id = video['id']
|
||||
stream = self._download_json(
|
||||
'https://api.discovery.com/v1/streaming/video/' + video_id,
|
||||
display_id, headers=headers)
|
||||
self._API_BASE_URL + 'streaming/video/' + video_id,
|
||||
display_id, 'Downloading streaming JSON metadata', headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
|
97
youtube_dl/extractor/dlive.py
Normal file
97
youtube_dl/extractor/dlive.py
Normal file
@ -0,0 +1,97 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class DLiveVODIE(InfoExtractor):
    """Extractor for past broadcasts at dlive.tv/p/<uploader>+<id>."""

    IE_NAME = 'dlive:vod'
    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
        'info_dict': {
            'id': '3mTzOl4WR',
            'ext': 'mp4',
            'title': 'Minecraft with james charles epic',
            'upload_date': '20190701',
            'timestamp': 1562011015,
            'uploader_id': 'pdp',
        }
    }, {
        'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the past broadcast at ``url``.

        The broadcast is looked up via a GraphQL ``pastBroadcast`` query
        keyed by the ``<uploader_id>+<id>`` permlink taken from the URL, and
        its ``playbackUrl`` is expanded into HLS formats.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader_id = mobj.group('uploader_id')
        vod_id = mobj.group('id')

        graphql_query = '''query {
pastBroadcast(permlink:"%s+%s") {
content
createdAt
length
playbackUrl
title
thumbnailUrl
viewCount
}
}''' % (uploader_id, vod_id)
        response = self._download_json(
            'https://graphigo.prd.dlive.tv/', vod_id,
            data=json.dumps({'query': graphql_query}).encode())
        broadcast = response['data']['pastBroadcast']

        # The title is mandatory: a missing key fails here, before any
        # format extraction is attempted.
        title = broadcast['title']
        formats = self._extract_m3u8_formats(
            broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        return {
            'id': vod_id,
            'title': title,
            'uploader_id': uploader_id,
            'formats': formats,
            'description': broadcast.get('content'),
            'thumbnail': broadcast.get('thumbnailUrl'),
            # NOTE(review): the 1000 presumably scales a millisecond
            # timestamp down to seconds - confirm against utils.int_or_none.
            'timestamp': int_or_none(broadcast.get('createdAt'), 1000),
            'view_count': int_or_none(broadcast.get('viewCount')),
        }
|
||||
|
||||
|
||||
class DLiveStreamIE(InfoExtractor):
    """Extractor for live channels at dlive.tv/<display name>.

    The negative lookahead in ``_VALID_URL`` excludes ``/p/`` URLs.
    """

    IE_NAME = 'dlive:stream'
    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P<id>[\w.-]+)'

    def _real_extract(self, url):
        """Return the info dict for the live stream at ``url``.

        The user is looked up via a GraphQL ``userByDisplayName`` query; the
        returned ``username`` is used to build the HLS playlist URL.
        """
        display_name = self._match_id(url)

        graphql_query = '''query {
userByDisplayName(displayname:"%s") {
livestream {
content
createdAt
title
thumbnailUrl
watchingCount
}
username
}
}''' % display_name
        response = self._download_json(
            'https://graphigo.prd.dlive.tv/', display_name,
            data=json.dumps({'query': graphql_query}).encode())
        user = response['data']['userByDisplayName']

        # Mandatory fields are read up front, in the original order, so the
        # first failing lookup is unchanged.
        livestream = user['livestream']
        title = livestream['title']
        username = user['username']

        playlist_url = 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username
        formats = self._extract_m3u8_formats(playlist_url, display_name, 'mp4')
        self._sort_formats(formats)

        return {
            'id': display_name,
            'title': self._live_title(title),
            'uploader': display_name,
            'uploader_id': username,
            'formats': formats,
            'description': livestream.get('content'),
            'thumbnail': livestream.get('thumbnailUrl'),
            'is_live': True,
            # NOTE(review): the 1000 presumably scales a millisecond
            # timestamp down to seconds - confirm against utils.int_or_none.
            'timestamp': int_or_none(livestream.get('createdAt'), 1000),
            'view_count': int_or_none(livestream.get('watchingCount')),
        }
|
@ -24,7 +24,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
_GEO_BYPASS = False
|
||||
_GEO_COUNTRIES = ['DK']
|
||||
IE_NAME = 'drtv'
|
||||
@ -80,6 +80,9 @@ class DRTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@ -18,7 +19,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com))/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||
@ -32,6 +33,9 @@ class EinthusanIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://einthusan.com/movie/watch/9097/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
@ -41,7 +45,9 @@ class EinthusanIE(InfoExtractor):
|
||||
)).decode('utf-8'), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@ -53,7 +59,7 @@ class EinthusanIE(InfoExtractor):
|
||||
page_id = self._html_search_regex(
|
||||
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
||||
video_data = self._download_json(
|
||||
'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
|
||||
'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
|
||||
data=urlencode_postdata({
|
||||
'xEvent': 'UIVideoPlayer.PingOutcome',
|
||||
'xJson': json.dumps({
|
||||
|
@ -216,17 +216,14 @@ class FiveThirtyEightIE(InfoExtractor):
|
||||
_TEST = {
|
||||
'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
|
||||
'info_dict': {
|
||||
'id': '21846851',
|
||||
'ext': 'mp4',
|
||||
'id': '56032156',
|
||||
'ext': 'flv',
|
||||
'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
|
||||
'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
|
||||
'timestamp': 1513960621,
|
||||
'upload_date': '20171222',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -234,9 +231,8 @@ class FiveThirtyEightIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id=["\'](?P<id>\d+)',
|
||||
webpage, 'video id', group='id')
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)',
|
||||
webpage, 'embed url')
|
||||
|
||||
return self.url_result(
|
||||
'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
|
||||
return self.url_result(embed_url, 'AbcNewsVideo')
|
||||
|
@ -58,17 +58,8 @@ from .ard import (
|
||||
ARDMediathekIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTvIE,
|
||||
ArteTVPlus7IE,
|
||||
ArteTVCreativeIE,
|
||||
ArteTVConcertIE,
|
||||
ArteTVInfoIE,
|
||||
ArteTVFutureIE,
|
||||
ArteTVCinemaIE,
|
||||
ArteTVDDCIE,
|
||||
ArteTVMagazineIE,
|
||||
ArteTVEmbedIE,
|
||||
TheOperaPlatformIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .asiancrush import (
|
||||
@ -173,6 +164,7 @@ from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsEmbedIE,
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
@ -240,7 +232,6 @@ from .condenast import CondeNastIE
|
||||
from .corus import CorusIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .criterion import CriterionIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
from .crunchyroll import (
|
||||
CrunchyrollIE,
|
||||
@ -404,11 +395,7 @@ from .frontendmasters import (
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .funimation import FunimationIE
|
||||
from .funk import (
|
||||
FunkMixIE,
|
||||
FunkChannelIE,
|
||||
)
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .funk import FunkIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gaia import GaiaIE
|
||||
@ -592,6 +579,7 @@ from .linkedin import (
|
||||
)
|
||||
from .linuxacademy import LinuxAcademyIE
|
||||
from .litv import LiTVIE
|
||||
from .livejournal import LiveJournalIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
LiveLeakEmbedIE,
|
||||
@ -745,7 +733,6 @@ from .nexx import (
|
||||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhk import NhkVodIE
|
||||
from .nhl import NHLIE
|
||||
@ -772,13 +759,6 @@ from .nova import (
|
||||
NovaEmbedIE,
|
||||
NovaIE,
|
||||
)
|
||||
from .novamov import (
|
||||
AuroraVidIE,
|
||||
CloudTimeIE,
|
||||
NowVideoIE,
|
||||
VideoWeedIE,
|
||||
WholeCloudIE,
|
||||
)
|
||||
from .nowness import (
|
||||
NownessIE,
|
||||
NownessPlaylistIE,
|
||||
@ -896,12 +876,12 @@ from .polskieradio import (
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
PornHubPlaylistIE,
|
||||
PornHubUserVideosIE,
|
||||
PornHubUserIE,
|
||||
PornHubPagedVideoListIE,
|
||||
PornHubUserVideosUploadIE,
|
||||
)
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
@ -946,7 +926,10 @@ from .raywenderlich import (
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import RedBullTVIE
|
||||
from .redbulltv import (
|
||||
RedBullTVIE,
|
||||
RedBullTVRrnContentIE,
|
||||
)
|
||||
from .reddit import (
|
||||
RedditIE,
|
||||
RedditRIE,
|
||||
@ -985,7 +968,6 @@ from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rtvs import RTVSIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
@ -1273,6 +1255,10 @@ from .udn import UDNEmbedIE
|
||||
from .ufctv import UFCTVIE
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .dlive import (
|
||||
DLiveVODIE,
|
||||
DLiveStreamIE,
|
||||
)
|
||||
from .umg import UMGDeIE
|
||||
from .unistra import UnistraIE
|
||||
from .unity import UnityIE
|
||||
@ -1462,6 +1448,7 @@ from .yahoo import (
|
||||
YahooSearchIE,
|
||||
YahooGyaOPlayerIE,
|
||||
YahooGyaOIE,
|
||||
YahooJapanNewsIE,
|
||||
)
|
||||
from .yandexdisk import YandexDiskIE
|
||||
from .yandexmusic import (
|
||||
|
@ -428,7 +428,7 @@ class FacebookIE(InfoExtractor):
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
||||
|
||||
view_count = parse_count(self._search_regex(
|
||||
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||
|
@ -9,7 +9,7 @@ from ..utils import int_or_none
|
||||
|
||||
class FiveTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
http://
|
||||
https?://
|
||||
(?:www\.)?5-tv\.ru/
|
||||
(?:
|
||||
(?:[^/]+/)+(?P<id>\d+)|
|
||||
@ -39,6 +39,7 @@ class FiveTVIE(InfoExtractor):
|
||||
'duration': 180,
|
||||
},
|
||||
}, {
|
||||
# redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/
|
||||
'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
|
||||
'info_dict': {
|
||||
'id': 'glavnoe',
|
||||
@ -46,6 +47,7 @@ class FiveTVIE(InfoExtractor):
|
||||
'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'redirect to «Известия. Главное» project page',
|
||||
}, {
|
||||
'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
|
||||
'only_matching': True,
|
||||
@ -70,7 +72,7 @@ class FiveTVIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
[r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"',
|
||||
[r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"',
|
||||
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
|
||||
webpage, 'video url')
|
||||
|
||||
|
@ -1,89 +1,21 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FunkBaseIE(InfoExtractor):
|
||||
_HEADERS = {
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
|
||||
}
|
||||
_AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
|
||||
|
||||
@staticmethod
|
||||
def _make_headers(referer):
|
||||
headers = FunkBaseIE._HEADERS.copy()
|
||||
headers['Referer'] = referer
|
||||
return headers
|
||||
|
||||
def _make_url_result(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'nexx:741:%s' % video['sourceId'],
|
||||
'ie_key': NexxIE.ie_key(),
|
||||
'id': video['sourceId'],
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'season_number': int_or_none(video.get('seasonNr')),
|
||||
'episode_number': int_or_none(video.get('episodeNr')),
|
||||
}
|
||||
|
||||
|
||||
class FunkMixIE(FunkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
|
||||
'md5': '8edf617c2f2b7c9847dfda313f199009',
|
||||
'info_dict': {
|
||||
'id': '123748',
|
||||
'ext': 'mp4',
|
||||
'title': '"Die realste Kifferdoku aller Zeiten"',
|
||||
'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
|
||||
'timestamp': 1490274721,
|
||||
'upload_date': '20170323',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
mix_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
lists = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/curation/curatedLists/',
|
||||
mix_id, headers=self._make_headers(url), query={
|
||||
'size': 100,
|
||||
})['_embedded']['curatedListList']
|
||||
|
||||
metas = next(
|
||||
l for l in lists
|
||||
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
|
||||
video = next(
|
||||
meta['videoDataDelegate']
|
||||
for meta in metas
|
||||
if try_get(
|
||||
meta, lambda x: x['videoDataDelegate']['alias'],
|
||||
compat_str) == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
|
||||
|
||||
class FunkChannelIE(FunkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81',
|
||||
'info_dict': {
|
||||
'id': '1155821',
|
||||
'ext': 'mp4',
|
||||
@ -92,83 +24,26 @@ class FunkChannelIE(FunkBaseIE):
|
||||
'timestamp': 1514507395,
|
||||
'upload_date': '20171229',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
|
||||
}, {
|
||||
# only available via byIdList API
|
||||
'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
|
||||
'info_dict': {
|
||||
'id': '205067',
|
||||
'ext': 'mp4',
|
||||
'title': 'Martin Sonneborn erklärt die EU',
|
||||
'description': 'md5:050f74626e4ed87edf4626d2024210c0',
|
||||
'timestamp': 1494424042,
|
||||
'upload_date': '20170510',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
headers = self._make_headers(url)
|
||||
|
||||
video = None
|
||||
|
||||
# Id-based channels are currently broken on their side: webplayer
|
||||
# tries to process them via byChannelAlias endpoint and fails
|
||||
# predictably.
|
||||
for page_num in itertools.count():
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON page %d' % (page_num + 1),
|
||||
headers=headers, query={
|
||||
'filterFsk': 'false',
|
||||
'sort': 'creationDate,desc',
|
||||
'size': 100,
|
||||
'page': page_num,
|
||||
}, fatal=False)
|
||||
if not by_channel_alias:
|
||||
break
|
||||
video_list = try_get(
|
||||
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
|
||||
if not video_list:
|
||||
break
|
||||
try:
|
||||
video = next(r for r in video_list if r.get('alias') == alias)
|
||||
break
|
||||
except StopIteration:
|
||||
pass
|
||||
if not try_get(
|
||||
by_channel_alias, lambda x: x['_links']['next']):
|
||||
break
|
||||
|
||||
if not video:
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList',
|
||||
channel_id, 'Downloading byIdList JSON', headers=headers,
|
||||
query={
|
||||
'ids': alias,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
|
||||
if not video:
|
||||
results = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter',
|
||||
channel_id, 'Downloading filter JSON', headers=headers, query={
|
||||
'channelId': channel_id,
|
||||
'size': 100,
|
||||
})['result']
|
||||
video = next(r for r in results if r.get('alias') == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
display_id, nexx_id = re.match(self._VALID_URL, url).groups()
|
||||
video = self._download_json(
|
||||
'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'nexx:741:' + nexx_id,
|
||||
'ie_key': NexxIE.ie_key(),
|
||||
'id': nexx_id,
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'channel_id': str_or_none(video.get('channelId')),
|
||||
'display_id': display_id,
|
||||
'tags': video.get('tags'),
|
||||
'thumbnail': video.get('imageUrlLandscape'),
|
||||
}
|
||||
|
@ -1,162 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class FunnyOrDieIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|articles|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
||||
'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9',
|
||||
'info_dict': {
|
||||
'id': '0732f586d7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heart-Shaped Box: Literal Video Version',
|
||||
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
|
||||
'thumbnail': r're:^http:.*\.jpg$',
|
||||
'uploader': 'DASjr',
|
||||
'timestamp': 1317904928,
|
||||
'upload_date': '20111006',
|
||||
'duration': 318.3,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.funnyordie.com/embed/e402820827',
|
||||
'info_dict': {
|
||||
'id': 'e402820827',
|
||||
'ext': 'mp4',
|
||||
'title': 'Please Use This Song (Jon Lajoie)',
|
||||
'description': 'Please use this to sell something. www.jonlajoie.com',
|
||||
'thumbnail': r're:^http:.*\.jpg$',
|
||||
'timestamp': 1398988800,
|
||||
'upload_date': '20140502',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)
|
||||
if not links:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\1',
|
||||
webpage, 'm3u8 url', group='url')
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
source_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
|
||||
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
|
||||
bitrates.sort()
|
||||
|
||||
if source_formats:
|
||||
self._sort_formats(source_formats)
|
||||
|
||||
for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)):
|
||||
for path, ext in links:
|
||||
ff = f.copy()
|
||||
if ff:
|
||||
if ext != 'mp4':
|
||||
ff = dict(
|
||||
[(k, v) for k, v in ff.items()
|
||||
if k in ('height', 'width', 'format_id')])
|
||||
ff.update({
|
||||
'format_id': ff['format_id'].replace('hls', ext),
|
||||
'ext': ext,
|
||||
'protocol': 'http',
|
||||
})
|
||||
else:
|
||||
ff.update({
|
||||
'format_id': '%s-%d' % (ext, bitrate),
|
||||
'vbr': bitrate,
|
||||
})
|
||||
ff['url'] = self._proto_relative_url(
|
||||
'%s%d.%s' % (path, bitrate, ext))
|
||||
formats.append(ff)
|
||||
self._check_formats(formats, video_id)
|
||||
|
||||
formats.extend(m3u8_formats)
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
subtitles = {}
|
||||
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
|
||||
subtitles[src_lang] = [{
|
||||
'ext': src.split('/')[-1],
|
||||
'url': 'http://www.funnyordie.com%s' % src,
|
||||
}]
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp', default=None))
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
|
||||
webpage, 'uploader', default=None)
|
||||
|
||||
title, description, thumbnail, duration = [None] * 4
|
||||
|
||||
medium = self._parse_json(
|
||||
self._search_regex(
|
||||
r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
|
||||
default='{}'),
|
||||
video_id, fatal=False)
|
||||
if medium:
|
||||
title = medium.get('title')
|
||||
duration = float_or_none(medium.get('duration'))
|
||||
if not timestamp:
|
||||
timestamp = unified_timestamp(medium.get('publishDate'))
|
||||
|
||||
post = self._parse_json(
|
||||
self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
|
||||
default='{}'),
|
||||
video_id, fatal=False)
|
||||
if post:
|
||||
if not title:
|
||||
title = post.get('name')
|
||||
description = post.get('description')
|
||||
thumbnail = post.get('picture')
|
||||
|
||||
if not title:
|
||||
title = self._og_search_title(webpage)
|
||||
if not description:
|
||||
description = self._og_search_description(webpage)
|
||||
if not duration:
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
('video:duration', 'duration'), webpage, 'duration', default=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@ -1,35 +1,84 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class FusionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
|
||||
'info_dict': {
|
||||
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
|
||||
'id': '3145868',
|
||||
'ext': 'mp4',
|
||||
'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
|
||||
'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
|
||||
'duration': 140.0,
|
||||
'timestamp': 1442589635,
|
||||
'uploader': 'UNIVISON',
|
||||
'upload_date': '20150918',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'add_ie': ['Anvato'],
|
||||
}, {
|
||||
'url': 'http://fusion.tv/video/201781',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._match_id(url)
|
||||
video = self._download_json(
|
||||
'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id)
|
||||
|
||||
ooyala_code = self._search_regex(
|
||||
r'data-ooyala-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
|
||||
webpage, 'ooyala code', group='code')
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video['title'],
|
||||
'description': video.get('excerpt'),
|
||||
'timestamp': parse_iso8601(video.get('published')),
|
||||
'series': video.get('show'),
|
||||
}
|
||||
|
||||
return OoyalaIE._build_url_result(ooyala_code)
|
||||
formats = []
|
||||
src = video.get('src') or {}
|
||||
for f_id, f in src.items():
|
||||
for q_id, q in f.items():
|
||||
q_url = q.get('url')
|
||||
if not q_url:
|
||||
continue
|
||||
ext = determine_ext(q_url, mimetype2ext(q.get('type')))
|
||||
if ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False))
|
||||
elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': '-'.join([f_id, q_id]),
|
||||
'url': q_url,
|
||||
'width': int_or_none(q.get('width')),
|
||||
'height': int_or_none(q.get('height')),
|
||||
'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')),
|
||||
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||
})
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
else:
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': 'anvato:uni:' + video['video_ids']['anvato'],
|
||||
'ie_key': 'Anvato',
|
||||
})
|
||||
|
||||
return info
|
||||
|
@ -1,12 +1,19 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
)
|
||||
|
||||
|
||||
class GameInformerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)'
|
||||
_TESTS = [{
|
||||
# normal Brightcove embed code extracted with BrightcoveNewIE._extract_url
|
||||
'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
|
||||
'md5': '292f26da1ab4beb4c9099f1304d2b071',
|
||||
'info_dict': {
|
||||
@ -18,16 +25,25 @@ class GameInformerIE(InfoExtractor):
|
||||
'upload_date': '20150928',
|
||||
'uploader_id': '694940074001',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Brightcove id inside unique element with field--name-field-brightcove-video-id class
|
||||
'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue',
|
||||
'info_dict': {
|
||||
'id': '6057111913001',
|
||||
'ext': 'mp4',
|
||||
'title': 'New Gameplay Today – Streets Of Rogue',
|
||||
'timestamp': 1562699001,
|
||||
'upload_date': '20190709',
|
||||
'uploader_id': '694940074001',
|
||||
|
||||
},
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers=self.geo_verification_headers())
|
||||
brightcove_id = self._search_regex(
|
||||
[r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"],
|
||||
webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
|
||||
brightcove_id)
|
||||
brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage))
|
||||
brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage)
|
||||
return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id)
|
||||
|
@ -2104,6 +2104,23 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
},
|
||||
{
|
||||
# DailyMotion embed with DM.player
|
||||
'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
|
||||
'info_dict': {
|
||||
'id': 'k6aKkGHd9FJs4mtJN39',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
|
||||
'description': 'This video is private.',
|
||||
'uploader_id': 'x1jf30l',
|
||||
'uploader': 'beIN SPORTS USA',
|
||||
'upload_date': '20190528',
|
||||
'timestamp': 1559062971,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -2209,7 +2226,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if '/' in url:
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@ -2583,19 +2600,6 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group(1), 'Mpora')
|
||||
|
||||
# Look for embedded NovaMov-based player
|
||||
mobj = re.search(
|
||||
r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
|
||||
(?P<url>http://(?:(?:embed|www)\.)?
|
||||
(?:novamov\.com|
|
||||
nowvideo\.(?:ch|sx|eu|at|ag|co)|
|
||||
videoweed\.(?:es|com)|
|
||||
movshare\.(?:net|sx|ag)|
|
||||
divxstage\.(?:eu|net|ch|co|at|ag))
|
||||
/embed\.php.+?)\1''', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded Facebook player
|
||||
facebook_urls = FacebookIE._extract_urls(webpage)
|
||||
if facebook_urls:
|
||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GfycatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^-/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||
'info_dict': {
|
||||
@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor):
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
|
||||
'only_matching': True
|
||||
|
@ -34,9 +34,13 @@ class GoIE(AdobePassIE):
|
||||
'watchdisneyxd': {
|
||||
'brand': '009',
|
||||
'resource_id': 'DisneyXD',
|
||||
},
|
||||
'disneynow': {
|
||||
'brand': '011',
|
||||
'resource_id': 'Disney',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
||||
_VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|(?P<sub_domain_2>disneynow))\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
||||
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
@ -71,6 +75,9 @@ class GoIE(AdobePassIE):
|
||||
# brand 008
|
||||
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
@ -80,7 +87,9 @@ class GoIE(AdobePassIE):
|
||||
display_id)['video']
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
site_info = self._SITE_INFO.get(sub_domain, {})
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
@ -89,7 +98,7 @@ class GoIE(AdobePassIE):
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
|
||||
default=None)
|
||||
default=video_id)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
|
@ -103,6 +103,11 @@ class KalturaIE(InfoExtractor):
|
||||
{
|
||||
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# unavailable source format
|
||||
'url': 'kaltura:513551:1_66x4rg7o',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@ -306,12 +311,17 @@ class KalturaIE(InfoExtractor):
|
||||
f['fileExt'] = 'mp4'
|
||||
video_url = sign_url(
|
||||
'%s/flavorId/%s' % (data_url, f['id']))
|
||||
format_id = '%(fileExt)s-%(bitrate)s' % f
|
||||
# Source format may not be available (e.g. kaltura:513551:1_66x4rg7o)
|
||||
if f.get('isOriginal') is True and not self._is_valid_url(
|
||||
video_url, entry_id, format_id):
|
||||
continue
|
||||
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
|
||||
# -f mp4-56)
|
||||
vcodec = 'none' if 'videoCodecId' not in f and f.get(
|
||||
'frameRate') == 0 else f.get('videoCodecId')
|
||||
formats.append({
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'format_id': format_id,
|
||||
'ext': f.get('fileExt'),
|
||||
'tbr': int_or_none(f['bitrate']),
|
||||
'fps': int_or_none(f.get('frameRate')),
|
||||
|
@ -6,8 +6,8 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@ -19,6 +19,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class LecturioBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/'
|
||||
_LOGIN_URL = 'https://app.lecturio.com/en/login'
|
||||
_NETRC_MACHINE = 'lecturio'
|
||||
|
||||
@ -67,51 +68,56 @@ class LecturioIE(LecturioBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https://
|
||||
(?:
|
||||
app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
|
||||
(?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
|
||||
app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
|
||||
(?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
|
||||
'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
|
||||
'md5': '9a42cf1d8282a6311bf7211bbde26fde',
|
||||
'info_dict': {
|
||||
'id': '39634',
|
||||
'ext': 'mp4',
|
||||
'title': 'Important Concepts and Terms – Introduction to Microbiology',
|
||||
'title': 'Important Concepts and Terms — Introduction to Microbiology',
|
||||
},
|
||||
'skip': 'Requires lecturio account credentials',
|
||||
}, {
|
||||
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_CC_LANGS = {
|
||||
'Arabic': 'ar',
|
||||
'Bulgarian': 'bg',
|
||||
'German': 'de',
|
||||
'English': 'en',
|
||||
'Spanish': 'es',
|
||||
'Persian': 'fa',
|
||||
'French': 'fr',
|
||||
'Japanese': 'ja',
|
||||
'Polish': 'pl',
|
||||
'Pashto': 'ps',
|
||||
'Russian': 'ru',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id') or mobj.group('id_de')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
|
||||
display_id)
|
||||
|
||||
lecture_id = self._search_regex(
|
||||
r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id')
|
||||
|
||||
api_url = self._search_regex(
|
||||
r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'api url', group='url')
|
||||
|
||||
video = self._download_json(api_url, display_id)
|
||||
|
||||
nt = mobj.group('nt') or mobj.group('nt_de')
|
||||
lecture_id = mobj.group('id')
|
||||
display_id = nt or lecture_id
|
||||
api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json'
|
||||
video = self._download_json(
|
||||
self._API_BASE_URL + api_path, display_id)
|
||||
title = video['title'].strip()
|
||||
if not lecture_id:
|
||||
pid = video.get('productId') or video.get('uid')
|
||||
if pid:
|
||||
spid = pid.split('_')
|
||||
if spid and len(spid) == 2:
|
||||
lecture_id = spid[1]
|
||||
|
||||
formats = []
|
||||
for format_ in video['content']['media']:
|
||||
@ -129,24 +135,30 @@ class LecturioIE(LecturioBaseIE):
|
||||
continue
|
||||
label = str_or_none(format_.get('label'))
|
||||
filesize = int_or_none(format_.get('fileSize'))
|
||||
formats.append({
|
||||
f = {
|
||||
'url': file_url,
|
||||
'format_id': label,
|
||||
'filesize': float_or_none(filesize, invscale=1000)
|
||||
}
|
||||
if label:
|
||||
mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label)
|
||||
if mobj:
|
||||
f.update({
|
||||
'format_id': mobj.group(2),
|
||||
'height': int(mobj.group(1)),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
automatic_captions = {}
|
||||
cc = self._parse_json(
|
||||
self._search_regex(
|
||||
r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles',
|
||||
default='{}'), display_id, fatal=False)
|
||||
for cc_label, cc_url in cc.items():
|
||||
cc_url = url_or_none(cc_url)
|
||||
captions = video.get('captions') or []
|
||||
for cc in captions:
|
||||
cc_url = cc.get('url')
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = self._search_regex(
|
||||
cc_label = cc.get('translatedCode')
|
||||
lang = cc.get('languageCode') or self._search_regex(
|
||||
r'/([a-z]{2})_', cc_url, 'lang',
|
||||
default=cc_label.split()[0] if cc_label else 'en')
|
||||
original_lang = self._search_regex(
|
||||
@ -160,7 +172,7 @@ class LecturioIE(LecturioBaseIE):
|
||||
})
|
||||
|
||||
return {
|
||||
'id': lecture_id,
|
||||
'id': lecture_id or nt,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
@ -169,37 +181,40 @@ class LecturioIE(LecturioBaseIE):
|
||||
|
||||
|
||||
class LecturioCourseIE(LecturioBaseIE):
|
||||
_VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
|
||||
'info_dict': {
|
||||
'id': 'microbiology-introduction',
|
||||
'title': 'Microbiology: Introduction',
|
||||
'description': 'md5:13da8500c25880c6016ae1e6d78c386a',
|
||||
},
|
||||
'playlist_count': 45,
|
||||
'skip': 'Requires lecturio account credentials',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://app.lecturio.com/#/course/c/6434',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nt, course_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = nt or course_id
|
||||
api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
|
||||
course = self._download_json(
|
||||
self._API_BASE_URL + api_path, display_id)
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>',
|
||||
webpage):
|
||||
params = extract_attributes(mobj.group(0))
|
||||
lecture_url = urljoin(url, params.get('data-url'))
|
||||
lecture_id = params.get('data-id')
|
||||
for lecture in course.get('lectures', []):
|
||||
lecture_id = str_or_none(lecture.get('id'))
|
||||
lecture_url = lecture.get('url')
|
||||
if lecture_url:
|
||||
lecture_url = urljoin(url, lecture_url)
|
||||
else:
|
||||
lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id)
|
||||
entries.append(self.url_result(
|
||||
lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
|
||||
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage,
|
||||
'title', default=None)
|
||||
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
return self.playlist_result(
|
||||
entries, display_id, course.get('title'),
|
||||
clean_html(course.get('description')))
|
||||
|
||||
|
||||
class LecturioDeCourseIE(LecturioBaseIE):
|
||||
|
@ -326,7 +326,7 @@ class LetvCloudIE(InfoExtractor):
|
||||
elif play_json.get('code'):
|
||||
raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
|
||||
else:
|
||||
raise ExtractorError('Letv cloud returned an unknwon error')
|
||||
raise ExtractorError('Letv cloud returned an unknown error')
|
||||
|
||||
def b64decode(s):
|
||||
return compat_b64decode(s).decode('utf-8')
|
||||
|
42
youtube_dl/extractor/livejournal.py
Normal file
42
youtube_dl/extractor/livejournal.py
Normal file
@ -0,0 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class LiveJournalIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',
|
||||
'md5': 'adaf018388572ced8a6f301ace49d4b2',
|
||||
'info_dict': {
|
||||
'id': '1263729',
|
||||
'ext': 'mp4',
|
||||
'title': 'Истребители против БПЛА',
|
||||
'upload_date': '20190624',
|
||||
'timestamp': 1561406715,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
record = self._parse_json(self._search_regex(
|
||||
r'Site\.page\s*=\s*({.+?});', webpage,
|
||||
'page data'), video_id)['video']['record']
|
||||
storage_id = compat_str(record['storageid'])
|
||||
title = record.get('name')
|
||||
if title:
|
||||
# remove filename extension(.mp4, .mov, etc...)
|
||||
title = title.rsplit('.', 1)[0]
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': record.get('thumbnail'),
|
||||
'timestamp': int_or_none(record.get('timecreate')),
|
||||
'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id,
|
||||
'ie_key': 'EaglePlatform',
|
||||
}
|
@ -82,6 +82,10 @@ class LiveLeakIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# No original video
|
||||
'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -134,8 +138,10 @@ class LiveLeakIE(InfoExtractor):
|
||||
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
|
||||
if a_format['url'] != orig_url:
|
||||
format_id = a_format.get('format_id')
|
||||
format_id = 'original' + ('-' + format_id if format_id else '')
|
||||
if self._is_valid_url(orig_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': 'original' + ('-' + format_id if format_id else ''),
|
||||
'format_id': format_id,
|
||||
'url': orig_url,
|
||||
'preference': 1,
|
||||
})
|
||||
|
@ -117,6 +117,10 @@ class LyndaIE(LyndaBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Status="NotFound", Message="Transcript not found"
|
||||
'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _raise_unavailable(self, video_id):
|
||||
@ -247,11 +251,16 @@ class LyndaIE(LyndaBaseIE):
|
||||
|
||||
def _get_subtitles(self, video_id):
|
||||
url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||
subs = self._download_json(url, None, False)
|
||||
subs = self._download_webpage(
|
||||
url, video_id, 'Downloading subtitles JSON', fatal=False)
|
||||
if not subs or 'Status="NotFound"' in subs:
|
||||
return {}
|
||||
subs = self._parse_json(subs, video_id, fatal=False)
|
||||
if not subs:
|
||||
return {}
|
||||
fixed_subs = self._fix_subtitles(subs)
|
||||
if fixed_subs:
|
||||
return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
|
||||
else:
|
||||
return {}
|
||||
|
||||
|
||||
|
@ -79,6 +79,10 @@ class MGTVIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'tbr': tbr,
|
||||
'protocol': 'm3u8_native',
|
||||
'http_headers': {
|
||||
'Referer': url,
|
||||
},
|
||||
'format_note': stream.get('name'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -1,112 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
qualities,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class NFBIE(InfoExtractor):
|
||||
IE_NAME = 'nfb'
|
||||
IE_DESC = 'National Film Board of Canada'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
|
||||
'info_dict': {
|
||||
'id': 'qallunaat_why_white_people_are_funny',
|
||||
'ext': 'flv',
|
||||
'title': 'Qallunaat! Why White People Are Funny ',
|
||||
'description': 'md5:6b8e32dde3abf91e58857b174916620c',
|
||||
'duration': 3128,
|
||||
'creator': 'Mark Sandiford',
|
||||
'uploader': 'Mark Sandiford',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config = self._download_xml(
|
||||
'https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||
video_id, 'Downloading player config XML',
|
||||
data=urlencode_postdata({'getConfig': 'true'}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
|
||||
})
|
||||
|
||||
title, description, thumbnail, duration, uploader, author = [None] * 6
|
||||
thumbnails, formats = [[]] * 2
|
||||
subtitles = {}
|
||||
|
||||
for media in config.findall('./player/stream/media'):
|
||||
if media.get('type') == 'posterImage':
|
||||
quality_key = qualities(('low', 'high'))
|
||||
thumbnails = []
|
||||
for asset in media.findall('assets/asset'):
|
||||
asset_url = xpath_text(asset, 'default/url', default=None)
|
||||
if not asset_url:
|
||||
continue
|
||||
quality = asset.get('quality')
|
||||
thumbnails.append({
|
||||
'url': asset_url,
|
||||
'id': quality,
|
||||
'preference': quality_key(quality),
|
||||
})
|
||||
elif media.get('type') == 'video':
|
||||
title = xpath_text(media, 'title', fatal=True)
|
||||
for asset in media.findall('assets/asset'):
|
||||
quality = asset.get('quality')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', quality or '', 'height', default=None))
|
||||
for node in asset:
|
||||
streamer = xpath_text(node, 'streamerURI', default=None)
|
||||
if not streamer:
|
||||
continue
|
||||
play_path = xpath_text(node, 'url', default=None)
|
||||
if not play_path:
|
||||
continue
|
||||
formats.append({
|
||||
'url': streamer,
|
||||
'app': streamer.split('/', 3)[3],
|
||||
'play_path': play_path,
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
description = clean_html(xpath_text(media, 'description'))
|
||||
uploader = xpath_text(media, 'author')
|
||||
duration = int_or_none(media.get('duration'))
|
||||
for subtitle in media.findall('./subtitles/subtitle'):
|
||||
subtitle_url = xpath_text(subtitle, 'url', default=None)
|
||||
if not subtitle_url:
|
||||
continue
|
||||
lang = xpath_text(subtitle, 'lang', default='en')
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'creator': uploader,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@ -1,212 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class NovaMovIE(InfoExtractor):
|
||||
IE_NAME = 'novamov'
|
||||
IE_DESC = 'NovaMov'
|
||||
|
||||
_VALID_URL_TEMPLATE = r'''(?x)
|
||||
http://
|
||||
(?:
|
||||
(?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/|
|
||||
(?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv=
|
||||
)
|
||||
(?P<id>[a-z\d]{13})
|
||||
'''
|
||||
_VALID_URL = _VALID_URL_TEMPLATE % {'host': r'novamov\.com'}
|
||||
|
||||
_HOST = 'www.novamov.com'
|
||||
|
||||
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
|
||||
_FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);'
|
||||
_TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
|
||||
_DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
|
||||
_URL_TEMPLATE = 'http://%s/video/%s'
|
||||
|
||||
_TEST = None
|
||||
|
||||
def _check_existence(self, webpage, video_id):
|
||||
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
url = self._URL_TEMPLATE % (self._HOST, video_id)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, 'Downloading video page')
|
||||
|
||||
self._check_existence(webpage, video_id)
|
||||
|
||||
def extract_filekey(default=NO_DEFAULT):
|
||||
filekey = self._search_regex(
|
||||
self._FILEKEY_REGEX, webpage, 'filekey', default=default)
|
||||
if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'):
|
||||
return self._search_regex(
|
||||
r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default)
|
||||
else:
|
||||
return filekey
|
||||
|
||||
filekey = extract_filekey(default=None)
|
||||
|
||||
if not filekey:
|
||||
fields = self._hidden_inputs(webpage)
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
|
||||
'post url', default=url, group='url')
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(url, post_url)
|
||||
request = sanitized_Request(
|
||||
post_url, urlencode_postdata(fields))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('Referer', post_url)
|
||||
webpage = self._download_webpage(
|
||||
request, video_id, 'Downloading continue to the video page')
|
||||
self._check_existence(webpage, video_id)
|
||||
|
||||
filekey = extract_filekey()
|
||||
|
||||
title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title')
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
|
||||
|
||||
api_response = self._download_webpage(
|
||||
'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
|
||||
'Downloading video api response')
|
||||
|
||||
response = compat_urlparse.parse_qs(api_response)
|
||||
|
||||
if 'error_msg' in response:
|
||||
raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)
|
||||
|
||||
video_url = response['url'][0]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description
|
||||
}
|
||||
|
||||
|
||||
class WholeCloudIE(NovaMovIE):
|
||||
IE_NAME = 'wholecloud'
|
||||
IE_DESC = 'WholeCloud'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
|
||||
|
||||
_HOST = 'www.wholecloud.net'
|
||||
|
||||
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||
_TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
|
||||
_DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.wholecloud.net/video/559e28be54d96',
|
||||
'md5': 'abd31a2132947262c50429e1d16c1bfd',
|
||||
'info_dict': {
|
||||
'id': '559e28be54d96',
|
||||
'ext': 'flv',
|
||||
'title': 'dissapeared image',
|
||||
'description': 'optical illusion dissapeared image magic illusion',
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class NowVideoIE(NovaMovIE):
|
||||
IE_NAME = 'nowvideo'
|
||||
IE_DESC = 'NowVideo'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
|
||||
|
||||
_HOST = 'www.nowvideo.to'
|
||||
|
||||
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||
_TITLE_REGEX = r'<h4>([^<]+)</h4>'
|
||||
_DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b',
|
||||
'md5': '12c82cad4f2084881d8bc60ee29df092',
|
||||
'info_dict': {
|
||||
'id': 'f1d6fce9a968b',
|
||||
'ext': 'flv',
|
||||
'title': 'youtubedl test video BaWjenozKc',
|
||||
'description': 'Description',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class VideoWeedIE(NovaMovIE):
|
||||
IE_NAME = 'videoweed'
|
||||
IE_DESC = 'VideoWeed'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}
|
||||
|
||||
_HOST = 'www.videoweed.es'
|
||||
|
||||
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||
_TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
|
||||
_URL_TEMPLATE = 'http://%s/file/%s'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.videoweed.es/file/b42178afbea14',
|
||||
'md5': 'abd31a2132947262c50429e1d16c1bfd',
|
||||
'info_dict': {
|
||||
'id': 'b42178afbea14',
|
||||
'ext': 'flv',
|
||||
'title': 'optical illusion dissapeared image magic illusion',
|
||||
'description': ''
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class CloudTimeIE(NovaMovIE):
|
||||
IE_NAME = 'cloudtime'
|
||||
IE_DESC = 'CloudTime'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'}
|
||||
|
||||
_HOST = 'www.cloudtime.to'
|
||||
|
||||
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||
_TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'
|
||||
|
||||
_TEST = None
|
||||
|
||||
|
||||
class AuroraVidIE(NovaMovIE):
|
||||
IE_NAME = 'auroravid'
|
||||
IE_DESC = 'AuroraVid'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'auroravid\.to'}
|
||||
|
||||
_HOST = 'www.auroravid.to'
|
||||
|
||||
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!<'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.auroravid.to/video/4rurhn9x446jj',
|
||||
'md5': '7205f346a52bbeba427603ba10d4b935',
|
||||
'info_dict': {
|
||||
'id': '4rurhn9x446jj',
|
||||
'ext': 'flv',
|
||||
'title': 'search engine optimization',
|
||||
'description': 'search engine optimization is used to rank the web page in the google search engine'
|
||||
},
|
||||
'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
|
||||
}, {
|
||||
'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj',
|
||||
'only_matching': True,
|
||||
}]
|
File diff suppressed because it is too large
Load Diff
@ -5,26 +5,27 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
# compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
# remove_end,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
# urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PacktPubBaseIE(InfoExtractor):
|
||||
_PACKT_BASE = 'https://www.packtpub.com'
|
||||
_MAPT_REST = '%s/mapt-rest' % _PACKT_BASE
|
||||
# _PACKT_BASE = 'https://www.packtpub.com'
|
||||
_STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/'
|
||||
|
||||
|
||||
class PacktPubIE(PacktPubBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>[^/]+)/(?P<id>[^/]+)(?:/(?P<display_id>[^/?&#]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
|
||||
@ -40,6 +41,9 @@ class PacktPubIE(PacktPubBaseIE):
|
||||
}, {
|
||||
'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://subscription.packtpub.com/video/programming/9781838988906/p1/video1_1/business-card-project',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_NETRC_MACHINE = 'packtpub'
|
||||
_TOKEN = None
|
||||
@ -50,9 +54,9 @@ class PacktPubIE(PacktPubBaseIE):
|
||||
return
|
||||
try:
|
||||
self._TOKEN = self._download_json(
|
||||
self._MAPT_REST + '/users/tokens', None,
|
||||
'https://services.packtpub.com/auth-v1/users/tokens', None,
|
||||
'Downloading Authorization Token', data=json.dumps({
|
||||
'email': username,
|
||||
'username': username,
|
||||
'password': password,
|
||||
}).encode())['data']['access']
|
||||
except ExtractorError as e:
|
||||
@ -61,54 +65,40 @@ class PacktPubIE(PacktPubBaseIE):
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
|
||||
def _handle_error(self, response):
|
||||
if response.get('status') != 'success':
|
||||
raise ExtractorError(
|
||||
'% said: %s' % (self.IE_NAME, response['message']),
|
||||
expected=True)
|
||||
|
||||
def _download_json(self, *args, **kwargs):
|
||||
response = super(PacktPubIE, self)._download_json(*args, **kwargs)
|
||||
self._handle_error(response)
|
||||
return response
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_id, chapter_id, video_id = mobj.group(
|
||||
'course_id', 'chapter_id', 'id')
|
||||
course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['Authorization'] = 'Bearer ' + self._TOKEN
|
||||
video = self._download_json(
|
||||
'%s/users/me/products/%s/chapters/%s/sections/%s'
|
||||
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
|
||||
try:
|
||||
video_url = self._download_json(
|
||||
'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id,
|
||||
'Downloading JSON video', headers=headers)['data']
|
||||
|
||||
content = video.get('content')
|
||||
if not content:
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
self.raise_login_required('This video is locked')
|
||||
raise
|
||||
|
||||
video_url = content['file']
|
||||
# TODO: find a better way to avoid duplicating course requests
|
||||
# metadata = self._download_json(
|
||||
# '%s/products/%s/chapters/%s/sections/%s/metadata'
|
||||
# % (self._MAPT_REST, course_id, chapter_id, video_id),
|
||||
# video_id)['data']
|
||||
|
||||
metadata = self._download_json(
|
||||
'%s/products/%s/chapters/%s/sections/%s/metadata'
|
||||
% (self._MAPT_REST, course_id, chapter_id, video_id),
|
||||
video_id)['data']
|
||||
|
||||
title = metadata['pageTitle']
|
||||
course_title = metadata.get('title')
|
||||
if course_title:
|
||||
title = remove_end(title, ' - %s' % course_title)
|
||||
timestamp = unified_timestamp(metadata.get('publicationDate'))
|
||||
thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
|
||||
# title = metadata['pageTitle']
|
||||
# course_title = metadata.get('title')
|
||||
# if course_title:
|
||||
# title = remove_end(title, ' - %s' % course_title)
|
||||
# timestamp = unified_timestamp(metadata.get('publicationDate'))
|
||||
# thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'title': display_id or video_id, # title,
|
||||
# 'thumbnail': thumbnail,
|
||||
# 'timestamp': timestamp,
|
||||
}
|
||||
|
||||
|
||||
@ -119,6 +109,7 @@ class PacktPubCourseIE(PacktPubBaseIE):
|
||||
'info_dict': {
|
||||
'id': '9781787122215',
|
||||
'title': 'Learn Nodejs by building 12 projects [Video]',
|
||||
'description': 'md5:489da8d953f416e51927b60a1c7db0aa',
|
||||
},
|
||||
'playlist_count': 90,
|
||||
}, {
|
||||
@ -136,35 +127,38 @@ class PacktPubCourseIE(PacktPubBaseIE):
|
||||
url, course_id = mobj.group('url', 'id')
|
||||
|
||||
course = self._download_json(
|
||||
'%s/products/%s/metadata' % (self._MAPT_REST, course_id),
|
||||
course_id)['data']
|
||||
self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id)
|
||||
metadata = self._download_json(
|
||||
self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id,
|
||||
course_id, fatal=False) or {}
|
||||
|
||||
entries = []
|
||||
for chapter_num, chapter in enumerate(course['tableOfContents'], 1):
|
||||
if chapter.get('type') != 'chapter':
|
||||
continue
|
||||
children = chapter.get('children')
|
||||
if not isinstance(children, list):
|
||||
for chapter_num, chapter in enumerate(course['chapters'], 1):
|
||||
chapter_id = str_or_none(chapter.get('id'))
|
||||
sections = chapter.get('sections')
|
||||
if not chapter_id or not isinstance(sections, list):
|
||||
continue
|
||||
chapter_info = {
|
||||
'chapter': chapter.get('title'),
|
||||
'chapter_number': chapter_num,
|
||||
'chapter_id': chapter.get('id'),
|
||||
'chapter_id': chapter_id,
|
||||
}
|
||||
for section in children:
|
||||
if section.get('type') != 'section':
|
||||
continue
|
||||
section_url = section.get('seoUrl')
|
||||
if not isinstance(section_url, compat_str):
|
||||
for section in sections:
|
||||
section_id = str_or_none(section.get('id'))
|
||||
if not section_id or section.get('contentType') != 'video':
|
||||
continue
|
||||
entry = {
|
||||
'_type': 'url_transparent',
|
||||
'url': urljoin(url + '/', section_url),
|
||||
'url': '/'.join([url, chapter_id, section_id]),
|
||||
'title': strip_or_none(section.get('title')),
|
||||
'description': clean_html(section.get('summary')),
|
||||
'thumbnail': metadata.get('coverImage'),
|
||||
'timestamp': unified_timestamp(metadata.get('publicationDate')),
|
||||
'ie_key': PacktPubIE.ie_key(),
|
||||
}
|
||||
entry.update(chapter_info)
|
||||
entries.append(entry)
|
||||
|
||||
return self.playlist_result(entries, course_id, course.get('title'))
|
||||
return self.playlist_result(
|
||||
entries, course_id, metadata.get('title'),
|
||||
clean_html(metadata.get('about')))
|
||||
|
@ -168,7 +168,7 @@ class PeerTubeIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _extract_peertube_url(webpage, source_url):
|
||||
mobj = re.match(
|
||||
r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
|
||||
r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
|
||||
% PeerTubeIE._UUID_RE, source_url)
|
||||
if mobj and any(p in webpage for p in (
|
||||
'<title>PeerTube<',
|
||||
|
@ -14,7 +14,7 @@ class PhilharmonieDeParisIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)|
|
||||
live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)|
|
||||
pad\.philharmoniedeparis\.fr/doc/CIMU/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
@ -40,6 +40,12 @@ class PhilharmonieDeParisIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_LIVE_URL = 'https://live.philharmoniedeparis.fr'
|
||||
|
||||
|
@ -39,7 +39,12 @@ class Porn91IE(InfoExtractor):
|
||||
r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
|
||||
title = title.replace('\n', '')
|
||||
|
||||
info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
video_link_url = self._search_regex(
|
||||
r'<textarea[^>]+id=["\']fm-video_link[^>]+>([^<]+)</textarea>',
|
||||
webpage, 'video link')
|
||||
videopage = self._download_webpage(video_link_url, video_id)
|
||||
|
||||
info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0]
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
|
||||
|
@ -1,101 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class PornFlipIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
|
||||
'md5': '98c46639849145ae1fd77af532a9278c',
|
||||
'info_dict': {
|
||||
'id': 'wz7DfNhMmep',
|
||||
'ext': 'mp4',
|
||||
'title': '2 Amateurs swallow make his dream cumshots true',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 112,
|
||||
'timestamp': 1481655502,
|
||||
'upload_date': '20161213',
|
||||
'uploader_id': '106786',
|
||||
'uploader': 'figifoto',
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/v/NG9q6Pb_iK8',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.pornflip.com/v/%s' % video_id, video_id)
|
||||
|
||||
flashvars = compat_parse_qs(self._search_regex(
|
||||
r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
|
||||
webpage, 'flashvars', group='flashvars'))
|
||||
|
||||
title = flashvars['video_vars[title]'][0]
|
||||
|
||||
def flashvar(kind):
|
||||
return try_get(
|
||||
flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str)
|
||||
|
||||
formats = []
|
||||
for key, value in flashvars.items():
|
||||
if not (value and isinstance(value, list)):
|
||||
continue
|
||||
format_url = value[0]
|
||||
if key == 'video_vars[hds_manifest]':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
|
||||
if not height:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': 'http-%s' % height,
|
||||
'height': int_or_none(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
(r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
|
||||
r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
|
||||
webpage, 'uploader', fatal=False, group='uploader')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'thumbnail': flashvar('big_thumb'),
|
||||
'duration': int_or_none(flashvar('duration')),
|
||||
'timestamp': unified_timestamp(self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp')),
|
||||
'uploader_id': flashvar('author_id'),
|
||||
'uploader': uploader,
|
||||
'view_count': int_or_none(flashvar('views')),
|
||||
'age_limit': 18,
|
||||
}
|
@ -170,7 +170,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
def dl_webpage(platform):
|
||||
self._set_cookie(host, 'platform', platform)
|
||||
return self._download_webpage(
|
||||
'http://www.%s/view_video.php?viewkey=%s' % (host, video_id),
|
||||
'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
|
||||
video_id, 'Downloading %s webpage' % platform)
|
||||
|
||||
webpage = dl_webpage('pc')
|
||||
@ -372,37 +372,92 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
entries, playlist_id, title, playlist.get('description'))
|
||||
|
||||
|
||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/playlist/(?P<id>\d+)'
|
||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/playlist/4667351',
|
||||
'info_dict': {
|
||||
'id': '4667351',
|
||||
'title': 'Nataly Hot',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||
'playlist_mincount': 118,
|
||||
}, {
|
||||
'url': 'https://de.pornhub.com/playlist/4667351',
|
||||
'url': 'https://www.pornhub.com/pornstar/liz-vicious',
|
||||
'info_dict': {
|
||||
'id': 'liz-vicious',
|
||||
},
|
||||
'playlist_mincount': 118,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/users/russianveet69',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/channels/povd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('id')
|
||||
return self.url_result(
|
||||
'%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
|
||||
video_id=user_id)
|
||||
|
||||
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
|
||||
|
||||
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
item_id = mobj.group('id')
|
||||
|
||||
page = int_or_none(self._search_regex(
|
||||
r'\bpage=(\d+)', url, 'page', default=None))
|
||||
|
||||
page_url = self._make_page_url(url)
|
||||
|
||||
entries = []
|
||||
for page_num in (page, ) if page is not None else itertools.count(1):
|
||||
try:
|
||||
webpage = self._download_webpage(
|
||||
page_url, item_id, 'Downloading page %d' % page_num,
|
||||
query={'page': page_num})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
break
|
||||
raise
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
entries.extend(page_entries)
|
||||
if not self._has_more(webpage):
|
||||
break
|
||||
|
||||
return self.playlist_result(orderedSet(entries), item_id)
|
||||
|
||||
|
||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'info_dict': {
|
||||
'id': 'zoe_ph',
|
||||
},
|
||||
'playlist_mincount': 171,
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/users/rushandlia/videos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
|
||||
'info_dict': {
|
||||
'id': 'pornstar/jenny-blighe/videos',
|
||||
},
|
||||
'playlist_mincount': 149,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
|
||||
'info_dict': {
|
||||
'id': 'pornstar/jenny-blighe/videos',
|
||||
},
|
||||
'playlist_mincount': 40,
|
||||
}, {
|
||||
# default sorting as Top Rated Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos',
|
||||
'info_dict': {
|
||||
'id': 'povd',
|
||||
'id': 'channels/povd/videos',
|
||||
},
|
||||
'playlist_mincount': 293,
|
||||
}, {
|
||||
@ -421,31 +476,107 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/jayndrea/videos/upload',
|
||||
# Most Viewed Videos
|
||||
'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||
# Top Rated Videos
|
||||
'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Longest Videos
|
||||
'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Newest Videos
|
||||
'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/video',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/video?page=3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/video/search?search=123',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/categories/teen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/categories/teen?page=3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/hd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/hd?page=3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/described-video',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/described-video?page=2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/playlist/44121572',
|
||||
'info_dict': {
|
||||
'id': 'playlist/44121572',
|
||||
},
|
||||
'playlist_mincount': 132,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/playlist/4667351',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://de.pornhub.com/playlist/4667351',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
|
||||
else super(PornHubPagedVideoListIE, cls).suitable(url))
|
||||
|
||||
def _make_page_url(self, url):
|
||||
return url
|
||||
|
||||
@staticmethod
|
||||
def _has_more(webpage):
|
||||
return re.search(
|
||||
r'''(?x)
|
||||
<li[^>]+\bclass=["\']page_next|
|
||||
<link[^>]+\brel=["\']next|
|
||||
<button[^>]+\bid=["\']moreDataBtn
|
||||
''', webpage) is not None
|
||||
|
||||
|
||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||
'info_dict': {
|
||||
'id': 'jenny-blighe',
|
||||
},
|
||||
'playlist_mincount': 129,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _make_page_url(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
user_id = mobj.group('id')
|
||||
return '%s/ajax' % mobj.group('url')
|
||||
|
||||
entries = []
|
||||
for page_num in itertools.count(1):
|
||||
try:
|
||||
webpage = self._download_webpage(
|
||||
url, user_id, 'Downloading page %d' % page_num,
|
||||
query={'page': page_num})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
break
|
||||
raise
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
entries.extend(page_entries)
|
||||
|
||||
return self.playlist_result(entries, user_id)
|
||||
@staticmethod
|
||||
def _has_more(webpage):
|
||||
return True
|
||||
|
@ -16,6 +16,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class ProSiebenSat1BaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_ACCESS_ID = None
|
||||
_SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
|
||||
_V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
|
||||
|
||||
def _extract_video_info(self, url, clip_id):
|
||||
client_location = url
|
||||
|
||||
@ -31,7 +36,43 @@ class ProSiebenSat1BaseIE(InfoExtractor):
|
||||
if video.get('is_protected') is True:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
duration = float_or_none(video.get('duration'))
|
||||
formats = []
|
||||
if self._ACCESS_ID:
|
||||
raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
|
||||
server_token = (self._download_json(
|
||||
self._V4_BASE_URL + 'protocols', clip_id,
|
||||
'Downloading protocols JSON',
|
||||
headers=self.geo_verification_headers(), query={
|
||||
'access_id': self._ACCESS_ID,
|
||||
'client_token': sha1((raw_ct).encode()).hexdigest(),
|
||||
'video_id': clip_id,
|
||||
}, fatal=False) or {}).get('server_token')
|
||||
if server_token:
|
||||
urls = (self._download_json(
|
||||
self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
|
||||
'access_id': self._ACCESS_ID,
|
||||
'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
|
||||
'protocols': self._SUPPORTED_PROTOCOLS,
|
||||
'server_token': server_token,
|
||||
'video_id': clip_id,
|
||||
}, fatal=False) or {}).get('urls') or {}
|
||||
for protocol, variant in urls.items():
|
||||
source_url = variant.get('clear', {}).get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
if protocol == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, clip_id, mpd_id=protocol, fatal=False))
|
||||
elif protocol == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, clip_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=protocol, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': protocol,
|
||||
})
|
||||
if not formats:
|
||||
source_ids = [compat_str(source['id']) for source in video['sources']]
|
||||
|
||||
client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
|
||||
@ -52,7 +93,6 @@ class ProSiebenSat1BaseIE(InfoExtractor):
|
||||
return None
|
||||
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
|
||||
|
||||
formats = []
|
||||
for source_id in source_ids:
|
||||
client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
|
||||
urls = self._download_json(
|
||||
@ -117,7 +157,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'duration': duration,
|
||||
'duration': float_or_none(video.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@ -344,6 +384,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
_TOKEN = 'prosieben'
|
||||
_SALT = '01!8d8F_)r9]4s[qeuXfP%'
|
||||
_CLIENT_NAME = 'kolibri-2.0.19-splec4'
|
||||
|
||||
_ACCESS_ID = 'x_prosiebenmaxx-de'
|
||||
_ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
|
||||
_IV = 'Aeluchoc6aevechuipiexeeboowedaok'
|
||||
|
||||
_CLIPID_REGEXES = [
|
||||
r'"clip_id"\s*:\s+"(\d+)"',
|
||||
r'clipid: "(\d+)"',
|
||||
|
@ -104,3 +104,25 @@ class RedBullTVIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class RedBullTVRrnContentIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)/(?:video|live)/rrn:content:[^:]+:(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._og_search_url(webpage)
|
||||
|
||||
return self.url_result(
|
||||
video_url, ie=RedBullTVIE.ie_key(),
|
||||
video_id=RedBullTVIE._match_id(video_url))
|
||||
|
@ -4,32 +4,34 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
str_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class RoosterTeethIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
|
||||
_LOGIN_URL = 'https://roosterteeth.com/login'
|
||||
_NETRC_MACHINE = 'roosterteeth'
|
||||
_TESTS = [{
|
||||
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||
'md5': 'e2bd7764732d785ef797700a2489f212',
|
||||
'info_dict': {
|
||||
'id': '26576',
|
||||
'id': '9156',
|
||||
'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||
'ext': 'mp4',
|
||||
'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
|
||||
'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
|
||||
'title': 'Million Dollars, But... The Game Announcement',
|
||||
'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5',
|
||||
'thumbnail': r're:^https?://.*\.png$',
|
||||
'series': 'Million Dollars, But...',
|
||||
'episode': 'Million Dollars, But... The Game Announcement',
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
|
||||
@ -47,6 +49,9 @@ class RoosterTeethIE(InfoExtractor):
|
||||
# only available for FIRST members
|
||||
'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@ -89,60 +94,55 @@ class RoosterTeethIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
episode = strip_or_none(unescapeHTML(self._search_regex(
|
||||
(r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
|
||||
default=None, group='title')))
|
||||
|
||||
title = strip_or_none(self._og_search_title(
|
||||
webpage, default=None)) or episode
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
|
||||
webpage, 'm3u8 url', default=None, group='url')
|
||||
|
||||
if not m3u8_url:
|
||||
if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
|
||||
try:
|
||||
m3u8_url = self._download_json(
|
||||
api_episode_url + '/videos', display_id,
|
||||
'Downloading video JSON metadata')['data'][0]['attributes']['url']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
|
||||
self.raise_login_required(
|
||||
'%s is only available for FIRST members' % display_id)
|
||||
|
||||
if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
|
||||
self.raise_login_required('%s is not available yet' % display_id)
|
||||
|
||||
raise ExtractorError('Unable to extract m3u8 URL')
|
||||
raise
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, display_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = strip_or_none(self._og_search_description(webpage))
|
||||
thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))
|
||||
episode = self._download_json(
|
||||
api_episode_url, display_id,
|
||||
'Downloading episode JSON metadata')['data'][0]
|
||||
attributes = episode['attributes']
|
||||
title = attributes.get('title') or attributes['display_title']
|
||||
video_id = compat_str(episode['id'])
|
||||
|
||||
series = self._search_regex(
|
||||
(r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
|
||||
webpage, 'series', fatal=False)
|
||||
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'>Comments \((\d+)\)<', webpage,
|
||||
'comment count', fatal=False))
|
||||
|
||||
video_id = self._search_regex(
|
||||
(r'containerId\s*=\s*["\']episode-(\d+)\1',
|
||||
r'<div[^<]+id=["\']episode-(\d+)'), webpage,
|
||||
'video id', default=display_id)
|
||||
thumbnails = []
|
||||
for image in episode.get('included', {}).get('images', []):
|
||||
if image.get('type') == 'episode_image':
|
||||
img_attributes = image.get('attributes') or {}
|
||||
for k in ('thumb', 'small', 'medium', 'large'):
|
||||
img_url = img_attributes.get(k)
|
||||
if img_url:
|
||||
thumbnails.append({
|
||||
'id': k,
|
||||
'url': img_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'comment_count': comment_count,
|
||||
'description': attributes.get('description') or attributes.get('caption'),
|
||||
'thumbnails': thumbnails,
|
||||
'series': attributes.get('show_title'),
|
||||
'season_number': int_or_none(attributes.get('season_number')),
|
||||
'season_id': attributes.get('season_id'),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(attributes.get('number')),
|
||||
'episode_id': str_or_none(episode.get('uuid')),
|
||||
'formats': formats,
|
||||
'channel_id': attributes.get('channel_id'),
|
||||
'duration': int_or_none(attributes.get('length')),
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ class RtlNlIE(InfoExtractor):
|
||||
'duration': 1167.96,
|
||||
},
|
||||
}, {
|
||||
# best format avaialble a3t
|
||||
# best format available a3t
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
|
||||
'md5': 'dea7474214af1271d91ef332fb8be7ea',
|
||||
'info_dict': {
|
||||
|
@ -1,9 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class RTPIE(InfoExtractor):
|
||||
@ -18,10 +20,6 @@ class RTPIE(InfoExtractor):
|
||||
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
|
||||
'only_matching': True,
|
||||
@ -33,57 +31,36 @@ class RTPIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_meta(
|
||||
'twitter:title', webpage, display_name='title', fatal=True)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
player_config = self._search_regex(
|
||||
r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
|
||||
config = self._parse_json(player_config, video_id)
|
||||
|
||||
path, ext = config.get('file').rsplit('.', 1)
|
||||
formats = [{
|
||||
'format_id': 'rtmp',
|
||||
'ext': ext,
|
||||
'vcodec': config.get('type') == 'audio' and 'none' or None,
|
||||
'preference': -2,
|
||||
'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
|
||||
'app': config.get('application'),
|
||||
'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
|
||||
'page_url': url,
|
||||
'rtmp_live': config.get('live', False),
|
||||
'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
|
||||
'rtmp_real_time': True,
|
||||
}]
|
||||
|
||||
# Construct regular HTTP download URLs
|
||||
replacements = {
|
||||
'audio': {
|
||||
'format_id': 'mp3',
|
||||
'pattern': r'^nas2\.share/wavrss/',
|
||||
'repl': 'http://rsspod.rtp.pt/podcasts/',
|
||||
'vcodec': 'none',
|
||||
},
|
||||
'video': {
|
||||
'format_id': 'mp4_h264',
|
||||
'pattern': r'^nas2\.share/h264/',
|
||||
'repl': 'http://rsspod.rtp.pt/videocasts/',
|
||||
'vcodec': 'h264',
|
||||
},
|
||||
}
|
||||
r = replacements[config['type']]
|
||||
if re.match(r['pattern'], config['file']) is not None:
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'(?s)RTPPlayer\(({.+?})\);', webpage,
|
||||
'player config'), video_id, js_to_json)
|
||||
file_url = config['file']
|
||||
ext = determine_ext(file_url)
|
||||
if ext == 'm3u8':
|
||||
file_key = config.get('fileKey')
|
||||
formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=file_key)
|
||||
if file_key:
|
||||
formats.append({
|
||||
'format_id': r['format_id'],
|
||||
'url': re.sub(r['pattern'], r['repl'], config['file']),
|
||||
'vcodec': r['vcodec'],
|
||||
'url': 'https://cdn-ondemand.rtp.pt' + file_key,
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
formats = [{
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
}]
|
||||
if config.get('mediaType') == 'audio':
|
||||
for f in formats:
|
||||
f['vcodec'] = 'none'
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'description': self._html_search_meta(['description', 'twitter:description'], webpage),
|
||||
'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@ -1,53 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
get_element_by_class,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class RudoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://rudo.video/vod/oTzw0MGnyG',
|
||||
'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
|
||||
'info_dict': {
|
||||
'id': 'oTzw0MGnyG',
|
||||
'ext': 'mp4',
|
||||
'title': 'Comentario Tomás Mosciatti',
|
||||
'upload_date': '20160617',
|
||||
},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')
|
||||
|
||||
jwplayer_data = self._parse_json(self._search_regex(
|
||||
r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id,
|
||||
transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
|
||||
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')
|
||||
|
||||
info_dict.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'upload_date': unified_strdate(get_element_by_class('date', webpage)),
|
||||
})
|
||||
|
||||
return info_dict
|
@ -3,8 +3,11 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
parse_filesize,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
@ -22,10 +25,8 @@ class SharedBaseIE(InfoExtractor):
|
||||
|
||||
video_url = self._extract_video_url(webpage, video_id, url)
|
||||
|
||||
title = compat_b64decode(self._html_search_meta(
|
||||
'full:title', webpage, 'title')).decode('utf-8')
|
||||
filesize = int_or_none(self._html_search_meta(
|
||||
'full:size', webpage, 'file size', fatal=False))
|
||||
title = self._extract_title(webpage)
|
||||
filesize = int_or_none(self._extract_filesize(webpage))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -35,6 +36,14 @@ class SharedBaseIE(InfoExtractor):
|
||||
'title': title,
|
||||
}
|
||||
|
||||
def _extract_title(self, webpage):
|
||||
return compat_b64decode(self._html_search_meta(
|
||||
'full:title', webpage, 'title')).decode('utf-8')
|
||||
|
||||
def _extract_filesize(self, webpage):
|
||||
return self._html_search_meta(
|
||||
'full:size', webpage, 'file size', fatal=False)
|
||||
|
||||
|
||||
class SharedIE(SharedBaseIE):
|
||||
IE_DESC = 'shared.sx'
|
||||
@ -82,11 +91,27 @@ class VivoIE(SharedBaseIE):
|
||||
'id': 'd7ddda0e78',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chicken',
|
||||
'filesize': 528031,
|
||||
'filesize': 515659,
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_video_url(self, webpage, video_id, *args):
|
||||
def _extract_title(self, webpage):
|
||||
title = self._html_search_regex(
|
||||
r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
|
||||
'title', default=None, group='title')
|
||||
if title:
|
||||
ext = determine_ext(title)
|
||||
if ext.lower() in KNOWN_EXTENSIONS:
|
||||
title = title.rpartition('.' + ext)[0]
|
||||
return title
|
||||
return self._og_search_title(webpage)
|
||||
|
||||
def _extract_filesize(self, webpage):
|
||||
return parse_filesize(self._search_regex(
|
||||
r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
|
||||
webpage, 'filesize', fatal=False))
|
||||
|
||||
def _extract_video_url(self, webpage, video_id, url):
|
||||
def decode_url(encoded_url):
|
||||
return compat_b64decode(encoded_url).decode('utf-8')
|
||||
|
||||
|
@ -19,7 +19,7 @@ from ..utils import (
|
||||
|
||||
class SixPlayIE(InfoExtractor):
|
||||
IE_NAME = '6play'
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
|
||||
'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
|
||||
@ -35,6 +35,9 @@ class SixPlayIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -43,6 +46,7 @@ class SixPlayIE(InfoExtractor):
|
||||
'6play.fr': ('6play', 'm6web'),
|
||||
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
|
||||
'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
|
||||
'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'),
|
||||
}.get(domain, ('6play', 'm6web'))
|
||||
|
||||
data = self._download_json(
|
||||
|
@ -197,7 +197,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# not avaialble via api.soundcloud.com/i1/tracks/id/streams
|
||||
# not available via api.soundcloud.com/i1/tracks/id/streams
|
||||
{
|
||||
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
|
||||
'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
|
||||
@ -221,7 +221,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
_CLIENT_ID = 'FweeGBOOEOYJWLJN3oEyToGLKhmSz0I7'
|
||||
_CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
|
@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
merge_dicts,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
@ -26,6 +27,8 @@ class SpankBangIE(InfoExtractor):
|
||||
'description': 'dillion harper masturbates on a bed',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'silly2587',
|
||||
'timestamp': 1422571989,
|
||||
'upload_date': '20150129',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
@ -106,31 +109,36 @@ class SpankBangIE(InfoExtractor):
|
||||
|
||||
for format_id, format_url in stream.items():
|
||||
if format_id.startswith(STREAM_URL_PREFIX):
|
||||
if format_url and isinstance(format_url, list):
|
||||
format_url = format_url[0]
|
||||
extract_format(
|
||||
format_id[len(STREAM_URL_PREFIX):], format_url)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
|
||||
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
|
||||
description = self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._search_regex(
|
||||
r'class="user"[^>]*><img[^>]+>([^<]+)',
|
||||
webpage, 'description', default=None)
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
uploader = self._html_search_regex(
|
||||
(r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
|
||||
r'class="user"[^>]*><img[^>]+>([^<]+)'),
|
||||
webpage, 'uploader', default=None)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
webpage, 'duration', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
|
||||
r'([\d,.]+)\s+plays', webpage, 'view count', default=None))
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
return {
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': title or video_id,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
@ -138,7 +146,8 @@ class SpankBangIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
}, info
|
||||
)
|
||||
|
||||
|
||||
class SpankBangPlaylistIE(InfoExtractor):
|
||||
|
@ -22,7 +22,7 @@ class BellatorIE(MTVServicesInfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
|
||||
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
|
||||
|
@ -106,7 +106,16 @@ class SRGSSRIE(InfoExtractor):
|
||||
|
||||
class SRGSSRPlayIE(InfoExtractor):
|
||||
IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
|
||||
_VALID_URL = r'https?://(?:(?:www|play)\.)?(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/[^/]+/(?P<type>video|audio)/[^?]+\?id=(?P<id>[0-9a-f\-]{36}|\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:(?:www|play)\.)?
|
||||
(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
|
||||
(?:
|
||||
[^/]+/(?P<type>video|audio)/[^?]+|
|
||||
popup(?P<type_2>video|audio)player
|
||||
)
|
||||
\?id=(?P<id>[0-9a-f\-]{36}|\d+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
|
||||
@ -163,9 +172,15 @@ class SRGSSRPlayIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
bu = mobj.group('bu')
|
||||
media_type = mobj.group('type') or mobj.group('type_2')
|
||||
media_id = mobj.group('id')
|
||||
# other info can be extracted from url + '&layout=json'
|
||||
return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')
|
||||
|
@ -45,7 +45,7 @@ class StreamcloudIE(InfoExtractor):
|
||||
value="([^"]*)"
|
||||
''', orig_webpage)
|
||||
|
||||
self._sleep(12, video_id)
|
||||
self._sleep(6, video_id)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, data=urlencode_postdata(fields), headers={
|
||||
|
@ -5,8 +5,12 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
@ -20,7 +24,7 @@ class TEDIE(InfoExtractor):
|
||||
(?P<proto>https?://)
|
||||
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
|
||||
(
|
||||
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||
(?P<type_playlist>playlists(?:/(?P<playlist_id>\d+))?) # We have a playlist
|
||||
|
|
||||
((?P<type_talk>talks)) # We have a simple talk
|
||||
|
|
||||
@ -84,6 +88,7 @@ class TEDIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '10',
|
||||
'title': 'Who are the hackers?',
|
||||
'description': 'md5:49a0dbe8fb76d81a0e64b4a80af7f15a'
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
@ -128,7 +133,7 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
def _extract_info(self, webpage):
|
||||
info_json = self._search_regex(
|
||||
r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>',
|
||||
r'(?s)q\(\s*"\w+.init"\s*,\s*({.+?})\)\s*</script>',
|
||||
webpage, 'info json')
|
||||
return json.loads(info_json)
|
||||
|
||||
@ -150,22 +155,22 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, name,
|
||||
'Downloading playlist webpage')
|
||||
info = self._extract_info(webpage)
|
||||
|
||||
playlist_info = try_get(
|
||||
info, lambda x: x['__INITIAL_DATA__']['playlist'],
|
||||
dict) or info['playlist']
|
||||
playlist_entries = []
|
||||
for entry in re.findall(r'(?s)<[^>]+data-ga-context=["\']playlist["\'][^>]*>', webpage):
|
||||
attrs = extract_attributes(entry)
|
||||
entry_url = compat_urlparse.urljoin(url, attrs['href'])
|
||||
playlist_entries.append(self.url_result(entry_url, self.ie_key()))
|
||||
|
||||
final_url = self._og_search_url(webpage, fatal=False)
|
||||
playlist_id = (
|
||||
re.match(self._VALID_URL, final_url).group('playlist_id')
|
||||
if final_url else None)
|
||||
|
||||
playlist_entries = [
|
||||
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||
for talk in try_get(
|
||||
info, lambda x: x['__INITIAL_DATA__']['talks'],
|
||||
dict) or info['talks']
|
||||
]
|
||||
return self.playlist_result(
|
||||
playlist_entries,
|
||||
playlist_id=compat_str(playlist_info['id']),
|
||||
playlist_title=playlist_info['title'])
|
||||
playlist_entries, playlist_id=playlist_id,
|
||||
playlist_title=self._og_search_title(webpage, fatal=False),
|
||||
playlist_description=self._og_search_description(webpage))
|
||||
|
||||
def _talk_info(self, url, video_name):
|
||||
webpage = self._download_webpage(url, video_name)
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class TF1IE(InfoExtractor):
|
||||
@ -43,12 +44,49 @@ class TF1IE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html',
|
||||
'info_dict': {
|
||||
'id': '13641379',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:f392bc52245dc5ad43771650c96fb620',
|
||||
'description': 'md5:44bc54f0a21322f5b91d68e76a544eae',
|
||||
'upload_date': '20190611',
|
||||
},
|
||||
'params': {
|
||||
# Sometimes wat serves the whole file with the --test option
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
wat_id = None
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'__APOLLO_STATE__\s*=\s*({.+?})\s*(?:;|</script>)', webpage,
|
||||
'data', default='{}'), video_id, fatal=False)
|
||||
|
||||
if data:
|
||||
try:
|
||||
wat_id = next(
|
||||
video.get('streamId')
|
||||
for key, video in data.items()
|
||||
if isinstance(video, dict)
|
||||
and video.get('slug') == video_id)
|
||||
if not isinstance(wat_id, compat_str) or not wat_id.isdigit():
|
||||
wat_id = None
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
if not wat_id:
|
||||
wat_id = self._html_search_regex(
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
|
||||
(r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
|
||||
r'(["\']?)streamId\1\s*:\s*(["\']?)(?P<id>\d+)\2'),
|
||||
webpage, 'wat id', group='id')
|
||||
|
||||
return self.url_result('wat:%s' % wat_id, 'Wat')
|
||||
|
@ -38,7 +38,7 @@ class TouTvIE(RadioCanadaIE):
|
||||
'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36'
|
||||
_CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4'
|
||||
|
||||
def _real_initialize(self):
|
||||
email, password = self._get_login_info()
|
||||
|
@ -9,6 +9,8 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -23,11 +25,10 @@ class TvigleIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvigle.ru/video/sokrat/',
|
||||
'md5': '36514aed3657d4f70b4b2cef8eb520cd',
|
||||
'info_dict': {
|
||||
'id': '1848932',
|
||||
'display_id': 'sokrat',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Сократ',
|
||||
'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
|
||||
'duration': 6586,
|
||||
@ -37,7 +38,6 @@ class TvigleIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
|
||||
'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
|
||||
'info_dict': {
|
||||
'id': '5142516',
|
||||
'ext': 'flv',
|
||||
@ -62,7 +62,7 @@ class TvigleIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(
|
||||
(r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
|
||||
r'var\s+cloudId\s*=\s*["\'](\d+)',
|
||||
r'cloudId\s*=\s*["\'](\d+)',
|
||||
r'class="video-preview current_playing" id="(\d+)"'),
|
||||
webpage, 'video id')
|
||||
|
||||
@ -90,20 +90,39 @@ class TvigleIE(InfoExtractor):
|
||||
age_limit = parse_age_limit(item.get('ageRestrictions'))
|
||||
|
||||
formats = []
|
||||
for vcodec, fmts in item['videos'].items():
|
||||
for vcodec, url_or_fmts in item['videos'].items():
|
||||
if vcodec == 'hls':
|
||||
m3u8_url = url_or_none(url_or_fmts)
|
||||
if not m3u8_url:
|
||||
continue
|
||||
for format_id, video_url in fmts.items():
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif vcodec == 'dash':
|
||||
mpd_url = url_or_none(url_or_fmts)
|
||||
if not mpd_url:
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
if not isinstance(url_or_fmts, dict):
|
||||
continue
|
||||
for format_id, video_url in url_or_fmts.items():
|
||||
if format_id == 'm3u8':
|
||||
continue
|
||||
video_url = url_or_none(video_url)
|
||||
if not video_url:
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
filesize = int_or_none(try_get(
|
||||
item, lambda x: x['video_files_size'][vcodec][format_id]))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%s-%s' % (vcodec, format_id),
|
||||
'vcodec': vcodec,
|
||||
'height': int_or_none(height),
|
||||
'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)),
|
||||
'filesize': filesize,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -1,32 +1,35 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from .spike import ParamountNetworkIE
|
||||
|
||||
|
||||
class TVLandIE(MTVServicesInfoExtractor):
|
||||
class TVLandIE(ParamountNetworkIE):
|
||||
IE_NAME = 'tvland.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
|
||||
_FEED_URL = 'http://www.tvland.com/feeds/mrss/'
|
||||
_TESTS = [{
|
||||
# Geo-restricted. Without a proxy metadata are still there. With a
|
||||
# proxy it redirects to http://m.tvland.com/app/
|
||||
'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048',
|
||||
'url': 'https://www.tvland.com/episodes/s04pzf/everybody-loves-raymond-the-dog-season-1-ep-19',
|
||||
'info_dict': {
|
||||
'description': 'md5:80973e81b916a324e05c14a3fb506d29',
|
||||
'title': 'The Invasion',
|
||||
'description': 'md5:84928e7a8ad6649371fbf5da5e1ad75a',
|
||||
'title': 'The Dog',
|
||||
},
|
||||
'playlist': [],
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies',
|
||||
'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6',
|
||||
'md5': 'e2c6389401cf485df26c79c247b08713',
|
||||
'info_dict': {
|
||||
'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88',
|
||||
'id': '891f7d3c-5b5b-4753-b879-b7ba1a601757',
|
||||
'ext': 'mp4',
|
||||
'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies',
|
||||
'description': 'md5:7d192f56ca8d958645c83f0de8ef0269',
|
||||
'upload_date': '20151228',
|
||||
'timestamp': 1451289600,
|
||||
'title': 'Younger|April 30, 2019|6|NO-EPISODE#|A First Look at Younger Season 6',
|
||||
'description': 'md5:595ea74578d3a888ae878dfd1c7d4ab2',
|
||||
'upload_date': '20190430',
|
||||
'timestamp': 1556658000,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301',
|
||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
@ -17,9 +18,21 @@ class TVN24IE(InfoExtractor):
|
||||
'id': '1584444',
|
||||
'ext': 'mp4',
|
||||
'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
|
||||
'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
|
||||
'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
|
||||
'thumbnail': 're:https?://.*[.]jpeg',
|
||||
}
|
||||
}, {
|
||||
# different layout
|
||||
'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
|
||||
'info_dict': {
|
||||
'id': '1771763',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
|
||||
'thumbnail': 're:https?://.*',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
|
||||
'only_matching': True,
|
||||
@ -35,18 +48,21 @@ class TVN24IE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._search_regex(
|
||||
r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
|
||||
webpage, 'title')
|
||||
|
||||
def extract_json(attr, name, fatal=True):
|
||||
def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
|
||||
name, group='json', fatal=fatal) or '{}',
|
||||
video_id, transform_source=unescapeHTML, fatal=fatal)
|
||||
name, group='json', default=default, fatal=fatal) or '{}',
|
||||
display_id, transform_source=unescapeHTML, fatal=fatal)
|
||||
|
||||
quality_data = extract_json('data-quality', 'formats')
|
||||
|
||||
@ -59,16 +75,24 @@ class TVN24IE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._html_search_regex(
|
||||
r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
|
||||
'thumbnail', group='url')
|
||||
|
||||
video_id = None
|
||||
|
||||
share_params = extract_json(
|
||||
'data-share-params', 'share params', fatal=False)
|
||||
'data-share-params', 'share params', default=None)
|
||||
if isinstance(share_params, dict):
|
||||
video_id = share_params.get('id') or video_id
|
||||
video_id = share_params.get('id')
|
||||
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'data-vid-id=["\'](\d+)', webpage, 'video id',
|
||||
default=None) or self._search_regex(
|
||||
r',(\d+)\.html', url, 'video id', default=display_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -47,15 +47,23 @@ class TVNowBaseIE(InfoExtractor):
|
||||
r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
|
||||
'.ism/' + suffix, manifest_url))
|
||||
|
||||
def make_urls(proto, suffix):
|
||||
urls = [url_repl(proto, suffix)]
|
||||
hd_url = urls[0].replace('/manifest/', '/ngvod/')
|
||||
if hd_url != urls[0]:
|
||||
urls.append(hd_url)
|
||||
return urls
|
||||
|
||||
for man_url in make_urls('dash', '.mpd'):
|
||||
formats = self._extract_mpd_formats(
|
||||
url_repl('dash', '.mpd'), video_id,
|
||||
mpd_id='dash', fatal=False)
|
||||
man_url, video_id, mpd_id='dash', fatal=False)
|
||||
for man_url in make_urls('hss', 'Manifest'):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
url_repl('hss', 'Manifest'),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
man_url, video_id, ism_id='mss', fatal=False))
|
||||
for man_url in make_urls('hls', '.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url_repl('hls', '.m3u8'), video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
if formats:
|
||||
break
|
||||
else:
|
||||
|
@ -14,7 +14,18 @@ from ..utils import (
|
||||
|
||||
class TwentyFourVideoIE(InfoExtractor):
|
||||
IE_NAME = '24video'
|
||||
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<host>
|
||||
(?:(?:www|porno)\.)?24video\.
|
||||
(?:net|me|xxx|sexy?|tube|adult|site)
|
||||
)/
|
||||
(?:
|
||||
video/(?:(?:view|xml)/)?|
|
||||
player/new24_play\.swf\?id=
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.24video.net/video/view/1044982',
|
||||
@ -42,6 +53,12 @@ class TwentyFourVideoIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.24video.tube/video/view/2363750',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.24video.site/video/view/2640421',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -317,7 +317,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
'Downloading %s access token' % self._ITEM_TYPE)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/vod/%s?%s' % (
|
||||
'%s/vod/%s.m3u8?%s' % (
|
||||
self._USHER_BASE, item_id,
|
||||
compat_urllib_parse_urlencode({
|
||||
'allow_source': 'true',
|
||||
|
@ -428,11 +428,22 @@ class TwitterIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/foobar/status/1087791357756956680',
|
||||
'info_dict': {
|
||||
'id': '1087791357756956680',
|
||||
'ext': 'mp4',
|
||||
'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:66d493500c013e3e2d434195746a7f78',
|
||||
'uploader': 'Twitter',
|
||||
'uploader_id': 'Twitter',
|
||||
'duration': 61.567,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('user_id')
|
||||
twid = mobj.group('id')
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
@ -441,8 +452,13 @@ class TwitterIE(InfoExtractor):
|
||||
if 'twitter.com/account/suspended' in urlh.geturl():
|
||||
raise ExtractorError('Account suspended by Twitter.', expected=True)
|
||||
|
||||
if user_id is None:
|
||||
mobj = re.match(self._VALID_URL, urlh.geturl())
|
||||
user_id = None
|
||||
|
||||
redirect_mobj = re.match(self._VALID_URL, urlh.geturl())
|
||||
if redirect_mobj:
|
||||
user_id = redirect_mobj.group('user_id')
|
||||
|
||||
if not user_id:
|
||||
user_id = mobj.group('user_id')
|
||||
|
||||
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
||||
|
@ -34,6 +34,7 @@ class VevoIE(VevoBaseIE):
|
||||
(?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
|
||||
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
|
||||
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
|
||||
https?://embed\.vevo\.com/.*?[?&]isrc=|
|
||||
vevo:)
|
||||
(?P<id>[^&?#]+)'''
|
||||
|
||||
@ -144,6 +145,9 @@ class VevoIE(VevoBaseIE):
|
||||
# Geo-restricted to Netherlands/Germany
|
||||
'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_VERSIONS = {
|
||||
0: 'youtube', # only in AuthenticateVideo videoVersions
|
||||
|
@ -21,7 +21,7 @@ from ..utils import (
|
||||
class VikiBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
|
||||
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
|
||||
_API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
|
||||
_API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
|
||||
|
||||
_APP = '100005a'
|
||||
_APP_VERSION = '2.2.5.1428709186'
|
||||
@ -377,7 +377,7 @@ class VikiChannelIE(VikiBaseIE):
|
||||
for video in page['response']:
|
||||
video_id = video['id']
|
||||
entries.append(self.url_result(
|
||||
'http://www.viki.com/videos/%s' % video_id, 'Viki'))
|
||||
'https://www.viki.com/videos/%s' % video_id, 'Viki'))
|
||||
if not page['pagination']['next']:
|
||||
break
|
||||
|
||||
|
@ -2,12 +2,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
@ -16,10 +18,10 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
NO_DEFAULT,
|
||||
OnDemandPagedList,
|
||||
parse_filesize,
|
||||
qualities,
|
||||
RegexNotFoundError,
|
||||
@ -99,6 +101,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
webpage, 'vuid', group='vuid')
|
||||
return xsrft, vuid
|
||||
|
||||
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
|
||||
vimeo_config = self._search_regex(
|
||||
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
|
||||
webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
|
||||
if vimeo_config:
|
||||
return self._parse_json(vimeo_config, video_id)
|
||||
|
||||
def _set_vimeo_cookie(self, name, value):
|
||||
self._set_cookie('vimeo.com', name, value)
|
||||
|
||||
@ -254,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
\.
|
||||
)?
|
||||
vimeo(?P<pro>pro)?\.com/
|
||||
(?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||
(?:.*?/)?
|
||||
(?:
|
||||
(?:
|
||||
@ -581,11 +590,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
# and latter we extract those that are Vimeo specific.
|
||||
self.report_extraction(video_id)
|
||||
|
||||
vimeo_config = self._search_regex(
|
||||
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
|
||||
'vimeo config', default=None)
|
||||
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
|
||||
if vimeo_config:
|
||||
seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
|
||||
seed_status = vimeo_config.get('seed_status', {})
|
||||
if seed_status.get('state') == 'failed':
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, seed_status['title']),
|
||||
@ -814,7 +821,8 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
return '%s/videos/page:%d/' % (base_url, pagenum)
|
||||
|
||||
def _extract_list_title(self, webpage):
|
||||
return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
||||
return self._TITLE or self._html_search_regex(
|
||||
self._TITLE_RE, webpage, 'list title', fatal=False)
|
||||
|
||||
def _login_list_password(self, page_url, list_id, webpage):
|
||||
login_form = self._search_regex(
|
||||
@ -905,7 +913,7 @@ class VimeoUserIE(VimeoChannelIE):
|
||||
|
||||
class VimeoAlbumIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:album'
|
||||
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
|
||||
_VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
|
||||
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/album/2632481',
|
||||
@ -925,21 +933,39 @@ class VimeoAlbumIE(VimeoChannelIE):
|
||||
'params': {
|
||||
'videopassword': 'youtube-dl',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: respect page number
|
||||
'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _page_url(self, base_url, pagenum):
|
||||
return '%s/page:%d/' % (base_url, pagenum)
|
||||
def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
|
||||
api_page = page + 1
|
||||
query = {
|
||||
'fields': 'link',
|
||||
'page': api_page,
|
||||
'per_page': self._PAGE_SIZE,
|
||||
}
|
||||
if hashed_pass:
|
||||
query['_hashed_pass'] = hashed_pass
|
||||
videos = self._download_json(
|
||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||
'Authorization': 'jwt ' + authorizaion,
|
||||
})['data']
|
||||
for video in videos:
|
||||
link = video.get('link')
|
||||
if not link:
|
||||
continue
|
||||
yield self.url_result(link, VimeoIE.ie_key(), VimeoIE._match_id(link))
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
|
||||
webpage = self._download_webpage(url, album_id)
|
||||
webpage = self._login_list_password(url, album_id, webpage)
|
||||
api_config = self._extract_vimeo_config(webpage, album_id)['api']
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, album_id, api_config['jwt'],
|
||||
api_config.get('hashed_pass')), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, album_id, self._html_search_regex(
|
||||
r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False))
|
||||
|
||||
|
||||
class VimeoGroupsIE(VimeoAlbumIE):
|
||||
@ -955,7 +981,7 @@ class VimeoGroupsIE(VimeoAlbumIE):
|
||||
}]
|
||||
|
||||
def _extract_list_title(self, webpage):
|
||||
return self._og_search_title(webpage)
|
||||
return self._og_search_title(webpage, fatal=False)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -1065,7 +1091,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):
|
||||
return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
|
||||
|
||||
|
||||
class VimeoLikesIE(InfoExtractor):
|
||||
class VimeoLikesIE(VimeoChannelIE):
|
||||
_VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
|
||||
IE_NAME = 'vimeo:likes'
|
||||
IE_DESC = 'Vimeo user likes'
|
||||
@ -1073,55 +1099,20 @@ class VimeoLikesIE(InfoExtractor):
|
||||
'url': 'https://vimeo.com/user755559/likes/',
|
||||
'playlist_mincount': 293,
|
||||
'info_dict': {
|
||||
'id': 'user755559_likes',
|
||||
'description': 'See all the videos urza likes',
|
||||
'title': 'Videos urza likes',
|
||||
'id': 'user755559',
|
||||
'title': 'urza’s Likes',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vimeo.com/stormlapse/likes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _page_url(self, base_url, pagenum):
|
||||
return '%s/page:%d/' % (base_url, pagenum)
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, user_id)
|
||||
page_count = self._int(
|
||||
self._search_regex(
|
||||
r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
|
||||
.*?</a></li>\s*<li\s+class="pagination_next">
|
||||
''', webpage, 'page count', default=1),
|
||||
'page count', fatal=True)
|
||||
PAGE_SIZE = 12
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
def _get_page(idx):
|
||||
page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % (
|
||||
user_id, idx + 1)
|
||||
webpage = self._download_webpage(
|
||||
page_url, user_id,
|
||||
note='Downloading page %d/%d' % (idx + 1, page_count))
|
||||
video_list = self._search_regex(
|
||||
r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>',
|
||||
webpage, 'video content')
|
||||
paths = re.findall(
|
||||
r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list)
|
||||
for path in paths:
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'url': compat_urlparse.urljoin(page_url, path),
|
||||
}
|
||||
|
||||
pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': '%s_likes' % user_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': pl,
|
||||
}
|
||||
return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
|
||||
|
||||
|
||||
class VHXEmbedIE(InfoExtractor):
|
||||
|
@ -24,6 +24,7 @@ from ..utils import (
|
||||
class VLiveIE(InfoExtractor):
|
||||
IE_NAME = 'vlive'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
|
||||
_NETRC_MACHINE = 'vlive'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vlive.tv/video/1326',
|
||||
'md5': 'cc7314812855ce56de70a06a27314983',
|
||||
@ -47,12 +48,55 @@ class VLiveIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vlive.tv/video/129100',
|
||||
'md5': 'ca2569453b79d66e5b919e5d308bff6b',
|
||||
'info_dict': {
|
||||
'id': '129100',
|
||||
'ext': 'mp4',
|
||||
'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
|
||||
'creator': 'BTS+',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:10',
|
||||
},
|
||||
'skip': 'This video is only available for CH+ subscribers',
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
email, password = self._get_login_info()
|
||||
if None in (email, password):
|
||||
return
|
||||
|
||||
def is_logged_in():
|
||||
login_info = self._download_json(
|
||||
'https://www.vlive.tv/auth/loginInfo', None,
|
||||
note='Downloading login info',
|
||||
headers={'Referer': 'https://www.vlive.tv/home'})
|
||||
return try_get(
|
||||
login_info, lambda x: x['message']['login'], bool) or False
|
||||
|
||||
LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
|
||||
self._request_webpage(
|
||||
LOGIN_URL, None, note='Downloading login cookies')
|
||||
|
||||
self._download_webpage(
|
||||
LOGIN_URL, None, note='Logging in',
|
||||
data=urlencode_postdata({'email': email, 'pwd': password}),
|
||||
headers={
|
||||
'Referer': LOGIN_URL,
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
|
||||
if not is_logged_in():
|
||||
raise ExtractorError('Unable to log in', expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@ -77,10 +121,7 @@ class VLiveIE(InfoExtractor):
|
||||
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
|
||||
return self._live(video_id, webpage)
|
||||
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
|
||||
if long_video_id and key:
|
||||
return self._replay(video_id, webpage, long_video_id, key)
|
||||
else:
|
||||
status = 'COMING_SOON'
|
||||
|
||||
if status == 'LIVE_END':
|
||||
raise ExtractorError('Uploading for replay. Please wait...',
|
||||
@ -91,13 +132,15 @@ class VLiveIE(InfoExtractor):
|
||||
raise ExtractorError('We are sorry, '
|
||||
'but the live broadcast has been canceled.',
|
||||
expected=True)
|
||||
elif status == 'ONLY_APP':
|
||||
raise ExtractorError('Unsupported video type', expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unknown status %s' % status)
|
||||
|
||||
def _get_common_fields(self, webpage):
|
||||
title = self._og_search_title(webpage)
|
||||
creator = self._html_search_regex(
|
||||
r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
|
||||
r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
|
||||
webpage, 'creator', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
return {
|
||||
@ -107,14 +150,7 @@ class VLiveIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _live(self, video_id, webpage):
|
||||
init_page = self._download_webpage(
|
||||
'https://www.vlive.tv/video/init/view',
|
||||
video_id, note='Downloading live webpage',
|
||||
data=urlencode_postdata({'videoSeq': video_id}),
|
||||
headers={
|
||||
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
init_page = self._download_init_page(video_id)
|
||||
|
||||
live_params = self._search_regex(
|
||||
r'"liveStreamInfo"\s*:\s*(".*"),',
|
||||
@ -140,6 +176,17 @@ class VLiveIE(InfoExtractor):
|
||||
return info
|
||||
|
||||
def _replay(self, video_id, webpage, long_video_id, key):
|
||||
if '' in (long_video_id, key):
|
||||
init_page = self._download_init_page(video_id)
|
||||
video_info = self._parse_json(self._search_regex(
|
||||
(r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
|
||||
r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
|
||||
video_id)
|
||||
if video_info.get('status') == 'NEED_CHANNEL_PLUS':
|
||||
self.raise_login_required(
|
||||
'This video is only available for CH+ subscribers')
|
||||
long_video_id, key = video_info['vid'], video_info['inkey']
|
||||
|
||||
playinfo = self._download_json(
|
||||
'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
|
||||
% compat_urllib_parse_urlencode({
|
||||
@ -180,6 +227,16 @@ class VLiveIE(InfoExtractor):
|
||||
})
|
||||
return info
|
||||
|
||||
def _download_init_page(self, video_id):
|
||||
return self._download_webpage(
|
||||
'https://www.vlive.tv/video/init/view',
|
||||
video_id, note='Downloading live webpage',
|
||||
data=urlencode_postdata({'videoSeq': video_id}),
|
||||
headers={
|
||||
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
|
||||
|
||||
class VLiveChannelIE(InfoExtractor):
|
||||
IE_NAME = 'vlive:channel'
|
||||
@ -275,26 +332,45 @@ class VLiveChannelIE(InfoExtractor):
|
||||
class VLivePlaylistIE(InfoExtractor):
|
||||
IE_NAME = 'vlive:playlist'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
|
||||
_TESTS = [{
|
||||
# regular working playlist
|
||||
'url': 'https://www.vlive.tv/video/117956/playlist/117963',
|
||||
'info_dict': {
|
||||
'id': '117963',
|
||||
'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
|
||||
},
|
||||
'playlist_mincount': 10
|
||||
}, {
|
||||
# playlist with no playlistVideoSeqs
|
||||
'url': 'http://www.vlive.tv/video/22867/playlist/22912',
|
||||
'info_dict': {
|
||||
'id': '22912',
|
||||
'title': 'Valentine Day Message from TWICE'
|
||||
'id': '22867',
|
||||
'ext': 'mp4',
|
||||
'title': '[V LIVE] Valentine Day Message from MINA',
|
||||
'creator': 'TWICE',
|
||||
'view_count': int
|
||||
},
|
||||
'playlist_mincount': 9
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _build_video_result(self, video_id, message):
|
||||
self.to_screen(message)
|
||||
return self.url_result(
|
||||
self._VIDEO_URL_TEMPLATE % video_id,
|
||||
ie=VLiveIE.ie_key(), video_id=video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, playlist_id = mobj.group('video_id', 'id')
|
||||
|
||||
VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen(
|
||||
'Downloading just video %s because of --no-playlist' % video_id)
|
||||
return self.url_result(
|
||||
VIDEO_URL_TEMPLATE % video_id,
|
||||
ie=VLiveIE.ie_key(), video_id=video_id)
|
||||
return self._build_video_result(
|
||||
video_id,
|
||||
'Downloading just video %s because of --no-playlist'
|
||||
% video_id)
|
||||
|
||||
self.to_screen(
|
||||
'Downloading playlist %s - add --no-playlist to just download video'
|
||||
@ -304,15 +380,21 @@ class VLivePlaylistIE(InfoExtractor):
|
||||
'http://www.vlive.tv/video/%s/playlist/%s'
|
||||
% (video_id, playlist_id), playlist_id)
|
||||
|
||||
item_ids = self._parse_json(
|
||||
self._search_regex(
|
||||
raw_item_ids = self._search_regex(
|
||||
r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
|
||||
'playlist video seqs'),
|
||||
playlist_id)
|
||||
'playlist video seqs', default=None, fatal=False)
|
||||
|
||||
if not raw_item_ids:
|
||||
return self._build_video_result(
|
||||
video_id,
|
||||
'Downloading just video %s because no playlist was found'
|
||||
% video_id)
|
||||
|
||||
item_ids = self._parse_json(raw_item_ids, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
|
||||
self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
|
||||
video_id=compat_str(item_id))
|
||||
for item_id in item_ids]
|
||||
|
||||
|
@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from .once import OnceIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VoxMediaVolumeIE(OnceIE):
|
||||
@ -13,18 +16,43 @@ class VoxMediaVolumeIE(OnceIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id)
|
||||
for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
|
||||
provider_video_id = video_data.get('%s_id' % provider_video_type)
|
||||
if not provider_video_id:
|
||||
continue
|
||||
|
||||
setup = self._parse_json(self._search_regex(
|
||||
r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
|
||||
video_data = setup.get('video') or {}
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video_data.get('title_short'),
|
||||
'description': video_data.get('description_long') or video_data.get('description_short'),
|
||||
'thumbnail': video_data.get('brightcove_thumbnail')
|
||||
}
|
||||
asset = setup.get('asset') or setup.get('params') or {}
|
||||
|
||||
formats = []
|
||||
hls_url = asset.get('hls_url')
|
||||
if hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
mp4_url = asset.get('mp4_url')
|
||||
if mp4_url:
|
||||
tbr = self._search_regex(r'-(\d+)k\.', mp4_url, 'bitrate', default=None)
|
||||
format_id = 'http'
|
||||
if tbr:
|
||||
format_id += '-' + tbr
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': mp4_url,
|
||||
'tbr': int_or_none(tbr),
|
||||
})
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
||||
for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
|
||||
provider_video_id = video_data.get('%s_id' % provider_video_type)
|
||||
if not provider_video_id:
|
||||
continue
|
||||
if provider_video_type == 'brightcove':
|
||||
info['formats'] = self._extract_once_formats(provider_video_id)
|
||||
self._sort_formats(info['formats'])
|
||||
@ -39,46 +67,49 @@ class VoxMediaVolumeIE(OnceIE):
|
||||
|
||||
|
||||
class VoxMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked|funnyordie)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
|
||||
_TESTS = [{
|
||||
# Volume embed, Youtube
|
||||
'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
|
||||
'info_dict': {
|
||||
'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe',
|
||||
'id': 'j4mLW6x17VM',
|
||||
'ext': 'mp4',
|
||||
'title': 'Google\'s new material design direction',
|
||||
'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
|
||||
'title': 'Material world: how Google discovered what software is made of',
|
||||
'description': 'md5:dfc17e7715e3b542d66e33a109861382',
|
||||
'upload_date': '20190710',
|
||||
'uploader_id': 'TheVerge',
|
||||
'uploader': 'The Verge',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
# data-ooyala-id
|
||||
# Volume embed, Youtube
|
||||
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
|
||||
'md5': 'd744484ff127884cd2ba09e3fa604e4b',
|
||||
'md5': '4c8f4a0937752b437c3ebc0ed24802b5',
|
||||
'info_dict': {
|
||||
'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B',
|
||||
'id': 'Gy8Md3Eky38',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Nexus 6: hands-on with Google\'s phablet',
|
||||
'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
|
||||
'description': 'md5:d9f0216e5fb932dd2033d6db37ac3f1d',
|
||||
'uploader_id': 'TheVerge',
|
||||
'upload_date': '20141021',
|
||||
'uploader': 'The Verge',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'skip': 'Video Not Found',
|
||||
'add_ie': ['Youtube'],
|
||||
'skip': 'similar to the previous test',
|
||||
}, {
|
||||
# volume embed
|
||||
# Volume embed, Youtube
|
||||
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
|
||||
'info_dict': {
|
||||
'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b',
|
||||
'id': 'YCjDnX-Xzhg',
|
||||
'ext': 'mp4',
|
||||
'title': 'The new frontier of LGBTQ civil rights, explained',
|
||||
'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
|
||||
'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
|
||||
'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
|
||||
'uploader_id': 'voxdotcom',
|
||||
'upload_date': '20150915',
|
||||
'uploader': 'Vox',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'add_ie': ['Youtube'],
|
||||
'skip': 'similar to the previous test',
|
||||
}, {
|
||||
# youtube embed
|
||||
'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
|
||||
@ -93,6 +124,7 @@ class VoxMediaIE(InfoExtractor):
|
||||
'uploader': 'Vox',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'skip': 'Page no longer contain videos',
|
||||
}, {
|
||||
# SBN.VideoLinkset.entryGroup multiple ooyala embeds
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
@ -118,10 +150,11 @@ class VoxMediaIE(InfoExtractor):
|
||||
'description': 'md5:e02d56b026d51aa32c010676765a690d',
|
||||
},
|
||||
}],
|
||||
'skip': 'Page no longer contain videos',
|
||||
}, {
|
||||
# volume embed, Brightcove Once
|
||||
'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya',
|
||||
'md5': '01571a896281f77dc06e084138987ea2',
|
||||
'md5': '2dbc77b8b0bff1894c2fce16eded637d',
|
||||
'info_dict': {
|
||||
'id': '1231c973d',
|
||||
'ext': 'mp4',
|
||||
|
@ -64,7 +64,15 @@ class VRVBaseIE(InfoExtractor):
|
||||
|
||||
def _call_cms(self, path, video_id, note):
|
||||
if not self._CMS_SIGNING:
|
||||
self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
|
||||
index = self._call_api('index', video_id, 'CMS Signing')
|
||||
self._CMS_SIGNING = index.get('cms_signing') or {}
|
||||
if not self._CMS_SIGNING:
|
||||
for signing_policy in index.get('signing_policies', []):
|
||||
signing_path = signing_policy.get('path')
|
||||
if signing_path and signing_path.startswith('/cms/'):
|
||||
name, value = signing_policy.get('name'), signing_policy.get('value')
|
||||
if name and value:
|
||||
self._CMS_SIGNING[name] = value
|
||||
return self._download_json(
|
||||
self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
|
||||
note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
|
||||
@ -130,7 +138,7 @@ class VRVIE(VRVBaseIE):
|
||||
self._TOKEN_SECRET = token_credentials['oauth_token_secret']
|
||||
|
||||
def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
|
||||
if not url or stream_format not in ('hls', 'dash'):
|
||||
if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
|
||||
return []
|
||||
stream_id_list = []
|
||||
if audio_lang:
|
||||
@ -140,7 +148,7 @@ class VRVIE(VRVBaseIE):
|
||||
format_id = stream_format
|
||||
if stream_id_list:
|
||||
format_id += '-' + '-'.join(stream_id_list)
|
||||
if stream_format == 'hls':
|
||||
if 'hls' in stream_format:
|
||||
adaptive_formats = self._extract_m3u8_formats(
|
||||
url, video_id, 'mp4', m3u8_id=format_id,
|
||||
note='Downloading %s information' % format_id,
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class VVVVIDIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vvvvid\.it/#!(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
# video_type == 'video/vvvvid'
|
||||
'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
|
||||
|
@ -32,6 +32,10 @@ class VzaarIE(InfoExtractor):
|
||||
'ext': 'mp3',
|
||||
'title': 'MP3',
|
||||
},
|
||||
}, {
|
||||
# with null videoTitle
|
||||
'url': 'https://view.vzaar.com/20313539/download',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -45,7 +49,7 @@ class VzaarIE(InfoExtractor):
|
||||
video_data = self._download_json(
|
||||
'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
|
||||
|
||||
title = video_data['videoTitle']
|
||||
title = video_data.get('videoTitle') or video_id
|
||||
|
||||
formats = []
|
||||
|
||||
|
@ -7,7 +7,7 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class XiamiBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
|
||||
_API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
|
@ -1,12 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
@ -18,7 +20,9 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
from .brightcove import (
|
||||
@ -556,3 +560,130 @@ class YahooGyaOIE(InfoExtractor):
|
||||
'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'),
|
||||
YahooGyaOPlayerIE.ie_key(), video_id))
|
||||
return self.playlist_result(entries, program_id)
|
||||
|
||||
|
||||
class YahooJapanNewsIE(InfoExtractor):
|
||||
IE_NAME = 'yahoo:japannews'
|
||||
IE_DESC = 'Yahoo! Japan News'
|
||||
_VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?'
|
||||
_GEO_COUNTRIES = ['JP']
|
||||
_TESTS = [{
|
||||
'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int',
|
||||
'info_dict': {
|
||||
'id': '1736242',
|
||||
'ext': 'mp4',
|
||||
'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース',
|
||||
'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))',
|
||||
'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# geo restricted
|
||||
'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://headlines.yahoo.co.jp/videonews/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://news.yahoo.co.jp',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://news.yahoo.co.jp/feature/1356',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _extract_formats(self, json_data, content_id):
|
||||
formats = []
|
||||
|
||||
video_data = try_get(
|
||||
json_data,
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list)
|
||||
for vid in video_data or []:
|
||||
delivery = vid.get('delivery')
|
||||
url = url_or_none(vid.get('Url'))
|
||||
if not delivery or not url:
|
||||
continue
|
||||
elif delivery == 'hls':
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(
|
||||
url, content_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': url,
|
||||
'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')),
|
||||
'height': int_or_none(vid.get('height')),
|
||||
'width': int_or_none(vid.get('width')),
|
||||
'tbr': int_or_none(vid.get('bitrate')),
|
||||
})
|
||||
self._remove_duplicate_formats(formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
display_id = mobj.group('id') or host
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, 'title', default=None
|
||||
) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title')
|
||||
|
||||
if display_id == host:
|
||||
# Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...)
|
||||
stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage)
|
||||
entries = [
|
||||
self.url_result(
|
||||
smuggle_url(
|
||||
'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id,
|
||||
{'geo_countries': ['JP']}),
|
||||
ie='BrightcoveNew', video_id=plist_id)
|
||||
for plist_id in stream_plists]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
# Article page
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:image', webpage, 'thumbnail', default=None)
|
||||
space_id = self._search_regex([
|
||||
r'<script[^>]+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)',
|
||||
r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)',
|
||||
r'<!--\s+SpaceID=(\d+)'
|
||||
], webpage, 'spaceid')
|
||||
|
||||
content_id = self._search_regex(
|
||||
r'<script[^>]+class=["\']yvpub-player["\'][^>]+contentid=(?P<contentid>[^&"\']+)',
|
||||
webpage, 'contentid', group='contentid')
|
||||
|
||||
json_data = self._download_json(
|
||||
'https://feapi-yvpub.yahooapis.jp/v1/content/%s' % content_id,
|
||||
content_id,
|
||||
query={
|
||||
'appid': 'dj0zaiZpPVZMTVFJR0FwZWpiMyZzPWNvbnN1bWVyc2VjcmV0Jng9YjU-',
|
||||
'output': 'json',
|
||||
'space_id': space_id,
|
||||
'domain': host,
|
||||
'ak': hashlib.md5('_'.join((space_id, host)).encode()).hexdigest(),
|
||||
'device_type': '1100',
|
||||
})
|
||||
formats = self._extract_formats(json_data, content_id)
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@ -51,23 +52,43 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
IE_DESC = 'Яндекс.Музыка - Трек'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://music.yandex.ru/album/540508/track/4878838',
|
||||
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
|
||||
'info_dict': {
|
||||
'id': '4878838',
|
||||
'ext': 'mp3',
|
||||
'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
|
||||
'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
|
||||
'filesize': 4628061,
|
||||
'duration': 193.04,
|
||||
'track': 'Gypsy Eyes 1',
|
||||
'album': 'Gypsy Soul',
|
||||
'album_artist': 'Carlo Ambrosio',
|
||||
'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari',
|
||||
'artist': 'Carlo Ambrosio & Fabio Di Bari',
|
||||
'release_year': 2009,
|
||||
},
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}
|
||||
}, {
|
||||
# multiple disks
|
||||
'url': 'http://music.yandex.ru/album/3840501/track/705105',
|
||||
'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e',
|
||||
'info_dict': {
|
||||
'id': '705105',
|
||||
'ext': 'mp3',
|
||||
'title': 'Hooverphonic - Sometimes',
|
||||
'filesize': 5743386,
|
||||
'duration': 239.27,
|
||||
'track': 'Sometimes',
|
||||
'album': 'The Best of Hooverphonic',
|
||||
'album_artist': 'Hooverphonic',
|
||||
'artist': 'Hooverphonic',
|
||||
'release_year': 2016,
|
||||
'genre': 'pop',
|
||||
'disc_number': 2,
|
||||
'track_number': 9,
|
||||
},
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -110,9 +131,21 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
'abr': int_or_none(download_data.get('bitrate')),
|
||||
}
|
||||
|
||||
def extract_artist_name(artist):
|
||||
decomposed = artist.get('decomposed')
|
||||
if not isinstance(decomposed, list):
|
||||
return artist['name']
|
||||
parts = [artist['name']]
|
||||
for element in decomposed:
|
||||
if isinstance(element, dict) and element.get('name'):
|
||||
parts.append(element['name'])
|
||||
elif isinstance(element, compat_str):
|
||||
parts.append(element)
|
||||
return ''.join(parts)
|
||||
|
||||
def extract_artist(artist_list):
|
||||
if artist_list and isinstance(artist_list, list):
|
||||
artists_names = [a['name'] for a in artist_list if a.get('name')]
|
||||
artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
|
||||
if artists_names:
|
||||
return ', '.join(artists_names)
|
||||
|
||||
@ -121,10 +154,17 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
album = albums[0]
|
||||
if isinstance(album, dict):
|
||||
year = album.get('year')
|
||||
disc_number = int_or_none(try_get(
|
||||
album, lambda x: x['trackPosition']['volume']))
|
||||
track_number = int_or_none(try_get(
|
||||
album, lambda x: x['trackPosition']['index']))
|
||||
track_info.update({
|
||||
'album': album.get('title'),
|
||||
'album_artist': extract_artist(album.get('artists')),
|
||||
'release_year': int_or_none(year),
|
||||
'genre': album.get('genre'),
|
||||
'disc_number': disc_number,
|
||||
'track_number': track_number,
|
||||
})
|
||||
|
||||
track_artist = extract_artist(track.get('artists'))
|
||||
@ -152,7 +192,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
|
||||
IE_DESC = 'Яндекс.Музыка - Альбом'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://music.yandex.ru/album/540508',
|
||||
'info_dict': {
|
||||
'id': '540508',
|
||||
@ -160,7 +200,15 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://music.yandex.ru/album/3840501',
|
||||
'info_dict': {
|
||||
'id': '3840501',
|
||||
'title': 'Hooverphonic - The Best of Hooverphonic (2016)',
|
||||
},
|
||||
'playlist_count': 33,
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
@ -169,7 +217,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
|
||||
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
|
||||
album_id, 'Downloading album JSON')
|
||||
|
||||
entries = self._build_playlist(album['volumes'][0])
|
||||
entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
|
||||
|
||||
title = '%s - %s' % (album['artists'][0]['name'], album['title'])
|
||||
year = album.get('year')
|
||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor):
|
||||
# episode, sports
|
||||
'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# DASH with DRM
|
||||
'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor):
|
||||
'disable_trackings': 1,
|
||||
})['content']
|
||||
|
||||
m3u8_url = url_or_none(content.get('content_url')) or url_or_none(
|
||||
content_url = url_or_none(content.get('content_url')) or url_or_none(
|
||||
content['streams'][0]['url'])
|
||||
title = content.get('title') or content.get('computed_title')
|
||||
|
||||
ext = determine_ext(content_url)
|
||||
|
||||
if ext == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
content_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
elif ext == 'mpd':
|
||||
formats = self._extract_mpd_formats(
|
||||
content_url, video_id, mpd_id='dash')
|
||||
else:
|
||||
formats = [{'url': content_url}]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = content.get('description')
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user