mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-12-26 08:17:54 +01:00
commit
9599ce0bc3
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support
|
- [ ] I'm reporting a broken site support
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.29**
|
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||||
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2019.10.29
|
[debug] youtube-dl version 2019.11.05
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@ -19,7 +19,7 @@ labels: 'site-support-request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, none of the provided example URLs should violate any copyrights.
|
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, none of the provided example URLs should violate any copyrights.
|
||||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a new site support request
|
- [ ] I'm reporting a new site support request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.29**
|
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that none of the provided URLs violate any copyrights
|
- [ ] I've checked that none of the provided URLs violate any copyrights
|
||||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||||
|
@ -18,13 +18,13 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a site feature request
|
- [ ] I'm reporting a site feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.29**
|
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support issue
|
- [ ] I'm reporting a broken site support issue
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.29**
|
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||||
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2019.10.29
|
[debug] youtube-dl version 2019.11.05
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@ -19,13 +19,13 @@ labels: 'request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a feature request
|
- [ ] I'm reporting a feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.29**
|
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
@ -21,6 +21,12 @@ matrix:
|
|||||||
- python: 3.7
|
- python: 3.7
|
||||||
dist: xenial
|
dist: xenial
|
||||||
env: YTDL_TEST_SET=download
|
env: YTDL_TEST_SET=download
|
||||||
|
- python: 3.8
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=core
|
||||||
|
- python: 3.8
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=download
|
||||||
- python: 3.8-dev
|
- python: 3.8-dev
|
||||||
dist: xenial
|
dist: xenial
|
||||||
env: YTDL_TEST_SET=core
|
env: YTDL_TEST_SET=core
|
||||||
|
44
ChangeLog
44
ChangeLog
@ -1,3 +1,47 @@
|
|||||||
|
version 2019.11.05
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [scte] Add support for learning.scte.org (#22975)
|
||||||
|
+ [msn] Add support for Vidible and AOL embeds (#22195, #22227)
|
||||||
|
* [myspass] Fix video URL extraction and improve metadata extraction (#22448)
|
||||||
|
* [jamendo] Improve extraction
|
||||||
|
* Fix album extraction (#18564)
|
||||||
|
* Improve metadata extraction (#18565, #21379)
|
||||||
|
* [mediaset] Relax URL guid matching (#18352)
|
||||||
|
+ [mediaset] Extract unprotected M3U and MPD manifests (#17204)
|
||||||
|
* [telegraaf] Fix extraction
|
||||||
|
+ [bellmedia] Add support for marilyn.ca videos (#22193)
|
||||||
|
* [stv] Fix extraction (#22928)
|
||||||
|
- [iconosquare] Remove extractor
|
||||||
|
- [keek] Remove extractor
|
||||||
|
- [gameone] Remove extractor (#21778)
|
||||||
|
- [flipagram] Remove extractor
|
||||||
|
- [bambuser] Remove extractor
|
||||||
|
* [wistia] Reduce embed extraction false positives
|
||||||
|
+ [wistia] Add support for inline embeds (#22931)
|
||||||
|
- [go90] Remove extractor
|
||||||
|
* [kakao] Remove raw request
|
||||||
|
+ [kakao] Extract format total bitrate
|
||||||
|
* [daum] Fix VOD and Clip extraction (#15015)
|
||||||
|
* [kakao] Improve extraction
|
||||||
|
+ Add support for embed URLs
|
||||||
|
+ Add support for Kakao Legacy vid based embed URLs
|
||||||
|
* Only extract fields used for extraction
|
||||||
|
* Strip description and extract tags
|
||||||
|
* [mixcloud] Fix cloudcast data extraction (#22821)
|
||||||
|
* [yahoo] Improve extraction
|
||||||
|
+ Add support for live streams (#3597, #3779, #22178)
|
||||||
|
* Bypass cookie consent page for european domains (#16948, #22576)
|
||||||
|
+ Add generic support for embeds (#20332)
|
||||||
|
* [tv2] Fix and improve extraction (#22787)
|
||||||
|
+ [tv2dk] Add support for TV2 DK sites
|
||||||
|
* [onet] Improve extraction …
|
||||||
|
+ Add support for onet100.vod.pl
|
||||||
|
+ Extract m3u8 formats
|
||||||
|
* Correct audio only format info
|
||||||
|
* [fox9] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
version 2019.10.29
|
version 2019.10.29
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
@ -752,8 +752,8 @@ As a last resort, you can also uninstall the version installed by your package m
|
|||||||
Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
|
Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
|
||||||
|
|
||||||
```
|
```
|
||||||
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||||
hash -r
|
hash -r
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -76,8 +76,6 @@
|
|||||||
- **awaan:video**
|
- **awaan:video**
|
||||||
- **AZMedien**: AZ Medien videos
|
- **AZMedien**: AZ Medien videos
|
||||||
- **BaiduVideo**: 百度视频
|
- **BaiduVideo**: 百度视频
|
||||||
- **bambuser**
|
|
||||||
- **bambuser:channel**
|
|
||||||
- **Bandcamp**
|
- **Bandcamp**
|
||||||
- **Bandcamp:album**
|
- **Bandcamp:album**
|
||||||
- **Bandcamp:weekly**
|
- **Bandcamp:weekly**
|
||||||
@ -284,12 +282,12 @@
|
|||||||
- **FiveThirtyEight**
|
- **FiveThirtyEight**
|
||||||
- **FiveTV**
|
- **FiveTV**
|
||||||
- **Flickr**
|
- **Flickr**
|
||||||
- **Flipagram**
|
|
||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
- **FootyRoom**
|
- **FootyRoom**
|
||||||
- **Formula1**
|
- **Formula1**
|
||||||
- **FOX**
|
- **FOX**
|
||||||
- **FOX9**
|
- **FOX9**
|
||||||
|
- **FOX9News**
|
||||||
- **Foxgay**
|
- **Foxgay**
|
||||||
- **foxnews**: Fox News and Fox Business Video
|
- **foxnews**: Fox News and Fox Business Video
|
||||||
- **foxnews:article**
|
- **foxnews:article**
|
||||||
@ -315,8 +313,6 @@
|
|||||||
- **FXNetworks**
|
- **FXNetworks**
|
||||||
- **Gaia**
|
- **Gaia**
|
||||||
- **GameInformer**
|
- **GameInformer**
|
||||||
- **GameOne**
|
|
||||||
- **gameone:playlist**
|
|
||||||
- **GameSpot**
|
- **GameSpot**
|
||||||
- **GameStar**
|
- **GameStar**
|
||||||
- **Gaskrank**
|
- **Gaskrank**
|
||||||
@ -331,7 +327,6 @@
|
|||||||
- **Globo**
|
- **Globo**
|
||||||
- **GloboArticle**
|
- **GloboArticle**
|
||||||
- **Go**
|
- **Go**
|
||||||
- **Go90**
|
|
||||||
- **GodTube**
|
- **GodTube**
|
||||||
- **Golem**
|
- **Golem**
|
||||||
- **GoogleDrive**
|
- **GoogleDrive**
|
||||||
@ -366,7 +361,6 @@
|
|||||||
- **Hungama**
|
- **Hungama**
|
||||||
- **HungamaSong**
|
- **HungamaSong**
|
||||||
- **Hypem**
|
- **Hypem**
|
||||||
- **Iconosquare**
|
|
||||||
- **ign.com**
|
- **ign.com**
|
||||||
- **imdb**: Internet Movie Database trailers
|
- **imdb**: Internet Movie Database trailers
|
||||||
- **imdb:list**: Internet Movie Database lists
|
- **imdb:list**: Internet Movie Database lists
|
||||||
@ -406,7 +400,6 @@
|
|||||||
- **Kankan**
|
- **Kankan**
|
||||||
- **Karaoketv**
|
- **Karaoketv**
|
||||||
- **KarriereVideos**
|
- **KarriereVideos**
|
||||||
- **keek**
|
|
||||||
- **KeezMovies**
|
- **KeezMovies**
|
||||||
- **Ketnet**
|
- **Ketnet**
|
||||||
- **KhanAcademy**
|
- **KhanAcademy**
|
||||||
@ -777,6 +770,8 @@
|
|||||||
- **Screencast**
|
- **Screencast**
|
||||||
- **ScreencastOMatic**
|
- **ScreencastOMatic**
|
||||||
- **scrippsnetworks:watch**
|
- **scrippsnetworks:watch**
|
||||||
|
- **SCTE**
|
||||||
|
- **SCTECourse**
|
||||||
- **Seeker**
|
- **Seeker**
|
||||||
- **SenateISVP**
|
- **SenateISVP**
|
||||||
- **SendtoNews**
|
- **SendtoNews**
|
||||||
@ -926,6 +921,7 @@
|
|||||||
- **TV2**
|
- **TV2**
|
||||||
- **tv2.hu**
|
- **tv2.hu**
|
||||||
- **TV2Article**
|
- **TV2Article**
|
||||||
|
- **TV2DK**
|
||||||
- **TV4**: tv4.se and tv4play.se
|
- **TV4**: tv4.se and tv4play.se
|
||||||
- **TV5MondePlus**: TV5MONDE+
|
- **TV5MondePlus**: TV5MONDE+
|
||||||
- **TVA**
|
- **TVA**
|
||||||
|
@ -1,95 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import (
|
|
||||||
compat_HTTPError,
|
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
qualities,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class AddAnimeIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
|
||||||
'md5': '72954ea10bc979ab5e2eb288b21425a0',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '24MR3YO5SAS9',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'One Piece 606',
|
|
||||||
'title': 'One Piece 606',
|
|
||||||
},
|
|
||||||
'skip': 'Video is gone',
|
|
||||||
}, {
|
|
||||||
'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
try:
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
except ExtractorError as ee:
|
|
||||||
if not isinstance(ee.cause, compat_HTTPError) or \
|
|
||||||
ee.cause.code != 503:
|
|
||||||
raise
|
|
||||||
|
|
||||||
redir_webpage = ee.cause.read().decode('utf-8')
|
|
||||||
action = self._search_regex(
|
|
||||||
r'<form id="challenge-form" action="([^"]+)"',
|
|
||||||
redir_webpage, 'Redirect form')
|
|
||||||
vc = self._search_regex(
|
|
||||||
r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
|
|
||||||
redir_webpage, 'redirect vc value')
|
|
||||||
av = re.search(
|
|
||||||
r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
|
|
||||||
redir_webpage)
|
|
||||||
if av is None:
|
|
||||||
raise ExtractorError('Cannot find redirect math task')
|
|
||||||
av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
|
|
||||||
|
|
||||||
parsed_url = compat_urllib_parse_urlparse(url)
|
|
||||||
av_val = av_res + len(parsed_url.netloc)
|
|
||||||
confirm_url = (
|
|
||||||
parsed_url.scheme + '://' + parsed_url.netloc
|
|
||||||
+ action + '?'
|
|
||||||
+ compat_urllib_parse_urlencode({
|
|
||||||
'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
|
|
||||||
self._download_webpage(
|
|
||||||
confirm_url, video_id,
|
|
||||||
note='Confirming after redirect')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
FORMATS = ('normal', 'hq')
|
|
||||||
quality = qualities(FORMATS)
|
|
||||||
formats = []
|
|
||||||
for format_id in FORMATS:
|
|
||||||
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
|
|
||||||
video_url = self._search_regex(rex, webpage, 'video file URLx',
|
|
||||||
fatal=False)
|
|
||||||
if not video_url:
|
|
||||||
continue
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': video_url,
|
|
||||||
'quality': quality(format_id),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
video_title = self._og_search_title(webpage)
|
|
||||||
video_description = self._og_search_description(webpage)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'video',
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'title': video_title,
|
|
||||||
'description': video_description
|
|
||||||
}
|
|
@ -1,142 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
import itertools
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BambuserIE(InfoExtractor):
|
|
||||||
IE_NAME = 'bambuser'
|
|
||||||
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
|
|
||||||
_API_KEY = '005f64509e19a868399060af746a00aa'
|
|
||||||
_LOGIN_URL = 'https://bambuser.com/user'
|
|
||||||
_NETRC_MACHINE = 'bambuser'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://bambuser.com/v/4050584',
|
|
||||||
# MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388
|
|
||||||
# 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4050584',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Education engineering days - lightning talks',
|
|
||||||
'duration': 3741,
|
|
||||||
'uploader': 'pixelversity',
|
|
||||||
'uploader_id': '344706',
|
|
||||||
'timestamp': 1382976692,
|
|
||||||
'upload_date': '20131028',
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# It doesn't respect the 'Range' header, it would download the whole video
|
|
||||||
# caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _login(self):
|
|
||||||
username, password = self._get_login_info()
|
|
||||||
if username is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
login_form = {
|
|
||||||
'form_id': 'user_login',
|
|
||||||
'op': 'Log in',
|
|
||||||
'name': username,
|
|
||||||
'pass': password,
|
|
||||||
}
|
|
||||||
|
|
||||||
request = sanitized_Request(
|
|
||||||
self._LOGIN_URL, urlencode_postdata(login_form))
|
|
||||||
request.add_header('Referer', self._LOGIN_URL)
|
|
||||||
response = self._download_webpage(
|
|
||||||
request, None, 'Logging in')
|
|
||||||
|
|
||||||
login_error = self._html_search_regex(
|
|
||||||
r'(?s)<div class="messages error">(.+?)</div>',
|
|
||||||
response, 'login error', default=None)
|
|
||||||
if login_error:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Unable to login: %s' % login_error, expected=True)
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
|
||||||
self._login()
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
info = self._download_json(
|
|
||||||
'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
|
|
||||||
% (self._API_KEY, video_id), video_id)
|
|
||||||
|
|
||||||
error = info.get('error')
|
|
||||||
if error:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
|
|
||||||
|
|
||||||
result = info['result']
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': result['title'],
|
|
||||||
'url': result['url'],
|
|
||||||
'thumbnail': result.get('preview'),
|
|
||||||
'duration': int_or_none(result.get('length')),
|
|
||||||
'uploader': result.get('username'),
|
|
||||||
'uploader_id': compat_str(result.get('owner', {}).get('uid')),
|
|
||||||
'timestamp': int_or_none(result.get('created')),
|
|
||||||
'fps': float_or_none(result.get('framerate')),
|
|
||||||
'view_count': int_or_none(result.get('views_total')),
|
|
||||||
'comment_count': int_or_none(result.get('comment_count')),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class BambuserChannelIE(InfoExtractor):
|
|
||||||
IE_NAME = 'bambuser:channel'
|
|
||||||
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
|
|
||||||
# The maximum number we can get with each request
|
|
||||||
_STEP = 50
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://bambuser.com/channel/pixelversity',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'pixelversity',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 60,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
user = mobj.group('user')
|
|
||||||
urls = []
|
|
||||||
last_id = ''
|
|
||||||
for i in itertools.count(1):
|
|
||||||
req_url = (
|
|
||||||
'http://bambuser.com/xhr-api/index.php?username={user}'
|
|
||||||
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
|
|
||||||
'&method=broadcast&format=json&vid_older_than={last}'
|
|
||||||
).format(user=user, count=self._STEP, last=last_id)
|
|
||||||
req = sanitized_Request(req_url)
|
|
||||||
# Without setting this header, we wouldn't get any result
|
|
||||||
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
|
|
||||||
data = self._download_json(
|
|
||||||
req, user, 'Downloading page %d' % i)
|
|
||||||
results = data['result']
|
|
||||||
if not results:
|
|
||||||
break
|
|
||||||
last_id = results[-1]['vid']
|
|
||||||
urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'title': user,
|
|
||||||
'entries': urls,
|
|
||||||
}
|
|
@ -22,7 +22,8 @@ class BellMediaIE(InfoExtractor):
|
|||||||
bravo|
|
bravo|
|
||||||
mtv|
|
mtv|
|
||||||
space|
|
space|
|
||||||
etalk
|
etalk|
|
||||||
|
marilyn
|
||||||
)\.ca|
|
)\.ca|
|
||||||
much\.com
|
much\.com
|
||||||
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||||
@ -70,6 +71,7 @@ class BellMediaIE(InfoExtractor):
|
|||||||
'animalplanet': 'aniplan',
|
'animalplanet': 'aniplan',
|
||||||
'etalk': 'ctv',
|
'etalk': 'ctv',
|
||||||
'bnnbloomberg': 'bnn',
|
'bnnbloomberg': 'bnn',
|
||||||
|
'marilyn': 'ctv_marilyn',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,74 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
parse_duration,
|
|
||||||
parse_iso8601,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ComCarCoffIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2494164',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20141127',
|
|
||||||
'timestamp': 1417107600,
|
|
||||||
'duration': 1232,
|
|
||||||
'title': 'Happy Thanksgiving Miranda',
|
|
||||||
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': 'requires ffmpeg',
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
if not display_id:
|
|
||||||
display_id = 'comediansincarsgettingcoffee.com'
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
full_data = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
|
|
||||||
display_id)['videoData']
|
|
||||||
|
|
||||||
display_id = full_data['activeVideo']['video']
|
|
||||||
video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
|
|
||||||
|
|
||||||
video_id = compat_str(video_data['mediaId'])
|
|
||||||
title = video_data['title']
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
video_data['mediaUrl'], video_id, 'mp4')
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnails = [{
|
|
||||||
'url': video_data['images']['thumb'],
|
|
||||||
}, {
|
|
||||||
'url': video_data['images']['poster'],
|
|
||||||
}]
|
|
||||||
|
|
||||||
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
|
|
||||||
video_data.get('pubDate'))
|
|
||||||
duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
|
|
||||||
video_data.get('duration'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': video_data.get('description'),
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'duration': duration,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'formats': formats,
|
|
||||||
'season_number': int_or_none(video_data.get('season')),
|
|
||||||
'episode_number': int_or_none(video_data.get('episode')),
|
|
||||||
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
|
||||||
}
|
|
@ -1455,14 +1455,14 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||||
fatal=True, m3u8_id=None):
|
fatal=True, m3u8_id=None, data=None, headers={}, query={}):
|
||||||
manifest = self._download_xml(
|
manifest = self._download_xml(
|
||||||
manifest_url, video_id, 'Downloading f4m manifest',
|
manifest_url, video_id, 'Downloading f4m manifest',
|
||||||
'Unable to download f4m manifest',
|
'Unable to download f4m manifest',
|
||||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||||
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
|
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
|
||||||
transform_source=transform_source,
|
transform_source=transform_source,
|
||||||
fatal=fatal)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
|
|
||||||
if manifest is False:
|
if manifest is False:
|
||||||
return []
|
return []
|
||||||
@ -1586,12 +1586,13 @@ class InfoExtractor(object):
|
|||||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||||
entry_protocol='m3u8', preference=None,
|
entry_protocol='m3u8', preference=None,
|
||||||
m3u8_id=None, note=None, errnote=None,
|
m3u8_id=None, note=None, errnote=None,
|
||||||
fatal=True, live=False):
|
fatal=True, live=False, data=None, headers={},
|
||||||
|
query={}):
|
||||||
res = self._download_webpage_handle(
|
res = self._download_webpage_handle(
|
||||||
m3u8_url, video_id,
|
m3u8_url, video_id,
|
||||||
note=note or 'Downloading m3u8 information',
|
note=note or 'Downloading m3u8 information',
|
||||||
errnote=errnote or 'Failed to download m3u8 information',
|
errnote=errnote or 'Failed to download m3u8 information',
|
||||||
fatal=fatal)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
|
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
@ -2009,12 +2010,12 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}):
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
mpd_url, video_id,
|
mpd_url, video_id,
|
||||||
note=note or 'Downloading MPD manifest',
|
note=note or 'Downloading MPD manifest',
|
||||||
errnote=errnote or 'Failed to download MPD manifest',
|
errnote=errnote or 'Failed to download MPD manifest',
|
||||||
fatal=fatal)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
mpd_doc, urlh = res
|
mpd_doc, urlh = res
|
||||||
@ -2317,12 +2318,12 @@ class InfoExtractor(object):
|
|||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
ism_url, video_id,
|
ism_url, video_id,
|
||||||
note=note or 'Downloading ISM manifest',
|
note=note or 'Downloading ISM manifest',
|
||||||
errnote=errnote or 'Failed to download ISM manifest',
|
errnote=errnote or 'Failed to download ISM manifest',
|
||||||
fatal=fatal)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
ism_doc, urlh = res
|
ism_doc, urlh = res
|
||||||
@ -2689,7 +2690,7 @@ class InfoExtractor(object):
|
|||||||
entry = {
|
entry = {
|
||||||
'id': this_video_id,
|
'id': this_video_id,
|
||||||
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
||||||
'description': video_data.get('description'),
|
'description': clean_html(video_data.get('description')),
|
||||||
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
|
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
|
||||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||||
|
@ -1,154 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import base64
|
|
||||||
import json
|
|
||||||
import random
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..aes import (
|
|
||||||
aes_cbc_decrypt,
|
|
||||||
aes_cbc_encrypt,
|
|
||||||
)
|
|
||||||
from ..compat import compat_b64decode
|
|
||||||
from ..utils import (
|
|
||||||
bytes_to_intlist,
|
|
||||||
bytes_to_long,
|
|
||||||
extract_attributes,
|
|
||||||
ExtractorError,
|
|
||||||
intlist_to_bytes,
|
|
||||||
js_to_json,
|
|
||||||
int_or_none,
|
|
||||||
long_to_bytes,
|
|
||||||
pkcs1pad,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class DaisukiMottoIE(InfoExtractor):
    """Extractor for embedded players on motto.daisuki.net.

    The stream description is obtained from the ``fastAPI/bgn/init/``
    endpoint.  The request carries an AES-CBC encrypted JSON payload
    together with the AES key itself, RSA-encrypted with the site's
    public key; the response field ``rtn`` is decrypted with the same
    AES key.
    """
    _VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P<id>[0-9a-zA-Z]{3})'

    _TEST = {
        'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428',
        'info_dict': {
            'id': 'V2e',
            'ext': 'mp4',
            'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!',
            'subtitles': {
                'mul': [{
                    'ext': 'ttml',
                }],
            },
        },
        'params': {
            'skip_download': True,  # AES-encrypted HLS stream
        },
    }

    # The public key in PEM format can be found in clientlibs_anime_watch.min.js
    _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537)

    # How many freshly generated AES keys to try before giving up.
    _INIT_ATTEMPTS = 5

    def _real_extract(self, url):
        """Return the info dict (title, HLS formats, subtitles) for *url*.

        Raises ExtractorError when none of the attempts yields an
        ``rtn`` field in the init response.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        flashvars = self._parse_json(self._search_regex(
            r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
            video_id, transform_source=js_to_json)

        # An all-zero IV is used for both the request and the response.
        iv = [0] * 16

        data = {}
        for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'):
            data[key] = flashvars.get(key, '')

        encrypted_rtn = None
        n, e = self._RSA_KEY

        # Some AES keys are rejected. Try it with different AES keys
        for idx in range(self._INIT_ATTEMPTS):
            # NOTE(review): the upper bound 254 never produces 0xff bytes;
            # kept as in the original, unclear whether that is deliberate.
            aes_key = [random.randint(0, 254) for _ in range(32)]
            padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128))

            # Textbook RSA on the padded key: c = m^e mod n.
            encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
            init_data = self._download_json(
                'http://motto.daisuki.net/fastAPI/bgn/init/',
                video_id, query={
                    's': flashvars.get('s', ''),
                    'c': flashvars.get('ss3_prm', ''),
                    'e': url,
                    'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
                        bytes_to_intlist(json.dumps(data)),
                        aes_key, iv))).decode('ascii'),
                    'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
                }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))

            if 'rtn' in init_data:
                encrypted_rtn = init_data['rtn']
                break

            # Pause between attempts only; waiting after the final failure
            # would merely delay the error below.
            if idx < self._INIT_ATTEMPTS - 1:
                self._sleep(5, video_id)

        if encrypted_rtn is None:
            raise ExtractorError('Failed to fetch init data')

        # aes_key still holds the key of the attempt that succeeded; the
        # decrypted JSON has its trailing NUL bytes stripped before parsing.
        rtn = self._parse_json(
            intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
                compat_b64decode(encrypted_rtn)),
                aes_key, iv)).decode('utf-8').rstrip('\0'),
            video_id)

        title = rtn['title_str']

        formats = self._extract_m3u8_formats(
            rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
        # Order the variants worst-to-best like the other extractors do, so
        # that default format selection picks the best available stream.
        self._sort_formats(formats)

        subtitles = {}
        caption_url = rtn.get('caption_url')
        if caption_url:
            # mul: multiple languages
            subtitles['mul'] = [{
                'url': caption_url,
                'ext': 'ttml',
            }]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
        }
|
|
||||||
|
|
||||||
|
|
||||||
class DaisukiMottoPlaylistIE(InfoExtractor):
    """Playlist extractor for the motto.daisuki.net "information" page.

    Every ``<li>`` tag carrying a three-character ``data-product_id`` is
    turned into a url_transparent entry pointing at the embed player
    handled by DaisukiMottoIE.
    """
    _VALID_URL = r'https?://motto\.daisuki\.net/(?P<id>information)/'

    _TEST = {
        'url': 'http://motto.daisuki.net/information/',
        'info_dict': {
            'title': 'DRAGON BALL SUPER',
        },
        'playlist_mincount': 117,
    }

    def _real_extract(self, url):
        """Collect one entry per listed episode and wrap them in a playlist."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        item_tags = re.findall(
            r'(<li[^>]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage)

        entries = []
        for tag in item_tags:
            attributes = extract_attributes(tag)
            ad_id = attributes.get('data-ad_id')
            product_id = attributes.get('data-product_id')
            # Both identifiers are needed to build the embed URL.
            if not (ad_id and product_id):
                continue
            chapter = attributes.get('data-chapter')
            embed_url = 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id)
            entries.append({
                '_type': 'url_transparent',
                'url': embed_url,
                'episode_id': chapter,
                'episode_number': int_or_none(chapter),
                'ie_key': 'DaisukiMotto',
            })

        return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER')
|
|
@ -146,6 +146,11 @@ class DPlayIE(InfoExtractor):
|
|||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
disco_base + 'content/videos/' + display_id, display_id,
|
disco_base + 'content/videos/' + display_id, display_id,
|
||||||
headers=headers, query={
|
headers=headers, query={
|
||||||
|
'fields[channel]': 'name',
|
||||||
|
'fields[image]': 'height,src,width',
|
||||||
|
'fields[show]': 'name',
|
||||||
|
'fields[tag]': 'name',
|
||||||
|
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||||
'include': 'images,primaryChannel,show,tags'
|
'include': 'images,primaryChannel,show,tags'
|
||||||
})
|
})
|
||||||
video_id = video['data']['id']
|
video_id = video['data']['id']
|
||||||
@ -226,7 +231,6 @@ class DPlayIE(InfoExtractor):
|
|||||||
'series': series,
|
'series': series,
|
||||||
'season_number': int_or_none(info.get('seasonNumber')),
|
'season_number': int_or_none(info.get('seasonNumber')),
|
||||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
'episode_number': int_or_none(info.get('episodeNumber')),
|
||||||
'age_limit': int_or_none(info.get('minimum_age')),
|
|
||||||
'creator': creator,
|
'creator': creator,
|
||||||
'tags': tags,
|
'tags': tags,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
@ -24,7 +25,14 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class DRTVIE(InfoExtractor):
|
class DRTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
|
||||||
|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
|
||||||
|
)
|
||||||
|
(?P<id>[\da-z_-]+)
|
||||||
|
'''
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_GEO_COUNTRIES = ['DK']
|
_GEO_COUNTRIES = ['DK']
|
||||||
IE_NAME = 'drtv'
|
IE_NAME = 'drtv'
|
||||||
@ -83,6 +91,26 @@ class DRTVIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
|
'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '00951930010',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Bonderøven (1:8)',
|
||||||
|
'description': 'md5:3cf18fc0d3b205745d4505f896af8121',
|
||||||
|
'timestamp': 1546542000,
|
||||||
|
'upload_date': '20190103',
|
||||||
|
'duration': 2576.6,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -100,13 +128,32 @@ class DRTVIE(InfoExtractor):
|
|||||||
webpage, 'video id', default=None)
|
webpage, 'video id', default=None)
|
||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
video_id = compat_urllib_parse_unquote(self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
|
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
|
||||||
webpage, 'urn'))
|
webpage, 'urn', default=None)
|
||||||
|
if video_id:
|
||||||
|
video_id = compat_urllib_parse_unquote(video_id)
|
||||||
|
|
||||||
|
_PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard'
|
||||||
|
query = {'expanded': 'true'}
|
||||||
|
|
||||||
|
if video_id:
|
||||||
|
programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
|
||||||
|
else:
|
||||||
|
programcard_url = _PROGRAMCARD_BASE
|
||||||
|
page = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
|
||||||
|
'data'), '1')['cache']['page']
|
||||||
|
page = page[list(page.keys())[0]]
|
||||||
|
item = try_get(
|
||||||
|
page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
|
||||||
|
dict)
|
||||||
|
video_id = item['customId'].split(':')[-1]
|
||||||
|
query['productionnumber'] = video_id
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id,
|
programcard_url, video_id, 'Downloading video JSON', query=query)
|
||||||
video_id, 'Downloading video JSON', query={'expanded': 'true'})
|
|
||||||
|
|
||||||
title = str_or_none(data.get('Title')) or re.sub(
|
title = str_or_none(data.get('Title')) or re.sub(
|
||||||
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
|
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
|
||||||
|
@ -18,7 +18,6 @@ from .acast import (
|
|||||||
ACastIE,
|
ACastIE,
|
||||||
ACastChannelIE,
|
ACastChannelIE,
|
||||||
)
|
)
|
||||||
from .addanime import AddAnimeIE
|
|
||||||
from .adn import ADNIE
|
from .adn import ADNIE
|
||||||
from .adobeconnect import AdobeConnectIE
|
from .adobeconnect import AdobeConnectIE
|
||||||
from .adobetv import (
|
from .adobetv import (
|
||||||
@ -80,7 +79,6 @@ from .awaan import (
|
|||||||
)
|
)
|
||||||
from .azmedien import AZMedienIE
|
from .azmedien import AZMedienIE
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||||
from .bbc import (
|
from .bbc import (
|
||||||
BBCCoUkIE,
|
BBCCoUkIE,
|
||||||
@ -224,7 +222,6 @@ from .comedycentral import (
|
|||||||
ComedyCentralTVIE,
|
ComedyCentralTVIE,
|
||||||
ToshIE,
|
ToshIE,
|
||||||
)
|
)
|
||||||
from .comcarcoff import ComCarCoffIE
|
|
||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
from .commonprotocols import (
|
from .commonprotocols import (
|
||||||
MmsIE,
|
MmsIE,
|
||||||
@ -255,10 +252,6 @@ from .dailymotion import (
|
|||||||
DailymotionPlaylistIE,
|
DailymotionPlaylistIE,
|
||||||
DailymotionUserIE,
|
DailymotionUserIE,
|
||||||
)
|
)
|
||||||
from .daisuki import (
|
|
||||||
DaisukiMottoIE,
|
|
||||||
DaisukiMottoPlaylistIE,
|
|
||||||
)
|
|
||||||
from .daum import (
|
from .daum import (
|
||||||
DaumIE,
|
DaumIE,
|
||||||
DaumClipIE,
|
DaumClipIE,
|
||||||
@ -360,7 +353,6 @@ from .firsttv import FirstTVIE
|
|||||||
from .fivemin import FiveMinIE
|
from .fivemin import FiveMinIE
|
||||||
from .fivetv import FiveTVIE
|
from .fivetv import FiveTVIE
|
||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
from .flipagram import FlipagramIE
|
|
||||||
from .folketinget import FolketingetIE
|
from .folketinget import FolketingetIE
|
||||||
from .footyroom import FootyRoomIE
|
from .footyroom import FootyRoomIE
|
||||||
from .formula1 import Formula1IE
|
from .formula1 import Formula1IE
|
||||||
@ -407,10 +399,6 @@ from .fusion import FusionIE
|
|||||||
from .fxnetworks import FXNetworksIE
|
from .fxnetworks import FXNetworksIE
|
||||||
from .gaia import GaiaIE
|
from .gaia import GaiaIE
|
||||||
from .gameinformer import GameInformerIE
|
from .gameinformer import GameInformerIE
|
||||||
from .gameone import (
|
|
||||||
GameOneIE,
|
|
||||||
GameOnePlaylistIE,
|
|
||||||
)
|
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gamestar import GameStarIE
|
from .gamestar import GameStarIE
|
||||||
from .gaskrank import GaskrankIE
|
from .gaskrank import GaskrankIE
|
||||||
@ -465,7 +453,6 @@ from .hungama import (
|
|||||||
HungamaSongIE,
|
HungamaSongIE,
|
||||||
)
|
)
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
from .iconosquare import IconosquareIE
|
|
||||||
from .ign import (
|
from .ign import (
|
||||||
IGNIE,
|
IGNIE,
|
||||||
OneUPIE,
|
OneUPIE,
|
||||||
@ -524,8 +511,8 @@ from .keezmovies import KeezMoviesIE
|
|||||||
from .ketnet import KetnetIE
|
from .ketnet import KetnetIE
|
||||||
from .khanacademy import KhanAcademyIE
|
from .khanacademy import KhanAcademyIE
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
|
from .kinja import KinjaEmbedIE
|
||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
from .keek import KeekIE
|
|
||||||
from .konserthusetplay import KonserthusetPlayIE
|
from .konserthusetplay import KonserthusetPlayIE
|
||||||
from .kontrtube import KontrTubeIE
|
from .kontrtube import KontrTubeIE
|
||||||
from .krasview import KrasViewIE
|
from .krasview import KrasViewIE
|
||||||
@ -640,7 +627,6 @@ from .microsoftvirtualacademy import (
|
|||||||
MicrosoftVirtualAcademyIE,
|
MicrosoftVirtualAcademyIE,
|
||||||
MicrosoftVirtualAcademyCourseIE,
|
MicrosoftVirtualAcademyCourseIE,
|
||||||
)
|
)
|
||||||
from .minhateca import MinhatecaIE
|
|
||||||
from .ministrygrid import MinistryGridIE
|
from .ministrygrid import MinistryGridIE
|
||||||
from .minoto import MinotoIE
|
from .minoto import MinotoIE
|
||||||
from .miomio import MioMioIE
|
from .miomio import MioMioIE
|
||||||
@ -650,7 +636,6 @@ from .mixcloud import (
|
|||||||
MixcloudIE,
|
MixcloudIE,
|
||||||
MixcloudUserIE,
|
MixcloudUserIE,
|
||||||
MixcloudPlaylistIE,
|
MixcloudPlaylistIE,
|
||||||
MixcloudStreamIE,
|
|
||||||
)
|
)
|
||||||
from .mlb import MLBIE
|
from .mlb import MLBIE
|
||||||
from .mnet import MnetIE
|
from .mnet import MnetIE
|
||||||
@ -944,10 +929,6 @@ from .rentv import (
|
|||||||
from .restudy import RestudyIE
|
from .restudy import RestudyIE
|
||||||
from .reuters import ReutersIE
|
from .reuters import ReutersIE
|
||||||
from .reverbnation import ReverbNationIE
|
from .reverbnation import ReverbNationIE
|
||||||
from .revision3 import (
|
|
||||||
Revision3EmbedIE,
|
|
||||||
Revision3IE,
|
|
||||||
)
|
|
||||||
from .rice import RICEIE
|
from .rice import RICEIE
|
||||||
from .rmcdecouverte import RMCDecouverteIE
|
from .rmcdecouverte import RMCDecouverteIE
|
||||||
from .ro220 import Ro220IE
|
from .ro220 import Ro220IE
|
||||||
@ -992,6 +973,10 @@ from .sbs import SBSIE
|
|||||||
from .screencast import ScreencastIE
|
from .screencast import ScreencastIE
|
||||||
from .screencastomatic import ScreencastOMaticIE
|
from .screencastomatic import ScreencastOMaticIE
|
||||||
from .scrippsnetworks import ScrippsNetworksWatchIE
|
from .scrippsnetworks import ScrippsNetworksWatchIE
|
||||||
|
from .scte import (
|
||||||
|
SCTEIE,
|
||||||
|
SCTECourseIE,
|
||||||
|
)
|
||||||
from .seeker import SeekerIE
|
from .seeker import SeekerIE
|
||||||
from .senateisvp import SenateISVPIE
|
from .senateisvp import SenateISVPIE
|
||||||
from .sendtonews import SendtoNewsIE
|
from .sendtonews import SendtoNewsIE
|
||||||
@ -1249,6 +1234,7 @@ from .twitter import (
|
|||||||
TwitterCardIE,
|
TwitterCardIE,
|
||||||
TwitterIE,
|
TwitterIE,
|
||||||
TwitterAmplifyIE,
|
TwitterAmplifyIE,
|
||||||
|
TwitterBroadcastIE,
|
||||||
)
|
)
|
||||||
from .udemy import (
|
from .udemy import (
|
||||||
UdemyIE,
|
UdemyIE,
|
||||||
|
@ -334,7 +334,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(
|
server_js_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
|
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
|
||||||
webpage, 'js data', default='{}'),
|
webpage, 'js data', default='{}'),
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
video_data = extract_from_jsmods_instances(server_js_data)
|
video_data = extract_from_jsmods_instances(server_js_data)
|
||||||
|
@ -1,115 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
|
||||||
try_get,
|
|
||||||
unified_timestamp,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FlipagramIE(InfoExtractor):
    """Extractor for single flipagram.com posts (``/f/<id>`` URLs).

    Metadata comes from the ``window.reactH2O`` JSON object embedded in
    the page, with JSON-LD used as the preferred source for the title and
    description.
    """
    _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://flipagram.com/f/nyvTSJMKId',
        'md5': '888dcf08b7ea671381f00fab74692755',
        'info_dict': {
            'id': 'nyvTSJMKId',
            'ext': 'mp4',
            'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
            'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
            'duration': 35.571,
            'timestamp': 1461244995,
            'upload_date': '20160421',
            'uploader': 'kitty juria',
            'uploader_id': 'sjuria101',
            'creator': 'kitty juria',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'comments': list,
            'formats': 'mincount:2',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the post at *url*.

        The video URL and a title (JSON-LD title or the post caption) are
        mandatory; everything else is filled in when available.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        page_data = self._parse_json(
            self._search_regex(
                r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
            video_id)

        flipagram = page_data['flipagram']
        video = flipagram['video']

        json_ld = self._search_json_ld(webpage, video_id, default={})
        title = json_ld.get('title') or flipagram['captionText']
        description = json_ld.get('description') or flipagram.get('captionText')

        formats = [{
            'url': video['url'],
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            # The size belongs to the video entry, next to its dimensions;
            # it was previously looked up on the page-level object.
            'filesize': int_or_none(video.get('size')),
        }]

        # The music preview, when present, is offered as an audio-only format.
        preview_url = try_get(
            flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
        if preview_url:
            formats.append({
                'url': preview_url,
                'ext': 'm4a',
                'vcodec': 'none',
            })

        self._sort_formats(formats)

        counts = flipagram.get('counts', {})
        user = flipagram.get('user', {})

        thumbnails = [{
            'url': self._proto_relative_url(cover['url']),
            'width': int_or_none(cover.get('width')),
            'height': int_or_none(cover.get('height')),
            'filesize': int_or_none(cover.get('size')),
        } for cover in flipagram.get('covers', []) if cover.get('url')]

        # Note that this only retrieves comments that are initially loaded.
        # For videos with large amounts of comments, most won't be retrieved.
        # NOTE(review): comments are read from the post's video entry, as
        # before; confirm that this is where the site actually puts them.
        comments = []
        for comment in video.get('comments', {}).get(video_id, {}).get('items', []):
            text = comment.get('comment')
            if not text or not isinstance(text, list):
                continue
            comments.append({
                'author': comment.get('user', {}).get('name'),
                'author_id': comment.get('user', {}).get('username'),
                'id': comment.get('id'),
                'text': text[0],
                'timestamp': unified_timestamp(comment.get('created')),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            # The duration is divided by 1000 (scale argument).
            'duration': float_or_none(flipagram.get('duration'), 1000),
            'thumbnails': thumbnails,
            'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
            'uploader': user.get('name'),
            'uploader_id': user.get('username'),
            'creator': user.get('name'),
            'view_count': int_or_none(counts.get('plays')),
            'like_count': int_or_none(counts.get('likes')),
            'repost_count': int_or_none(counts.get('reflips')),
            'comment_count': int_or_none(counts.get('comments')),
            'comments': comments,
            'formats': formats,
        }
|
|
@ -1,134 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
xpath_with_ns,
|
|
||||||
parse_iso8601,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
# XML namespace prefixes used when querying the MRSS feed.
NAMESPACE_MAP = {
    'media': 'http://search.yahoo.com/mrss/',
}

# URL prefix to download the mp4 files directly instead of streaming via rtmp
# Credits go to XBox-Maniac
# http://board.jdownloader.org/showpost.php?p=185835&postcount=31
RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'


class GameOneIE(InfoExtractor):
    """Extractor for single episodes on gameone.de (``/tv/<number>``).

    The episode page points (through its og:video URL) at an MRSS feed,
    which in turn references a media description listing the available
    renditions.  Rendition URLs are rewritten to plain HTTP downloads
    under RAW_MP4_URL.
    """
    _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://www.gameone.de/tv/288',
            'md5': '136656b7fb4c9cb4a8e2d500651c499b',
            'info_dict': {
                'id': '288',
                'ext': 'mp4',
                'title': 'Game One - Folge 288',
                'duration': 1238,
                'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
                'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
                'age_limit': 16,
                'upload_date': '20140513',
                'timestamp': 1399980122,
            }
        },
        {
            'url': 'http://gameone.de/tv/220',
            'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
            'info_dict': {
                'id': '220',
                'ext': 'mp4',
                'upload_date': '20120918',
                'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
                'timestamp': 1347971451,
                'title': 'Game One - Folge 220',
                'duration': 896.62,
                'age_limit': 16,
            }
        }

    ]

    def _real_extract(self, url):
        """Return the info dict for the episode at *url*.

        The MRSS title and the ``media:content`` element are mandatory;
        thumbnail, timestamp, duration and age limit are optional and are
        left empty (age limit: 0) when the corresponding data is absent.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        og_video = self._og_search_video_url(webpage, secure=False)
        description = self._html_search_meta('description', webpage)

        # The meta tag may be absent; searching an empty string lets the
        # '0' default apply instead of failing on None.
        age_label = self._html_search_meta('age-de-meta-label', webpage) or ''
        age_limit = int(
            self._search_regex(r'age=(\d+)', age_label, 'age_limit', '0'))
        mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')

        mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss')
        title = mrss.find('.//item/title').text

        # Elements must be compared against None explicitly: an Element
        # without children is falsy.
        image = mrss.find('.//item/image')
        thumbnail = image.get('url') if image is not None else None
        pub_date = mrss.find('.//pubDate')
        timestamp = parse_iso8601(
            pub_date.text if pub_date is not None else None, delimiter=' ')

        content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP))
        content_url = content.get('url')

        content = self._download_xml(
            content_url,
            video_id,
            'Downloading media:content')
        rendition_items = content.findall('.//rendition')

        # The duration is taken from the first rendition, if there is one.
        duration = (
            float_or_none(rendition_items[0].get('duration'))
            if rendition_items else None)

        formats = []
        for rendition in rendition_items:
            src = rendition.find('./src')
            if src is None or not src.text:
                # A rendition without a source cannot be downloaded.
                continue
            formats.append({
                # Replace everything up to the "r2" path component with
                # the direct-download prefix.
                'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', src.text),
                'width': int_or_none(rendition.get('width')),
                'height': int_or_none(rendition.get('height')),
                'tbr': int_or_none(rendition.get('bitrate')),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'description': description,
            'age_limit': age_limit,
            'timestamp': timestamp,
        }
|
|
||||||
|
|
||||||
|
|
||||||
class GameOnePlaylistIE(InfoExtractor):
    # Playlist extractor for the gameone.de front page / TV index: it emits
    # one entry per episode number, newest first.
    _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$'
    IE_NAME = 'gameone:playlist'
    _TEST = {
        'url': 'http://www.gameone.de/tv',
        'info_dict': {
            'title': 'GameOne',
        },
        'playlist_mincount': 294,
    }

    def _real_extract(self, url):
        """Build a playlist of every episode from the highest linked id down to 1.

        The TV index page is always downloaded, regardless of which
        matching URL was given. The largest ``/tv/<number>`` link found on
        it defines the upper bound of the episode range.
        """
        listing = self._download_webpage('http://www.gameone.de/tv', 'TV')
        episode_numbers = [
            int(number)
            for number in re.findall(r'<a href="/tv/(\d+)"', listing)]
        newest = max(episode_numbers)

        # Count down so the most recent episode comes first.
        entries = []
        for episode in range(newest, 0, -1):
            episode_url = 'http://www.gameone.de/tv/%d' % episode
            entries.append(self.url_result(episode_url, 'GameOne'))

        playlist = {'_type': 'playlist'}
        playlist['title'] = 'GameOne'
        playlist['entries'] = entries
        return playlist
|
|
@ -119,6 +119,7 @@ from .viqeo import ViqeoIE
|
|||||||
from .expressen import ExpressenIE
|
from .expressen import ExpressenIE
|
||||||
from .zype import ZypeIE
|
from .zype import ZypeIE
|
||||||
from .odnoklassniki import OdnoklassnikiIE
|
from .odnoklassniki import OdnoklassnikiIE
|
||||||
|
from .kinja import KinjaEmbedIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -1487,16 +1488,18 @@ class GenericIE(InfoExtractor):
|
|||||||
'timestamp': 1432570283,
|
'timestamp': 1432570283,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# OnionStudios embed
|
# Kinja embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
|
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2855',
|
'id': '106351',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
|
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
|
||||||
|
'description': 'Migrated from OnionStudios',
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
'uploader': 'ClickHole',
|
'uploader': 'clickhole',
|
||||||
'uploader_id': 'clickhole',
|
'upload_date': '20150527',
|
||||||
|
'timestamp': 1432744860,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# SnagFilms embed
|
# SnagFilms embed
|
||||||
@ -2894,6 +2897,12 @@ class GenericIE(InfoExtractor):
|
|||||||
if senate_isvp_url:
|
if senate_isvp_url:
|
||||||
return self.url_result(senate_isvp_url, 'SenateISVP')
|
return self.url_result(senate_isvp_url, 'SenateISVP')
|
||||||
|
|
||||||
|
# Look for Kinja embeds
|
||||||
|
kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
|
||||||
|
if kinja_embed_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
kinja_embed_urls, video_id, video_title)
|
||||||
|
|
||||||
# Look for OnionStudios embeds
|
# Look for OnionStudios embeds
|
||||||
onionstudios_url = OnionStudiosIE._extract_url(webpage)
|
onionstudios_url = OnionStudiosIE._extract_url(webpage)
|
||||||
if onionstudios_url:
|
if onionstudios_url:
|
||||||
|
@ -118,6 +118,7 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
if video_data.get('drmProtected'):
|
if video_data.get('drmProtected'):
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
|
headers = {'Referer': url}
|
||||||
formats = []
|
formats = []
|
||||||
geo_restricted = False
|
geo_restricted = False
|
||||||
playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
|
playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
|
||||||
@ -137,10 +138,11 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
if 'package:hls' in tags or ext == 'm3u8':
|
if 'package:hls' in tags or ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
format_url, video_id, 'mp4',
|
format_url, video_id, 'mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls'))
|
entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', headers=headers))
|
||||||
elif 'package:dash' in tags or ext == 'mpd':
|
elif 'package:dash' in tags or ext == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
format_url, video_id, mpd_id='dash'))
|
format_url, video_id, mpd_id='dash', headers=headers))
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
# produce broken files
|
# produce broken files
|
||||||
pass
|
pass
|
||||||
@ -158,6 +160,9 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
self.raise_geo_restricted(countries=['IN'])
|
self.raise_geo_restricted(countries=['IN'])
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
for f in formats:
|
||||||
|
f.setdefault('http_headers', {}).update(headers)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -1,85 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
get_element_by_id,
|
|
||||||
remove_end,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class IconosquareIE(InfoExtractor):
    # Extractor for media pages on iconosquare.com and statigr.am. All
    # metadata is read from the JSON held by the page element with id
    # "mediaJson".
    _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://statigr.am/p/522207370455279102_24101272',
        'md5': '6eb93b882a3ded7c378ee1d6884b1814',
        'info_dict': {
            'id': '522207370455279102_24101272',
            'ext': 'mp4',
            'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
            'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
            'timestamp': 1376471991,
            'upload_date': '20130814',
            'uploader': 'aguynamedpatrick',
            'uploader_id': '24101272',
            'comment_count': int,
            'like_count': int,
        },
    }

    @staticmethod
    def _section(container, key):
        # Return container[key] when it is a dict, else an empty dict.
        # ``container.get(key, {})`` is not enough: a key that is present
        # but holds null/None would make the following ``.get`` call raise
        # AttributeError.
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    def _real_extract(self, url):
        """Return the info dict for the media page at ``url``.

        Only the ``videos`` mapping (and each video's ``url``) is required;
        every other field is optional and comes back as ``None`` or an
        empty list when the corresponding JSON section is missing or null.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        media = self._parse_json(
            get_element_by_id('mediaJson', webpage),
            video_id)

        formats = [{
            'url': f['url'],
            'format_id': format_id,
            'width': int_or_none(f.get('width')),
            'height': int_or_none(f.get('height'))
        } for format_id, f in media['videos'].items()]
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')

        caption = self._section(media, 'caption')
        user = self._section(media, 'user')
        comments_info = self._section(media, 'comments')
        likes_info = self._section(media, 'likes')

        # Prefer the media's own creation time, then the caption's.
        timestamp = int_or_none(
            media.get('created_time') or caption.get('created_time'))
        description = caption.get('text')

        uploader = user.get('username')
        uploader_id = user.get('id')

        comment_count = int_or_none(comments_info.get('count'))
        like_count = int_or_none(likes_info.get('count'))

        thumbnails = [{
            'url': t['url'],
            'id': thumbnail_id,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height'))
        } for thumbnail_id, t in self._section(media, 'images').items()]

        # Comments without a text body are dropped.
        comments = [{
            'id': comment.get('id'),
            'text': comment['text'],
            'timestamp': int_or_none(comment.get('created_time')),
            'author': self._section(comment, 'from').get('full_name'),
            'author_id': self._section(comment, 'from').get('username'),
        } for comment in comments_info.get('data') or [] if 'text' in comment]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'comment_count': comment_count,
            'like_count': like_count,
            'formats': formats,
            'comments': comments,
        }
|
|
@ -18,6 +18,8 @@ class IviIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_GEO_COUNTRIES = ['RU']
|
_GEO_COUNTRIES = ['RU']
|
||||||
|
_LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
|
||||||
|
_LIGHT_URL = 'https://api.ivi.ru/light/'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Single movie
|
# Single movie
|
||||||
@ -80,48 +82,77 @@ class IviIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
data = {
|
data = json.dumps({
|
||||||
'method': 'da.content.get',
|
'method': 'da.content.get',
|
||||||
'params': [
|
'params': [
|
||||||
video_id, {
|
video_id, {
|
||||||
'site': 's183',
|
'site': 's%d',
|
||||||
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
||||||
'contentid': video_id
|
'contentid': video_id
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
}).encode()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from Crypto.Cipher import Blowfish
|
||||||
|
from Crypto.Hash import CMAC
|
||||||
|
|
||||||
|
timestamp = self._download_json(
|
||||||
|
self._LIGHT_URL, video_id,
|
||||||
|
'Downloading timestamp JSON', data=json.dumps({
|
||||||
|
'method': 'da.timestamp.get',
|
||||||
|
'params': []
|
||||||
|
}).encode())['result']
|
||||||
|
|
||||||
|
data = data % 353
|
||||||
|
query = {
|
||||||
|
'ts': timestamp,
|
||||||
|
'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + data, Blowfish).hexdigest(),
|
||||||
}
|
}
|
||||||
|
except ImportError:
|
||||||
|
data = data % 183
|
||||||
|
query = {}
|
||||||
|
|
||||||
video_json = self._download_json(
|
video_json = self._download_json(
|
||||||
'http://api.digitalaccess.ru/api/json/', video_id,
|
self._LIGHT_URL, video_id,
|
||||||
'Downloading video JSON', data=json.dumps(data))
|
'Downloading video JSON', data=data, query=query)
|
||||||
|
|
||||||
if 'error' in video_json:
|
error = video_json.get('error')
|
||||||
error = video_json['error']
|
if error:
|
||||||
origin = error['origin']
|
origin = error.get('origin')
|
||||||
|
message = error.get('message') or error.get('user_message')
|
||||||
|
extractor_msg = 'Unable to download video %s'
|
||||||
if origin == 'NotAllowedForLocation':
|
if origin == 'NotAllowedForLocation':
|
||||||
self.raise_geo_restricted(
|
self.raise_geo_restricted(message, self._GEO_COUNTRIES)
|
||||||
msg=error['message'], countries=self._GEO_COUNTRIES)
|
|
||||||
elif origin == 'NoRedisValidData':
|
elif origin == 'NoRedisValidData':
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
extractor_msg = 'Video %s does not exist'
|
||||||
|
elif message:
|
||||||
|
if 'недоступен для просмотра на площадке s183' in message:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Unable to download video %s: %s' % (video_id, error['message']),
|
'pycryptodome not found. Please install it.',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
extractor_msg += ': ' + message
|
||||||
|
raise ExtractorError(extractor_msg % video_id, expected=True)
|
||||||
|
|
||||||
result = video_json['result']
|
result = video_json['result']
|
||||||
|
title = result['title']
|
||||||
|
|
||||||
quality = qualities(self._KNOWN_FORMATS)
|
quality = qualities(self._KNOWN_FORMATS)
|
||||||
|
|
||||||
formats = [{
|
formats = []
|
||||||
'url': x['url'],
|
for f in result.get('files', []):
|
||||||
'format_id': x.get('content_format'),
|
f_url = f.get('url')
|
||||||
'quality': quality(x.get('content_format')),
|
content_format = f.get('content_format')
|
||||||
} for x in result['files'] if x.get('url')]
|
if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': f_url,
|
||||||
|
'format_id': content_format,
|
||||||
|
'quality': quality(content_format),
|
||||||
|
'filesize': int_or_none(f.get('size_in_bytes')),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = result['title']
|
|
||||||
|
|
||||||
duration = int_or_none(result.get('duration'))
|
|
||||||
compilation = result.get('compilation')
|
compilation = result.get('compilation')
|
||||||
episode = title if compilation else None
|
episode = title if compilation else None
|
||||||
|
|
||||||
@ -158,7 +189,7 @@ class IviIE(InfoExtractor):
|
|||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': duration,
|
'duration': int_or_none(result.get('duration')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,38 +1,26 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import hashlib
|
||||||
|
import random
|
||||||
|
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_str
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_duration
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class JamendoBaseIE(InfoExtractor):
|
class JamendoIE(InfoExtractor):
|
||||||
def _extract_meta(self, webpage, fatal=True):
|
|
||||||
title = self._og_search_title(
|
|
||||||
webpage, default=None) or self._search_regex(
|
|
||||||
r'<title>([^<]+)', webpage,
|
|
||||||
'title', default=None)
|
|
||||||
if title:
|
|
||||||
title = self._search_regex(
|
|
||||||
r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None)
|
|
||||||
if not title:
|
|
||||||
title = self._html_search_meta(
|
|
||||||
'name', webpage, 'title', fatal=fatal)
|
|
||||||
mobj = re.search(r'(.+) - (.+)', title or '')
|
|
||||||
artist, second = mobj.groups() if mobj else [None] * 2
|
|
||||||
return title, artist, second
|
|
||||||
|
|
||||||
|
|
||||||
class JamendoIE(JamendoBaseIE):
|
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
licensing\.jamendo\.com/[^/]+|
|
licensing\.jamendo\.com/[^/]+|
|
||||||
(?:www\.)?jamendo\.com
|
(?:www\.)?jamendo\.com
|
||||||
)
|
)
|
||||||
/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)
|
/track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))?
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
|
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
|
||||||
@ -45,7 +33,9 @@ class JamendoIE(JamendoBaseIE):
|
|||||||
'artist': 'Maya Filipič',
|
'artist': 'Maya Filipič',
|
||||||
'track': 'Stories from Emona I',
|
'track': 'Stories from Emona I',
|
||||||
'duration': 210,
|
'duration': 210,
|
||||||
'thumbnail': r're:^https?://.*\.jpg'
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'timestamp': 1217438117,
|
||||||
|
'upload_date': '20080730',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||||
@ -53,15 +43,20 @@ class JamendoIE(JamendoBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._VALID_URL_RE.match(url)
|
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
||||||
track_id = mobj.group('id')
|
|
||||||
display_id = mobj.group('display_id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://www.jamendo.com/track/%s/%s' % (track_id, display_id),
|
'https://www.jamendo.com/track/' + track_id, track_id)
|
||||||
display_id)
|
models = self._parse_json(self._html_search_regex(
|
||||||
|
r"data-bundled-models='([^']+)",
|
||||||
title, artist, track = self._extract_meta(webpage)
|
webpage, 'bundled models'), track_id)
|
||||||
|
track = models['track']['models'][0]
|
||||||
|
title = track_name = track['name']
|
||||||
|
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||||
|
artist = get_model('artist')
|
||||||
|
artist_name = artist.get('name')
|
||||||
|
if artist_name:
|
||||||
|
title = '%s - %s' % (artist_name, title)
|
||||||
|
album = get_model('album')
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||||
@ -77,31 +72,58 @@ class JamendoIE(JamendoBaseIE):
|
|||||||
))]
|
))]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnail = self._html_search_meta(
|
urls = []
|
||||||
'image', webpage, 'thumbnail', fatal=False)
|
thumbnails = []
|
||||||
duration = parse_duration(self._search_regex(
|
for _, covers in track.get('cover', {}).items():
|
||||||
r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']',
|
for cover_id, cover_url in covers.items():
|
||||||
webpage, 'duration', fatal=False))
|
if not cover_url or cover_url in urls:
|
||||||
|
continue
|
||||||
|
urls.append(cover_url)
|
||||||
|
size = int_or_none(cover_id.lstrip('size'))
|
||||||
|
thumbnails.append({
|
||||||
|
'id': cover_id,
|
||||||
|
'url': cover_url,
|
||||||
|
'width': size,
|
||||||
|
'height': size,
|
||||||
|
})
|
||||||
|
|
||||||
|
tags = []
|
||||||
|
for tag in track.get('tags', []):
|
||||||
|
tag_name = tag.get('name')
|
||||||
|
if not tag_name:
|
||||||
|
continue
|
||||||
|
tags.append(tag_name)
|
||||||
|
|
||||||
|
stats = track.get('stats') or {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'thumbnail': thumbnail,
|
'thumbnails': thumbnails,
|
||||||
'title': title,
|
'title': title,
|
||||||
'duration': duration,
|
'description': track.get('description'),
|
||||||
'artist': artist,
|
'duration': int_or_none(track.get('duration')),
|
||||||
'track': track,
|
'artist': artist_name,
|
||||||
'formats': formats
|
'track': track_name,
|
||||||
|
'album': album.get('name'),
|
||||||
|
'formats': formats,
|
||||||
|
'license': '-'.join(track.get('licenseCC', [])) or None,
|
||||||
|
'timestamp': int_or_none(track.get('dateCreated')),
|
||||||
|
'view_count': int_or_none(stats.get('listenedAll')),
|
||||||
|
'like_count': int_or_none(stats.get('favorited')),
|
||||||
|
'average_rating': int_or_none(stats.get('averageNote')),
|
||||||
|
'tags': tags,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class JamendoAlbumIE(JamendoBaseIE):
|
class JamendoAlbumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
|
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '121486',
|
'id': '121486',
|
||||||
'title': 'Shearer - Duck On Cover'
|
'title': 'Duck On Cover',
|
||||||
|
'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'md5': 'e1a2fcb42bda30dfac990212924149a8',
|
'md5': 'e1a2fcb42bda30dfac990212924149a8',
|
||||||
@ -111,6 +133,8 @@ class JamendoAlbumIE(JamendoBaseIE):
|
|||||||
'title': 'Shearer - Warmachine',
|
'title': 'Shearer - Warmachine',
|
||||||
'artist': 'Shearer',
|
'artist': 'Shearer',
|
||||||
'track': 'Warmachine',
|
'track': 'Warmachine',
|
||||||
|
'timestamp': 1368089771,
|
||||||
|
'upload_date': '20130509',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
|
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
|
||||||
@ -120,6 +144,8 @@ class JamendoAlbumIE(JamendoBaseIE):
|
|||||||
'title': 'Shearer - Without Your Ghost',
|
'title': 'Shearer - Without Your Ghost',
|
||||||
'artist': 'Shearer',
|
'artist': 'Shearer',
|
||||||
'track': 'Without Your Ghost',
|
'track': 'Without Your Ghost',
|
||||||
|
'timestamp': 1368089771,
|
||||||
|
'upload_date': '20130509',
|
||||||
}
|
}
|
||||||
}],
|
}],
|
||||||
'params': {
|
'params': {
|
||||||
@ -127,24 +153,35 @@ class JamendoAlbumIE(JamendoBaseIE):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _call_api(self, resource, resource_id):
|
||||||
|
path = '/api/%ss' % resource
|
||||||
|
rand = compat_str(random.random())
|
||||||
|
return self._download_json(
|
||||||
|
'https://www.jamendo.com' + path, resource_id, query={
|
||||||
|
'id[]': resource_id,
|
||||||
|
}, headers={
|
||||||
|
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
||||||
|
})[0]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._VALID_URL_RE.match(url)
|
album_id = self._match_id(url)
|
||||||
album_id = mobj.group('id')
|
album = self._call_api('album', album_id)
|
||||||
|
album_name = album.get('name')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, mobj.group('display_id'))
|
entries = []
|
||||||
|
for track in album.get('tracks', []):
|
||||||
title, artist, album = self._extract_meta(webpage, fatal=False)
|
track_id = track.get('id')
|
||||||
|
if not track_id:
|
||||||
entries = [{
|
continue
|
||||||
|
track_id = compat_str(track_id)
|
||||||
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': compat_urlparse.urljoin(url, m.group('path')),
|
'url': 'https://www.jamendo.com/track/' + track_id,
|
||||||
'ie_key': JamendoIE.ie_key(),
|
'ie_key': JamendoIE.ie_key(),
|
||||||
'id': self._search_regex(
|
'id': track_id,
|
||||||
r'/track/(\d+)', m.group('path'), 'track id', default=None),
|
'album': album_name,
|
||||||
'artist': artist,
|
})
|
||||||
'album': album,
|
|
||||||
} for m in re.finditer(
|
|
||||||
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
|
|
||||||
webpage)]
|
|
||||||
|
|
||||||
return self.playlist_result(entries, album_id, title)
|
return self.playlist_result(
|
||||||
|
entries, album_id, album_name,
|
||||||
|
clean_html(try_get(album, lambda x: x['description']['en'], compat_str)))
|
||||||
|
@ -1,39 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class KeekIE(InfoExtractor):
    # Extractor for single keek.com clips. The media URL, title and
    # thumbnail come from the page's Open Graph tags; uploader details come
    # from data-* attributes in the markup.
    _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'
    IE_NAME = 'keek'
    _TEST = {
        'url': 'https://www.keek.com/keek/NODfbab',
        'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
        'info_dict': {
            'id': 'NODfbab',
            'ext': 'mp4',
            'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896',
            'uploader': 'ytdl',
            'uploader_id': 'eGT5bab',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the clip page at ``url``."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        media_url = self._og_search_video_url(page)
        # The Open Graph description is used as the title.
        clip_title = self._og_search_description(page).strip()
        preview = self._og_search_thumbnail(page)

        # Uploader lookups are non-fatal.
        uploader_name = self._search_regex(
            r'data-username=(["\'])(?P<uploader>.+?)\1', page,
            'uploader', fatal=False, group='uploader')
        uploader_ident = self._search_regex(
            r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', page,
            'uploader id', fatal=False, group='uploader_id')

        return {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': clip_title,
            'thumbnail': preview,
            'uploader': uploader_name,
            'uploader_id': uploader_ident,
        }
|
|
221
youtube_dl/extractor/kinja.py
Normal file
221
youtube_dl/extractor/kinja.py
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
|
unescapeHTML,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class KinjaEmbedIE(InfoExtractor):
    # Resolves Kinja "inset"/"embed" iframe URLs. The ``id`` query value has
    # the form "<type>-<identifier>": most types are handed off to another
    # extractor through _PROVIDER_MAP, while "kinjavideo" and "mcp" are
    # resolved here through JSON APIs.

    # Was misspelled ``IENAME``, so the intended name was never picked up
    # under the attribute the other extractors use. The old spelling is kept
    # as an alias for anything that referenced it.
    IE_NAME = 'kinja:embed'
    IENAME = IE_NAME

    # NOTE: the two fragments below contain layout whitespace and are only
    # valid inside patterns compiled in verbose mode ((?x)), as done in
    # _VALID_URL and _extract_urls.
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    _VALID_URL = r'''(?x)https?://%s%s
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            ooyala|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    # Embed type -> (URL prefix or %-template without scheme, extractor key).
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    @staticmethod
    def _extract_urls(webpage, url):
        """Return absolute URLs of all Kinja embed iframes found in ``webpage``.

        The iframe ``src`` may be relative, scheme-relative or absolute;
        each match is HTML-unescaped and joined against ``url``.
        """
        return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
            r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
            webpage)]

    def _real_extract(self, url):
        """Dispatch on the embed type encoded in ``url``.

        Types listed in _PROVIDER_MAP produce a ``url_result`` for the
        mapped extractor. "kinjavideo" is resolved through the Kinja API,
        and the remaining type ("mcp") through the Univision API.
        """
        video_type, video_id = re.match(self._VALID_URL, url).groups()

        provider = self._PROVIDER_MAP.get(video_type)
        if provider:
            video_id = compat_urllib_parse_unquote(video_id)
            if video_type == 'tumblr-post':
                # Identifier is "<post id>-<blog name>".
                video_id, blog = video_id.split('-', 1)
                result_url = provider[0] % (blog, video_id)
            elif video_type == 'youtube-list':
                # Identifier is "<video id>/<playlist id>".
                video_id, playlist_id = video_id.split('/')
                result_url = provider[0] % (video_id, playlist_id)
            else:
                if video_type == 'ooyala':
                    # Only the part before the first slash is the embed code.
                    video_id = video_id.split('/')[0]
                result_url = provider[0] + video_id
            return self.url_result('http://' + result_url, provider[1])

        if video_type == 'kinjavideo':
            return self._extract_kinjavideo(video_id)
        return self._extract_mcp(video_id)

    def _extract_kinjavideo(self, video_id):
        # Build the info dict for a "kinjavideo" embed from the Kinja API.
        data = self._download_json(
            'https://kinja.com/api/core/video/views/videoById',
            video_id, query={'videoId': video_id})['data']
        title = data['title']

        formats = []
        for k in ('signedPlaylist', 'streaming'):
            m3u8_url = data.get(k + 'Url')
            if m3u8_url:
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        thumbnail = None
        poster = data.get('poster') or {}
        poster_id = poster.get('id')
        if poster_id:
            thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'formats': formats,
            'tags': data.get('tags'),
            # publishTimeMillis is in milliseconds; scale down by 1000.
            'timestamp': int_or_none(try_get(
                data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
            'thumbnail': thumbnail,
            'uploader': data.get('network'),
        }

    def _extract_mcp(self, video_id):
        # Build the info dict for an "mcp" embed: metadata comes from the
        # Univision content API, playable URLs from the signature-token API.
        video_data = self._download_json(
            'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
            video_id)['videoMetadata']
        iptc = video_data['photoVideoMetadataIPTC']
        title = iptc['title']['en']
        fmg = video_data.get('photoVideoMetadata_fmg') or {}
        tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
        data = self._download_json(
            tvss_domain + '/api/v3/video-auth/url-signature-tokens',
            video_id, query={'mcpids': video_id})['data'][0]
        formats = []

        rendition_url = data.get('renditionUrl')
        if rendition_url:
            formats = self._extract_m3u8_formats(
                rendition_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)

        fallback_rendition_url = data.get('fallbackRenditionUrl')
        if fallback_rendition_url:
            formats.append({
                'format_id': 'fallback',
                # Bitrate is taken from a "_<digits>.mp4" suffix if present.
                'tbr': int_or_none(self._search_regex(
                    r'_(\d+)\.mp4', fallback_rendition_url,
                    'bitrate', default=None)),
                'url': fallback_rendition_url,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
            'uploader': fmg.get('network'),
            'duration': int_or_none(iptc.get('fileDuration')),
            'formats': formats,
            'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
            'timestamp': parse_iso8601(iptc.get('dateReleased')),
        }
|
@ -5,24 +5,27 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
compat_str,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unified_strdate,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LnkGoIE(InfoExtractor):
|
class LnkGoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
|
_VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
|
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '46712',
|
'id': '10809',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Yra kaip yra',
|
'title': "Put'ka: Trys Klausimai",
|
||||||
'upload_date': '20150107',
|
'upload_date': '20161216',
|
||||||
'description': 'md5:d82a5e36b775b7048617f263a0e3475e',
|
'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
|
||||||
'age_limit': 7,
|
'age_limit': 18,
|
||||||
'duration': 3019,
|
'duration': 117,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$'
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1481904000,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # HLS download
|
'skip_download': True, # HLS download
|
||||||
@ -30,20 +33,21 @@ class LnkGoIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
|
'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '47289',
|
'id': '10467',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Nėrdas: Kompiuterio Valymas',
|
'title': 'Nėrdas: Kompiuterio Valymas',
|
||||||
'upload_date': '20150113',
|
'upload_date': '20150113',
|
||||||
'description': 'md5:7352d113a242a808676ff17e69db6a69',
|
'description': 'md5:7352d113a242a808676ff17e69db6a69',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'duration': 346,
|
'duration': 346,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$'
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1421164800,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # HLS download
|
'skip_download': True, # HLS download
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
|
'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_AGE_LIMITS = {
|
_AGE_LIMITS = {
|
||||||
@ -51,66 +55,34 @@ class LnkGoIE(InfoExtractor):
|
|||||||
'N-14': 14,
|
'N-14': 14,
|
||||||
'S': 18,
|
'S': 18,
|
||||||
}
|
}
|
||||||
|
_M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract a single LNK video through the site's JSON video-page API.

    The URL yields a display id and an optional numeric episode id; when
    the episode id is absent, '0' is sent to the API in its place.
    Returns an info dict whose formats come from the HLS playlist built
    from ``_M3U8_TEMPL``.
    """
    mobj = re.match(self._VALID_URL, url)
    display_id, video_id = mobj.groups()

    api_url = 'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0')
    response = self._download_json(api_url, display_id)
    video_info = response['videoConfig']['videoInfo']

    # From here on the id reported by the API replaces the one from the URL.
    video_id = compat_str(video_info['id'])
    title = video_info['title']

    # The first playlist path component depends on whether the API says
    # quality switching is available for this video.
    if video_info.get('isQualityChangeAvailable'):
        prefix = 'smil'
    else:
        prefix = 'mp4'
    m3u8_url = self._M3U8_TEMPL % (
        prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or '')
    formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native')
    self._sort_formats(formats)

    poster_image = video_info.get('posterImage')
    thumbnail = None
    if poster_image:
        thumbnail = 'https://lnk.lt/all-images/' + poster_image

    return {
        'id': video_id,
        'display_id': display_id,
        'title': title,
        'formats': formats,
        'thumbnail': thumbnail,
        'duration': int_or_none(video_info.get('duration')),
        'description': clean_html(video_info.get('htmlDescription')),
        # Ratings missing from _AGE_LIMITS fall back to 0.
        'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
        'timestamp': parse_iso8601(video_info.get('airDate')),
        'view_count': int_or_none(video_info.get('viewsCount')),
    }
|
||||||
|
@ -27,7 +27,7 @@ class MediasetIE(ThePlatformBaseIE):
|
|||||||
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
||||||
player/index\.html\?.*?\bprogramGuid=
|
player/index\.html\?.*?\bprogramGuid=
|
||||||
)
|
)
|
||||||
)(?P<id>[0-9A-Z]{16})
|
)(?P<id>[0-9A-Z]{16,})
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# full episode
|
# full episode
|
||||||
@ -62,7 +62,6 @@ class MediasetIE(ThePlatformBaseIE):
|
|||||||
'uploader': 'Canale 5',
|
'uploader': 'Canale 5',
|
||||||
'uploader_id': 'C5',
|
'uploader_id': 'C5',
|
||||||
},
|
},
|
||||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
|
||||||
}, {
|
}, {
|
||||||
# clip
|
# clip
|
||||||
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
|
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
|
||||||
@ -78,6 +77,18 @@ class MediasetIE(ThePlatformBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'mediaset:FAFU000000665924',
|
'url': 'mediaset:FAFU000000665924',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mediasetplay.mediaset.it/video/mediasethaacuoreilfuturo/palmieri-alicudi-lisola-dei-tre-bambini-felici--un-decreto-per-alicudi-e-tutte-le-microscuole_FD00000000102295',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mediasetplay.mediaset.it/video/cherryseason/anticipazioni-degli-episodi-del-23-ottobre_F306837101005C02',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mediasetplay.mediaset.it/video/tg5/ambiente-onda-umana-per-salvare-il-pianeta_F309453601079D01',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -109,6 +120,11 @@ class MediasetIE(ThePlatformBaseIE):
|
|||||||
entries.append(embed_url)
|
entries.append(embed_url)
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
    """Normalise Mediaset DASH manifest URLs, then defer to the parent parser.

    Every ``<video>`` element whose ``src`` matches the
    ``vod05t-mediaset-it.akamaized.net/....mpd?...`` pattern is rewritten
    in place: the ``t`` after ``vod05`` and the whole query string are
    dropped. Other ``src`` values are left untouched. The (mutated) SMIL
    document is then handed to the base class implementation, whose
    result is returned unchanged.
    """
    for video in smil.findall(self._xpath_ns('.//video', namespace)):
        video.attrib['src'] = re.sub(
            r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+',
            r'\1\2', video.attrib['src'])
    # Explicit two-argument super(): the zero-argument form is Python 3 only,
    # and this codebase is written to run on Python 2 as well.
    return super(MediasetIE, self)._parse_smil_formats(
        smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
guid = self._match_id(url)
|
guid = self._match_id(url)
|
||||||
tp_path = 'PR1GhC/media/guid/2702976343/' + guid
|
tp_path = 'PR1GhC/media/guid/2702976343/' + guid
|
||||||
@ -118,14 +134,15 @@ class MediasetIE(ThePlatformBaseIE):
|
|||||||
subtitles = {}
|
subtitles = {}
|
||||||
first_e = None
|
first_e = None
|
||||||
for asset_type in ('SD', 'HD'):
|
for asset_type in ('SD', 'HD'):
|
||||||
for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
|
# TODO: fixup ISM+none manifest URLs
|
||||||
|
for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
|
||||||
try:
|
try:
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||||
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
|
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
|
||||||
'mbr': 'true',
|
'mbr': 'true',
|
||||||
'formats': f,
|
'formats': f,
|
||||||
'assetTypes': asset_type,
|
'assetTypes': asset_type,
|
||||||
}), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
|
}), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type))
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if not first_e:
|
if not first_e:
|
||||||
first_e = e
|
first_e = e
|
||||||
|
@ -1,70 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
parse_duration,
|
|
||||||
parse_filesize,
|
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MinhatecaIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '125848331',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'youtube-dl test video',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'filesize_approx': 1530000,
|
|
||||||
'duration': 9,
|
|
||||||
'view_count': int,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
token = self._html_search_regex(
|
|
||||||
r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
|
|
||||||
webpage, 'request token')
|
|
||||||
token_data = [
|
|
||||||
('fileId', video_id),
|
|
||||||
('__RequestVerificationToken', token),
|
|
||||||
]
|
|
||||||
req = sanitized_Request(
|
|
||||||
'http://minhateca.com.br/action/License/Download',
|
|
||||||
data=urlencode_postdata(token_data))
|
|
||||||
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
|
||||||
data = self._download_json(
|
|
||||||
req, video_id, note='Downloading metadata')
|
|
||||||
|
|
||||||
video_url = data['redirectUrl']
|
|
||||||
title_str = self._html_search_regex(
|
|
||||||
r'<h1.*?>(.*?)</h1>', webpage, 'title')
|
|
||||||
title, _, ext = title_str.rpartition('.')
|
|
||||||
filesize_approx = parse_filesize(self._html_search_regex(
|
|
||||||
r'<p class="fileSize">(.*?)</p>',
|
|
||||||
webpage, 'file size approximation', fatal=False))
|
|
||||||
duration = parse_duration(self._html_search_regex(
|
|
||||||
r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
|
|
||||||
webpage, 'duration', fatal=False))
|
|
||||||
view_count = int_or_none(self._html_search_regex(
|
|
||||||
r'<p class="downloadsCounter">([0-9]+)</p>',
|
|
||||||
webpage, 'view count', fatal=False))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
|
||||||
'ext': ext,
|
|
||||||
'filesize_approx': filesize_approx,
|
|
||||||
'duration': duration,
|
|
||||||
'view_count': view_count,
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
}
|
|
@ -1,6 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import functools
|
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -11,28 +10,37 @@ from ..compat import (
|
|||||||
compat_ord,
|
compat_ord,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urlparse,
|
|
||||||
compat_zip
|
compat_zip
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
OnDemandPagedList,
|
parse_iso8601,
|
||||||
str_to_int,
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
urljoin,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MixcloudIE(InfoExtractor):
|
class MixcloudBaseIE(InfoExtractor):
    """Shared base for the Mixcloud extractors: wraps the GraphQL endpoint."""

    def _call_api(self, object_type, object_fields, display_id, username, slug=None):
        """Run a ``<object_type>Lookup`` GraphQL query and return its payload.

        object_type   -- lookup prefix; 'Lookup' is appended to form the root field
        object_fields -- GraphQL selection text requested on the looked-up object
        display_id    -- id passed to the downloader for this request
        username      -- value of the ``username`` lookup argument
        slug          -- optional value of the ``slug`` lookup argument

        Returns ``response['data'][<object_type>Lookup]``.
        """
        lookup_key = object_type + 'Lookup'

        # The slug argument is only added to the lookup when one was given.
        slug_filter = ''
        if slug:
            slug_filter = ', slug: "%s"' % slug

        graphql_query = '{\n%s(lookup: {username: "%s"%s}) {\n%s\n}\n}' % (
            lookup_key, username, slug_filter, object_fields)

        response = self._download_json(
            'https://www.mixcloud.com/graphql', display_id,
            query={'query': graphql_query})
        return response['data'][lookup_key]
|
||||||
|
|
||||||
|
|
||||||
|
class MixcloudIE(MixcloudBaseIE):
|
||||||
_VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
|
_VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
|
||||||
IE_NAME = 'mixcloud'
|
IE_NAME = 'mixcloud'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach-cryptkeeper',
|
'id': 'dholbach_cryptkeeper',
|
||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'Cryptkeeper',
|
'title': 'Cryptkeeper',
|
||||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||||
@ -40,11 +48,13 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'uploader_id': 'dholbach',
|
'uploader_id': 'dholbach',
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'timestamp': 1321359578,
|
||||||
|
'upload_date': '20111115',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
||||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
||||||
@ -52,11 +62,14 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'uploader_id': 'gillespeterson',
|
'uploader_id': 'gillespeterson',
|
||||||
'thumbnail': 're:https?://.*',
|
'thumbnail': 're:https?://.*',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'timestamp': 1422987057,
|
||||||
|
'upload_date': '20150203',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _decrypt_xor_cipher(key, ciphertext):
|
def _decrypt_xor_cipher(key, ciphertext):
|
||||||
@ -66,115 +79,66 @@ class MixcloudIE(InfoExtractor):
|
|||||||
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
|
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
username, slug = re.match(self._VALID_URL, url).groups()
|
||||||
uploader = mobj.group(1)
|
username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
|
||||||
cloudcast_name = mobj.group(2)
|
track_id = '%s_%s' % (username, slug)
|
||||||
track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, track_id)
|
cloudcast = self._call_api('cloudcast', '''audioLength
|
||||||
|
comments(first: 100) {
|
||||||
|
edges {
|
||||||
|
node {
|
||||||
|
comment
|
||||||
|
created
|
||||||
|
user {
|
||||||
|
displayName
|
||||||
|
username
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
totalCount
|
||||||
|
}
|
||||||
|
description
|
||||||
|
favorites {
|
||||||
|
totalCount
|
||||||
|
}
|
||||||
|
featuringArtistList
|
||||||
|
isExclusive
|
||||||
|
name
|
||||||
|
owner {
|
||||||
|
displayName
|
||||||
|
url
|
||||||
|
username
|
||||||
|
}
|
||||||
|
picture(width: 1024, height: 1024) {
|
||||||
|
url
|
||||||
|
}
|
||||||
|
plays
|
||||||
|
publishDate
|
||||||
|
reposts {
|
||||||
|
totalCount
|
||||||
|
}
|
||||||
|
streamInfo {
|
||||||
|
dashUrl
|
||||||
|
hlsUrl
|
||||||
|
url
|
||||||
|
}
|
||||||
|
tags {
|
||||||
|
tag {
|
||||||
|
name
|
||||||
|
}
|
||||||
|
}''', track_id, username, slug)
|
||||||
|
|
||||||
# Legacy path
|
title = cloudcast['name']
|
||||||
encrypted_play_info = self._search_regex(
|
|
||||||
r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
|
|
||||||
|
|
||||||
if encrypted_play_info is not None:
|
stream_info = cloudcast['streamInfo']
|
||||||
# Decode
|
|
||||||
encrypted_play_info = compat_b64decode(encrypted_play_info)
|
|
||||||
else:
|
|
||||||
# New path
|
|
||||||
full_info_json = self._parse_json(self._html_search_regex(
|
|
||||||
r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
|
|
||||||
webpage, 'play info'), 'play info')
|
|
||||||
for item in full_info_json:
|
|
||||||
item_data = try_get(item, [
|
|
||||||
lambda x: x['cloudcast']['data']['cloudcastLookup'],
|
|
||||||
lambda x: x['cloudcastLookup']['data']['cloudcastLookup'],
|
|
||||||
], dict)
|
|
||||||
if try_get(item_data, lambda x: x['streamInfo']['url']):
|
|
||||||
info_json = item_data
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Failed to extract matching stream info')
|
|
||||||
|
|
||||||
message = self._html_search_regex(
|
|
||||||
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
|
||||||
webpage, 'error message', default=None)
|
|
||||||
|
|
||||||
js_url = self._search_regex(
|
|
||||||
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
|
|
||||||
webpage, 'js url')
|
|
||||||
js = self._download_webpage(js_url, track_id, 'Downloading JS')
|
|
||||||
# Known plaintext attack
|
|
||||||
if encrypted_play_info:
|
|
||||||
kps = ['{"stream_url":']
|
|
||||||
kpa_target = encrypted_play_info
|
|
||||||
else:
|
|
||||||
kps = ['https://', 'http://']
|
|
||||||
kpa_target = compat_b64decode(info_json['streamInfo']['url'])
|
|
||||||
for kp in kps:
|
|
||||||
partial_key = self._decrypt_xor_cipher(kpa_target, kp)
|
|
||||||
for quote in ["'", '"']:
|
|
||||||
key = self._search_regex(
|
|
||||||
r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
|
|
||||||
js, 'encryption key', default=None)
|
|
||||||
if key is not None:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Failed to extract encryption key')
|
|
||||||
|
|
||||||
if encrypted_play_info is not None:
|
|
||||||
play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
|
|
||||||
if message and 'stream_url' not in play_info:
|
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
|
||||||
song_url = play_info['stream_url']
|
|
||||||
formats = [{
|
|
||||||
'format_id': 'normal',
|
|
||||||
'url': song_url
|
|
||||||
}]
|
|
||||||
|
|
||||||
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
|
|
||||||
thumbnail = self._proto_relative_url(self._html_search_regex(
|
|
||||||
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
|
|
||||||
uploader_id = self._search_regex(
|
|
||||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
|
||||||
description = self._og_search_description(webpage)
|
|
||||||
view_count = str_to_int(self._search_regex(
|
|
||||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
|
||||||
r'/listeners/?">([0-9,.]+)</a>',
|
|
||||||
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
|
|
||||||
webpage, 'play count', default=None))
|
|
||||||
|
|
||||||
else:
|
|
||||||
title = info_json['name']
|
|
||||||
thumbnail = urljoin(
|
|
||||||
'https://thumbnailer.mixcloud.com/unsafe/600x600/',
|
|
||||||
try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
|
|
||||||
uploader = try_get(info_json, lambda x: x['owner']['displayName'])
|
|
||||||
uploader_id = try_get(info_json, lambda x: x['owner']['username'])
|
|
||||||
description = try_get(info_json, lambda x: x['description'])
|
|
||||||
view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
|
|
||||||
|
|
||||||
stream_info = info_json['streamInfo']
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
def decrypt_url(f_url):
|
|
||||||
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
|
|
||||||
decrypted_url = self._decrypt_xor_cipher(k, f_url)
|
|
||||||
if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
|
|
||||||
return decrypted_url
|
|
||||||
|
|
||||||
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
||||||
format_url = stream_info.get(url_key)
|
format_url = stream_info.get(url_key)
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
decrypted = decrypt_url(compat_b64decode(format_url))
|
decrypted = self._decrypt_xor_cipher(
|
||||||
if not decrypted:
|
self._DECRYPTION_KEY, compat_b64decode(format_url))
|
||||||
continue
|
|
||||||
if url_key == 'hlsUrl':
|
if url_key == 'hlsUrl':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
|
decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
@ -191,52 +155,117 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'http_chunk_size': 5242880,
|
'http_chunk_size': 5242880,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if not formats and cloudcast.get('isExclusive'):
|
||||||
|
self.raise_login_required()
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
comments = []
|
||||||
|
for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
|
||||||
|
node = edge.get('node') or {}
|
||||||
|
text = strip_or_none(node.get('comment'))
|
||||||
|
if not text:
|
||||||
|
continue
|
||||||
|
user = node.get('user') or {}
|
||||||
|
comments.append({
|
||||||
|
'author': user.get('displayName'),
|
||||||
|
'author_id': user.get('username'),
|
||||||
|
'text': text,
|
||||||
|
'timestamp': parse_iso8601(node.get('created')),
|
||||||
|
})
|
||||||
|
|
||||||
|
# Collect the names of all tags attached to the cloudcast. Entries without
# a usable string name are skipped. (The previous condition was inverted and
# appended only the empty/None values while discarding every real tag.)
tags = []
for t in cloudcast.get('tags') or []:  # 'tags' may be missing or null
    tag = try_get(t, lambda x: x['tag']['name'], compat_str)
    if tag:
        tags.append(tag)
|
||||||
|
|
||||||
|
get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
|
||||||
|
|
||||||
|
owner = cloudcast.get('owner') or {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': description,
|
'description': cloudcast.get('description'),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
|
||||||
'uploader': uploader,
|
'uploader': owner.get('displayName'),
|
||||||
'uploader_id': uploader_id,
|
'timestamp': parse_iso8601(cloudcast.get('publishDate')),
|
||||||
'view_count': view_count,
|
'uploader_id': owner.get('username'),
|
||||||
|
'uploader_url': owner.get('url'),
|
||||||
|
'duration': int_or_none(cloudcast.get('audioLength')),
|
||||||
|
'view_count': int_or_none(cloudcast.get('plays')),
|
||||||
|
'like_count': get_count('favorites'),
|
||||||
|
'repost_count': get_count('reposts'),
|
||||||
|
'comment_count': get_count('comments'),
|
||||||
|
'comments': comments,
|
||||||
|
'tags': tags,
|
||||||
|
'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class MixcloudPlaylistBaseIE(MixcloudBaseIE):
    """Common pagination logic for Mixcloud list pages.

    This class reads ``_VALID_URL``, ``_ROOT_TYPE``, ``_TITLE_KEY``,
    ``_DESCRIPTION_KEY`` and ``_NODE_TEMPLATE`` from ``self`` but defines
    none of them; they are expected to come from subclasses.
    """

    def _get_cloudcast(self, node):
        """Return the cloudcast dict for a list node; by default the node itself."""
        return node

    def _get_playlist_title(self, title, slug):
        """Return the playlist title; by default the API title unchanged."""
        return title

    def _real_extract(self, url):
        """Page through the list behind ``url`` and return a playlist result.

        The two groups of ``_VALID_URL`` are taken as username and slug; a
        missing slug defaults to 'uploads'. Pages of up to 100 entries are
        requested until the API reports no further page.
        """
        username, slug = re.match(self._VALID_URL, url).groups()
        username = compat_urllib_parse_unquote(username)
        if not slug:
            slug = 'uploads'
        else:
            slug = compat_urllib_parse_unquote(slug)
        playlist_id = '%s_%s' % (username, slug)

        # For the 'playlist' root type the entries live under 'items' and the
        # slug is part of the lookup; otherwise the slug names the list field.
        is_playlist_type = self._ROOT_TYPE == 'playlist'
        playlist_type = 'items' if is_playlist_type else slug
        list_filter = ''

        has_next_page = True
        entries = []
        while has_next_page:
            playlist = self._call_api(
                self._ROOT_TYPE, '''%s
    %s
    %s(first: 100%s) {
      edges {
        node {
          %s
        }
      }
      pageInfo {
        endCursor
        hasNextPage
      }
    }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
                playlist_id, username, slug if is_playlist_type else None)

            items = playlist.get(playlist_type) or {}
            # 'edges' may be absent or null; treat both as an empty page.
            for edge in items.get('edges') or []:
                cloudcast = self._get_cloudcast(edge.get('node') or {})
                cloudcast_url = cloudcast.get('url')
                if not cloudcast_url:
                    continue
                entries.append(self.url_result(
                    cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))

            # Tolerate a missing list / pageInfo instead of raising KeyError,
            # and stop when there is no cursor to continue from.
            page_info = items.get('pageInfo') or {}
            end_cursor = page_info.get('endCursor')
            has_next_page = bool(page_info.get('hasNextPage')) and end_cursor is not None
            if has_next_page:
                list_filter = ', after: "%s"' % end_cursor

        return self.playlist_result(
            entries, playlist_id,
            self._get_playlist_title(playlist[self._TITLE_KEY], slug),
            playlist.get(self._DESCRIPTION_KEY))
|
||||||
|
|
||||||
|
|
||||||
class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
|
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'
|
||||||
IE_NAME = 'mixcloud:user'
|
IE_NAME = 'mixcloud:user'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -244,68 +273,58 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_uploads',
|
'id': 'dholbach_uploads',
|
||||||
'title': 'Daniel Holbach (uploads)',
|
'title': 'Daniel Holbach (uploads)',
|
||||||
'description': 'md5:def36060ac8747b3aabca54924897e47',
|
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 11,
|
'playlist_mincount': 36,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mixcloud.com/dholbach/uploads/',
|
'url': 'http://www.mixcloud.com/dholbach/uploads/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_uploads',
|
'id': 'dholbach_uploads',
|
||||||
'title': 'Daniel Holbach (uploads)',
|
'title': 'Daniel Holbach (uploads)',
|
||||||
'description': 'md5:def36060ac8747b3aabca54924897e47',
|
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 11,
|
'playlist_mincount': 36,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mixcloud.com/dholbach/favorites/',
|
'url': 'http://www.mixcloud.com/dholbach/favorites/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_favorites',
|
'id': 'dholbach_favorites',
|
||||||
'title': 'Daniel Holbach (favorites)',
|
'title': 'Daniel Holbach (favorites)',
|
||||||
'description': 'md5:def36060ac8747b3aabca54924897e47',
|
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
|
||||||
},
|
},
|
||||||
'params': {
|
# 'params': {
|
||||||
'playlist_items': '1-100',
|
# 'playlist_items': '1-100',
|
||||||
},
|
# },
|
||||||
'playlist_mincount': 100,
|
'playlist_mincount': 396,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mixcloud.com/dholbach/listens/',
|
'url': 'http://www.mixcloud.com/dholbach/listens/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach_listens',
|
'id': 'dholbach_listens',
|
||||||
'title': 'Daniel Holbach (listens)',
|
'title': 'Daniel Holbach (listens)',
|
||||||
'description': 'md5:def36060ac8747b3aabca54924897e47',
|
'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
|
||||||
},
|
},
|
||||||
'params': {
|
# 'params': {
|
||||||
'playlist_items': '1-100',
|
# 'playlist_items': '1-100',
|
||||||
|
# },
|
||||||
|
'playlist_mincount': 1623,
|
||||||
|
'skip': 'Large list',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mixcloud.com/FirstEar/stream/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'FirstEar_stream',
|
||||||
|
'title': 'First Ear (stream)',
|
||||||
|
'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 100,
|
'playlist_mincount': 271,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
_TITLE_KEY = 'displayName'
|
||||||
mobj = re.match(self._VALID_URL, url)
|
_DESCRIPTION_KEY = 'biog'
|
||||||
user_id = mobj.group('user')
|
_ROOT_TYPE = 'user'
|
||||||
list_type = mobj.group('type')
|
_NODE_TEMPLATE = '''slug
|
||||||
|
url'''
|
||||||
|
|
||||||
# if only a profile URL was supplied, default to download all uploads
|
def _get_playlist_title(self, title, slug):
|
||||||
if list_type is None:
|
return '%s (%s)' % (title, slug)
|
||||||
list_type = 'uploads'
|
|
||||||
|
|
||||||
video_id = '%s_%s' % (user_id, list_type)
|
|
||||||
|
|
||||||
profile = self._download_webpage(
|
|
||||||
'https://www.mixcloud.com/%s/' % user_id, video_id,
|
|
||||||
note='Downloading user profile',
|
|
||||||
errnote='Unable to download user profile')
|
|
||||||
|
|
||||||
username = self._og_search_title(profile)
|
|
||||||
description = self._get_user_description(profile)
|
|
||||||
|
|
||||||
entries = OnDemandPagedList(
|
|
||||||
functools.partial(
|
|
||||||
self._tracks_page_func,
|
|
||||||
'%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
|
|
||||||
self._PAGE_SIZE)
|
|
||||||
|
|
||||||
return self.playlist_result(
|
|
||||||
entries, video_id, '%s (%s)' % (username, list_type), description)
|
|
||||||
|
|
||||||
|
|
||||||
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
||||||
@ -313,87 +332,20 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
|||||||
IE_NAME = 'mixcloud:playlist'
|
IE_NAME = 'mixcloud:playlist'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'RedBullThre3style_tokyo-finalists-2015',
|
|
||||||
'title': 'National Champions 2015',
|
|
||||||
'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 16,
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
|
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
user_id = mobj.group('user')
|
|
||||||
playlist_id = mobj.group('playlist')
|
|
||||||
video_id = '%s_%s' % (user_id, playlist_id)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
url, user_id,
|
|
||||||
note='Downloading playlist page',
|
|
||||||
errnote='Unable to download playlist page')
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
|
|
||||||
webpage, 'playlist title',
|
|
||||||
default=None) or self._og_search_title(webpage, fatal=False)
|
|
||||||
description = self._get_user_description(webpage)
|
|
||||||
|
|
||||||
entries = OnDemandPagedList(
|
|
||||||
functools.partial(
|
|
||||||
self._tracks_page_func,
|
|
||||||
'%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
|
|
||||||
self._PAGE_SIZE)
|
|
||||||
|
|
||||||
return self.playlist_result(entries, video_id, title, description)
|
|
||||||
|
|
||||||
|
|
||||||
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
|
|
||||||
IE_NAME = 'mixcloud:stream'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://www.mixcloud.com/FirstEar/stream/',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'FirstEar',
|
'id': 'maxvibes_jazzcat-on-ness-radio',
|
||||||
'title': 'First Ear',
|
'title': 'Ness Radio sessions',
|
||||||
'description': 'Curators of good music\nfirstearmusic.com',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 192,
|
'playlist_mincount': 59,
|
||||||
}
|
}]
|
||||||
|
_TITLE_KEY = 'name'
|
||||||
|
_DESCRIPTION_KEY = 'description'
|
||||||
|
_ROOT_TYPE = 'playlist'
|
||||||
|
_NODE_TEMPLATE = '''cloudcast {
|
||||||
|
slug
|
||||||
|
url
|
||||||
|
}'''
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _get_cloudcast(self, node):
|
||||||
user_id = self._match_id(url)
|
return node.get('cloudcast') or {}
|
||||||
|
|
||||||
webpage = self._download_webpage(url, user_id)
|
|
||||||
|
|
||||||
entries = []
|
|
||||||
prev_page_url = None
|
|
||||||
|
|
||||||
def _handle_page(page):
|
|
||||||
entries.extend(self._find_urls_in_page(page))
|
|
||||||
return self._search_regex(
|
|
||||||
r'm-next-page-url="([^"]+)"', page,
|
|
||||||
'next page URL', default=None)
|
|
||||||
|
|
||||||
next_page_url = _handle_page(webpage)
|
|
||||||
|
|
||||||
for idx in itertools.count(0):
|
|
||||||
if not next_page_url or prev_page_url == next_page_url:
|
|
||||||
break
|
|
||||||
|
|
||||||
prev_page_url = next_page_url
|
|
||||||
current_page = int(self._search_regex(
|
|
||||||
r'\?page=(\d+)', next_page_url, 'next page number'))
|
|
||||||
|
|
||||||
next_page_url = _handle_page(self._fetch_tracks_page(
|
|
||||||
'%s/stream' % user_id, user_id, 'stream', idx,
|
|
||||||
real_page_number=current_page))
|
|
||||||
|
|
||||||
username = self._og_search_title(webpage)
|
|
||||||
description = self._get_user_description(webpage)
|
|
||||||
|
|
||||||
return self.playlist_result(entries, user_id, username, description)
|
|
||||||
|
@ -41,6 +41,14 @@ class MSNIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6',
|
'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Vidible(AOL) Embed
|
||||||
|
'url': 'https://www.msn.com/en-us/video/animals/yellowstone-park-staffers-catch-deer-engaged-in-behavior-they-cant-explain/vi-AAGfdg1',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Dailymotion Embed
|
||||||
|
'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -61,6 +69,18 @@ class MSNIE(InfoExtractor):
|
|||||||
webpage, 'error', group='error'))
|
webpage, 'error', group='error'))
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||||
|
|
||||||
|
player_name = video.get('playerName')
|
||||||
|
if player_name:
|
||||||
|
provider_id = video.get('providerId')
|
||||||
|
if provider_id:
|
||||||
|
if player_name == 'AOL':
|
||||||
|
return self.url_result(
|
||||||
|
'aol-video:' + provider_id, 'Aol', provider_id)
|
||||||
|
elif player_name == 'Dailymotion':
|
||||||
|
return self.url_result(
|
||||||
|
'https://www.dailymotion.com/video/' + provider_id,
|
||||||
|
'Dailymotion', provider_id)
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -1,73 +1,56 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import os.path
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MySpassIE(InfoExtractor):
|
class MySpassIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?myspass\.de/.*'
|
_VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
||||||
'md5': '0b49f4844a068f8b33f4b7c88405862b',
|
'md5': '0b49f4844a068f8b33f4b7c88405862b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '11741',
|
'id': '11741',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
|
'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
|
||||||
'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2',
|
'title': '17.02.2013 - Die Highlights, Teil 2',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
# video id is the last path element of the URL
|
|
||||||
# usually there is a trailing slash, so also try the second but last
|
|
||||||
url_path = compat_urllib_parse_urlparse(url).path
|
|
||||||
url_parent_path, video_id = os.path.split(url_path)
|
|
||||||
if not video_id:
|
|
||||||
_, video_id = os.path.split(url_parent_path)
|
|
||||||
|
|
||||||
# get metadata
|
|
||||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
|
||||||
metadata = self._download_xml(
|
metadata = self._download_xml(
|
||||||
metadata_url, video_id, transform_source=lambda s: s.strip())
|
'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
# extract values from metadata
|
title = xpath_text(metadata, 'title', fatal=True)
|
||||||
url_flv_el = metadata.find('url_flv')
|
video_url = xpath_text(metadata, 'url_flv', 'download url', True)
|
||||||
if url_flv_el is None:
|
video_id_int = int(video_id)
|
||||||
raise ExtractorError('Unable to extract download url')
|
for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
|
||||||
video_url = url_flv_el.text
|
group_int = int(group)
|
||||||
title_el = metadata.find('title')
|
if group_int > video_id_int:
|
||||||
if title_el is None:
|
video_url = video_url.replace(
|
||||||
raise ExtractorError('Unable to extract title')
|
group, compat_str(group_int // video_id_int))
|
||||||
title = title_el.text
|
|
||||||
format_id_el = metadata.find('format_id')
|
|
||||||
if format_id_el is None:
|
|
||||||
format = 'mp4'
|
|
||||||
else:
|
|
||||||
format = format_id_el.text
|
|
||||||
description_el = metadata.find('description')
|
|
||||||
if description_el is not None:
|
|
||||||
description = description_el.text
|
|
||||||
else:
|
|
||||||
description = None
|
|
||||||
imagePreview_el = metadata.find('imagePreview')
|
|
||||||
if imagePreview_el is not None:
|
|
||||||
thumbnail = imagePreview_el.text
|
|
||||||
else:
|
|
||||||
thumbnail = None
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'format': format,
|
'thumbnail': xpath_text(metadata, 'imagePreview'),
|
||||||
'thumbnail': thumbnail,
|
'description': xpath_text(metadata, 'description'),
|
||||||
'description': description,
|
'duration': parse_duration(xpath_text(metadata, 'duration')),
|
||||||
|
'series': xpath_text(metadata, 'format'),
|
||||||
|
'season_number': int_or_none(xpath_text(metadata, 'season')),
|
||||||
|
'season_id': xpath_text(metadata, 'season_id'),
|
||||||
|
'episode': title,
|
||||||
|
'episode_number': int_or_none(xpath_text(metadata, 'episode')),
|
||||||
}
|
}
|
||||||
|
@ -108,7 +108,7 @@ class NexxIE(InfoExtractor):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_domain_id(webpage):
|
def _extract_domain_id(webpage):
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
|
r'<script\b[^>]+\bsrc=["\'](?:https?:)?//(?:require|arc)\.nexx(?:\.cloud|cdn\.com)/(?:sdk/)?(?P<id>\d+)',
|
||||||
webpage)
|
webpage)
|
||||||
return mobj.group('id') if mobj else None
|
return mobj.group('id') if mobj else None
|
||||||
|
|
||||||
@ -123,7 +123,7 @@ class NexxIE(InfoExtractor):
|
|||||||
domain_id = NexxIE._extract_domain_id(webpage)
|
domain_id = NexxIE._extract_domain_id(webpage)
|
||||||
if domain_id:
|
if domain_id:
|
||||||
for video_id in re.findall(
|
for video_id in re.findall(
|
||||||
r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
|
r'(?is)onPLAYReady.+?_play\.(?:init|(?:control\.)?addPlayer)\s*\(.+?\s*,\s*["\']?(\d+)',
|
||||||
webpage):
|
webpage):
|
||||||
entries.append(
|
entries.append(
|
||||||
'https://api.nexx.cloud/v3/%s/videos/byid/%s'
|
'https://api.nexx.cloud/v3/%s/videos/byid/%s'
|
||||||
@ -410,8 +410,8 @@ class NexxIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NexxEmbedIE(InfoExtractor):
|
class NexxEmbedIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
|
'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
|
||||||
'md5': '16746bfc28c42049492385c989b26c4a',
|
'md5': '16746bfc28c42049492385c989b26c4a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -420,7 +420,6 @@ class NexxEmbedIE(InfoExtractor):
|
|||||||
'title': 'Nervenkitzel Achterbahn',
|
'title': 'Nervenkitzel Achterbahn',
|
||||||
'alt_title': 'Karussellbauer in Deutschland',
|
'alt_title': 'Karussellbauer in Deutschland',
|
||||||
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
|
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
|
||||||
'release_year': 2005,
|
|
||||||
'creator': 'SPIEGEL TV',
|
'creator': 'SPIEGEL TV',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 2761,
|
'duration': 2761,
|
||||||
@ -431,7 +430,10 @@ class NexxEmbedIE(InfoExtractor):
|
|||||||
'format': 'bestvideo',
|
'format': 'bestvideo',
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://embed.nexx.cloud/11888/video/DSRTO7UVOX06S7',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
|
@ -3,9 +3,10 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
|
||||||
xpath_text,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
strip_or_none,
|
||||||
|
unescapeHTML,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -47,10 +48,10 @@ class NTVRuIE(InfoExtractor):
|
|||||||
'duration': 1496,
|
'duration': 1496,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ntv.ru/kino/Koma_film',
|
'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/',
|
||||||
'md5': 'f825770930937aa7e5aca0dc0d29319a',
|
'md5': 'e9c7cde24d9d3eaed545911a04e6d4f4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1007609',
|
'id': '1126480',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Остросюжетный фильм «Кома»',
|
'title': 'Остросюжетный фильм «Кома»',
|
||||||
'description': 'Остросюжетный фильм «Кома»',
|
'description': 'Остросюжетный фильм «Кома»',
|
||||||
@ -68,6 +69,10 @@ class NTVRuIE(InfoExtractor):
|
|||||||
'thumbnail': r're:^http://.*\.jpg',
|
'thumbnail': r're:^http://.*\.jpg',
|
||||||
'duration': 2590,
|
'duration': 2590,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Schemeless file URL
|
||||||
|
'url': 'https://www.ntv.ru/video/1797442',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_VIDEO_ID_REGEXES = [
|
_VIDEO_ID_REGEXES = [
|
||||||
@ -96,37 +101,31 @@ class NTVRuIE(InfoExtractor):
|
|||||||
'http://www.ntv.ru/vi%s/' % video_id,
|
'http://www.ntv.ru/vi%s/' % video_id,
|
||||||
video_id, 'Downloading video XML')
|
video_id, 'Downloading video XML')
|
||||||
|
|
||||||
title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
|
title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True)))
|
||||||
description = clean_html(xpath_text(player, './data/description', 'description'))
|
|
||||||
|
|
||||||
video = player.find('./data/video')
|
video = player.find('./data/video')
|
||||||
video_id = xpath_text(video, './id', 'video id')
|
|
||||||
thumbnail = xpath_text(video, './splash', 'thumbnail')
|
|
||||||
duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
|
|
||||||
view_count = int_or_none(xpath_text(video, './views', 'view count'))
|
|
||||||
|
|
||||||
token = self._download_webpage(
|
|
||||||
'http://stat.ntv.ru/services/access/token',
|
|
||||||
video_id, 'Downloading access token')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id in ['', 'hi', 'webm']:
|
for format_id in ['', 'hi', 'webm']:
|
||||||
file_ = video.find('./%sfile' % format_id)
|
file_ = xpath_text(video, './%sfile' % format_id)
|
||||||
if file_ is None:
|
if not file_:
|
||||||
continue
|
continue
|
||||||
size = video.find('./%ssize' % format_id)
|
if file_.startswith('//'):
|
||||||
|
file_ = self._proto_relative_url(file_)
|
||||||
|
elif not file_.startswith('http'):
|
||||||
|
file_ = 'http://media.ntv.ru/vod/' + file_
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
|
'url': file_,
|
||||||
'filesize': int_or_none(size.text if size is not None else None),
|
'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)),
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': xpath_text(video, './id'),
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': strip_or_none(unescapeHTML(xpath_text(player, './data/description'))),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': xpath_text(video, './splash'),
|
||||||
'duration': duration,
|
'duration': int_or_none(xpath_text(video, './totaltime')),
|
||||||
'view_count': view_count,
|
'view_count': int_or_none(xpath_text(video, './views')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -4,12 +4,8 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..compat import compat_str
|
||||||
determine_ext,
|
from ..utils import js_to_json
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
|
||||||
mimetype2ext,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class OnionStudiosIE(InfoExtractor):
|
class OnionStudiosIE(InfoExtractor):
|
||||||
@ -17,14 +13,16 @@ class OnionStudiosIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
|
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
|
||||||
'md5': '719d1f8c32094b8c33902c17bcae5e34',
|
'md5': '5a118d466d62b5cd03647cf2c593977f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2937',
|
'id': '3459881',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Hannibal charges forward, stops for a cocktail',
|
'title': 'Hannibal charges forward, stops for a cocktail',
|
||||||
|
'description': 'md5:545299bda6abf87e5ec666548c6a9448',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'The A.V. Club',
|
'uploader': 'a.v. club',
|
||||||
'uploader_id': 'the-av-club',
|
'upload_date': '20150619',
|
||||||
|
'timestamp': 1434728546,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
|
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
|
||||||
@ -44,38 +42,12 @@ class OnionStudiosIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video_data = self._download_json(
|
webpage = self._download_webpage(
|
||||||
'http://www.onionstudios.com/video/%s.json' % video_id, video_id)
|
'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js',
|
||||||
|
video_id)
|
||||||
title = video_data['title']
|
mcp_id = compat_str(self._parse_json(self._search_regex(
|
||||||
|
r'window\.mcpMapping\s*=\s*({.+?});', webpage,
|
||||||
formats = []
|
'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id'])
|
||||||
for source in video_data.get('sources', []):
|
return self.url_result(
|
||||||
source_url = source.get('url')
|
'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id,
|
||||||
if not source_url:
|
'KinjaEmbed', mcp_id)
|
||||||
continue
|
|
||||||
ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
|
||||||
else:
|
|
||||||
tbr = int_or_none(source.get('bitrate'))
|
|
||||||
formats.append({
|
|
||||||
'format_id': ext + ('-%d' % tbr if tbr else ''),
|
|
||||||
'url': source_url,
|
|
||||||
'width': int_or_none(source.get('width')),
|
|
||||||
'tbr': tbr,
|
|
||||||
'ext': ext,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': video_data.get('poster_url'),
|
|
||||||
'uploader': video_data.get('channel_name'),
|
|
||||||
'uploader_id': video_data.get('channel_slug'),
|
|
||||||
'duration': float_or_none(video_data.get('duration', 1000)),
|
|
||||||
'tags': video_data.get('tags'),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
@ -6,7 +6,11 @@ from ..utils import (
|
|||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
KNOWN_EXTENSIONS,
|
||||||
|
mimetype2ext,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -24,6 +28,7 @@ class PatreonIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*$',
|
'thumbnail': 're:^https?://.*$',
|
||||||
'timestamp': 1406473987,
|
'timestamp': 1406473987,
|
||||||
'upload_date': '20140727',
|
'upload_date': '20140727',
|
||||||
|
'uploader_id': '87145',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.patreon.com/creation?hid=754133',
|
'url': 'http://www.patreon.com/creation?hid=754133',
|
||||||
@ -90,7 +95,13 @@ class PatreonIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
post = self._download_json(
|
post = self._download_json(
|
||||||
'https://www.patreon.com/api/posts/' + video_id, video_id)
|
'https://www.patreon.com/api/posts/' + video_id, video_id, query={
|
||||||
|
'fields[media]': 'download_url,mimetype,size_bytes',
|
||||||
|
'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title',
|
||||||
|
'fields[user]': 'full_name,url',
|
||||||
|
'json-api-use-default-includes': 'false',
|
||||||
|
'include': 'media,user',
|
||||||
|
})
|
||||||
attributes = post['data']['attributes']
|
attributes = post['data']['attributes']
|
||||||
title = attributes['title'].strip()
|
title = attributes['title'].strip()
|
||||||
image = attributes.get('image') or {}
|
image = attributes.get('image') or {}
|
||||||
@ -104,33 +115,42 @@ class PatreonIE(InfoExtractor):
|
|||||||
'comment_count': int_or_none(attributes.get('comment_count')),
|
'comment_count': int_or_none(attributes.get('comment_count')),
|
||||||
}
|
}
|
||||||
|
|
||||||
def add_file(file_data):
|
|
||||||
file_url = file_data.get('url')
|
|
||||||
if file_url:
|
|
||||||
info.update({
|
|
||||||
'url': file_url,
|
|
||||||
'ext': determine_ext(file_data.get('name'), 'mp3'),
|
|
||||||
})
|
|
||||||
|
|
||||||
for i in post.get('included', []):
|
for i in post.get('included', []):
|
||||||
i_type = i.get('type')
|
i_type = i.get('type')
|
||||||
if i_type == 'attachment':
|
if i_type == 'media':
|
||||||
add_file(i.get('attributes') or {})
|
media_attributes = i.get('attributes') or {}
|
||||||
|
download_url = media_attributes.get('download_url')
|
||||||
|
ext = mimetype2ext(media_attributes.get('mimetype'))
|
||||||
|
if download_url and ext in KNOWN_EXTENSIONS:
|
||||||
|
info.update({
|
||||||
|
'ext': ext,
|
||||||
|
'filesize': int_or_none(media_attributes.get('size_bytes')),
|
||||||
|
'url': download_url,
|
||||||
|
})
|
||||||
elif i_type == 'user':
|
elif i_type == 'user':
|
||||||
user_attributes = i.get('attributes')
|
user_attributes = i.get('attributes')
|
||||||
if user_attributes:
|
if user_attributes:
|
||||||
info.update({
|
info.update({
|
||||||
'uploader': user_attributes.get('full_name'),
|
'uploader': user_attributes.get('full_name'),
|
||||||
|
'uploader_id': str_or_none(i.get('id')),
|
||||||
'uploader_url': user_attributes.get('url'),
|
'uploader_url': user_attributes.get('url'),
|
||||||
})
|
})
|
||||||
|
|
||||||
if not info.get('url'):
|
if not info.get('url'):
|
||||||
add_file(attributes.get('post_file') or {})
|
embed_url = try_get(attributes, lambda x: x['embed']['url'])
|
||||||
|
if embed_url:
|
||||||
if not info.get('url'):
|
|
||||||
info.update({
|
info.update({
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': attributes['embed']['url'],
|
'url': embed_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
if not info.get('url'):
|
||||||
|
post_file = attributes['post_file']
|
||||||
|
ext = determine_ext(post_file.get('name'))
|
||||||
|
if ext in KNOWN_EXTENSIONS:
|
||||||
|
info.update({
|
||||||
|
'ext': ext,
|
||||||
|
'url': post_file['url'],
|
||||||
})
|
})
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
@ -17,12 +17,54 @@ class PeriscopeBaseIE(InfoExtractor):
|
|||||||
'https://api.periscope.tv/api/v2/%s' % method,
|
'https://api.periscope.tv/api/v2/%s' % method,
|
||||||
item_id, query=query)
|
item_id, query=query)
|
||||||
|
|
||||||
|
def _parse_broadcast_data(self, broadcast, video_id):
|
||||||
|
title = broadcast['status']
|
||||||
|
uploader = broadcast.get('user_display_name') or broadcast.get('username')
|
||||||
|
title = '%s - %s' % (uploader, title) if uploader else title
|
||||||
|
is_live = broadcast.get('state').lower() == 'running'
|
||||||
|
|
||||||
|
thumbnails = [{
|
||||||
|
'url': broadcast[image],
|
||||||
|
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': broadcast.get('id') or video_id,
|
||||||
|
'title': self._live_title(title) if is_live else title,
|
||||||
|
'timestamp': parse_iso8601(broadcast.get('created_at')),
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'view_count': int_or_none(broadcast.get('total_watched')),
|
||||||
|
'tags': broadcast.get('tags'),
|
||||||
|
'is_live': is_live,
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_common_format_info(broadcast):
|
||||||
|
return broadcast.get('state').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height'))
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _add_width_and_height(f, width, height):
|
||||||
|
for key, val in (('width', width), ('height', height)):
|
||||||
|
if not f.get(key):
|
||||||
|
f[key] = val
|
||||||
|
|
||||||
|
def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True):
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native'
|
||||||
|
if state in ('ended', 'timed_out') else 'm3u8',
|
||||||
|
m3u8_id=format_id, fatal=fatal)
|
||||||
|
if len(m3u8_formats) == 1:
|
||||||
|
self._add_width_and_height(m3u8_formats[0], width, height)
|
||||||
|
return m3u8_formats
|
||||||
|
|
||||||
|
|
||||||
class PeriscopeIE(PeriscopeBaseIE):
|
class PeriscopeIE(PeriscopeBaseIE):
|
||||||
IE_DESC = 'Periscope'
|
IE_DESC = 'Periscope'
|
||||||
IE_NAME = 'periscope'
|
IE_NAME = 'periscope'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
|
||||||
# Alive example URLs can be found here http://onperiscope.com/
|
# Alive example URLs can be found here https://www.periscope.tv/
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
|
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
|
||||||
'md5': '65b57957972e503fcbbaeed8f4fa04ca',
|
'md5': '65b57957972e503fcbbaeed8f4fa04ca',
|
||||||
@ -61,21 +103,9 @@ class PeriscopeIE(PeriscopeBaseIE):
|
|||||||
'accessVideoPublic', {'broadcast_id': token}, token)
|
'accessVideoPublic', {'broadcast_id': token}, token)
|
||||||
|
|
||||||
broadcast = stream['broadcast']
|
broadcast = stream['broadcast']
|
||||||
title = broadcast['status']
|
info = self._parse_broadcast_data(broadcast, token)
|
||||||
|
|
||||||
uploader = broadcast.get('user_display_name') or broadcast.get('username')
|
|
||||||
uploader_id = (broadcast.get('user_id') or broadcast.get('username'))
|
|
||||||
|
|
||||||
title = '%s - %s' % (uploader, title) if uploader else title
|
|
||||||
state = broadcast.get('state').lower()
|
state = broadcast.get('state').lower()
|
||||||
if state == 'running':
|
|
||||||
title = self._live_title(title)
|
|
||||||
timestamp = parse_iso8601(broadcast.get('created_at'))
|
|
||||||
|
|
||||||
thumbnails = [{
|
|
||||||
'url': broadcast[image],
|
|
||||||
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
|
|
||||||
|
|
||||||
width = int_or_none(broadcast.get('width'))
|
width = int_or_none(broadcast.get('width'))
|
||||||
height = int_or_none(broadcast.get('height'))
|
height = int_or_none(broadcast.get('height'))
|
||||||
|
|
||||||
@ -92,32 +122,20 @@ class PeriscopeIE(PeriscopeBaseIE):
|
|||||||
continue
|
continue
|
||||||
video_urls.add(video_url)
|
video_urls.add(video_url)
|
||||||
if format_id != 'rtmp':
|
if format_id != 'rtmp':
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_pscp_m3u8_formats(
|
||||||
video_url, token, 'mp4',
|
video_url, token, format_id, state, width, height, False)
|
||||||
entry_protocol='m3u8_native'
|
|
||||||
if state in ('ended', 'timed_out') else 'm3u8',
|
|
||||||
m3u8_id=format_id, fatal=False)
|
|
||||||
if len(m3u8_formats) == 1:
|
|
||||||
add_width_and_height(m3u8_formats[0])
|
|
||||||
formats.extend(m3u8_formats)
|
formats.extend(m3u8_formats)
|
||||||
continue
|
continue
|
||||||
rtmp_format = {
|
rtmp_format = {
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
|
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
|
||||||
}
|
}
|
||||||
add_width_and_height(rtmp_format)
|
self._add_width_and_height(rtmp_format)
|
||||||
formats.append(rtmp_format)
|
formats.append(rtmp_format)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
info['formats'] = formats
|
||||||
'id': broadcast.get('id') or token,
|
return info
|
||||||
'title': title,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'uploader': uploader,
|
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class PeriscopeUserIE(PeriscopeBaseIE):
|
class PeriscopeUserIE(PeriscopeBaseIE):
|
||||||
|
@ -1,170 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
parse_iso8601,
|
|
||||||
unescapeHTML,
|
|
||||||
qualities,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Revision3EmbedIE(InfoExtractor):
|
|
||||||
IE_NAME = 'revision3:embed'
|
|
||||||
_VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
|
|
||||||
'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '67558',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'The Pros & Cons Of Zoos',
|
|
||||||
'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
|
|
||||||
'uploader_id': 'dnews',
|
|
||||||
'uploader': 'DNews',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
playlist_id = mobj.group('playlist_id')
|
|
||||||
playlist_type = mobj.group('playlist_type') or 'video_id'
|
|
||||||
video_data = self._download_json(
|
|
||||||
'http://revision3.com/api/getPlaylist.json', playlist_id, query={
|
|
||||||
'api_key': self._API_KEY,
|
|
||||||
'codecs': 'h264,vp8,theora',
|
|
||||||
playlist_type: playlist_id,
|
|
||||||
})['items'][0]
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for vcodec, media in video_data['media'].items():
|
|
||||||
for quality_id, quality in media.items():
|
|
||||||
if quality_id == 'hls':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
quality['url'], playlist_id, 'mp4',
|
|
||||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': quality['url'],
|
|
||||||
'format_id': '%s-%s' % (vcodec, quality_id),
|
|
||||||
'tbr': int_or_none(quality.get('bitrate')),
|
|
||||||
'vcodec': vcodec,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': playlist_id,
|
|
||||||
'title': unescapeHTML(video_data['title']),
|
|
||||||
'description': unescapeHTML(video_data.get('summary')),
|
|
||||||
'uploader': video_data.get('show', {}).get('name'),
|
|
||||||
'uploader_id': video_data.get('show', {}).get('slug'),
|
|
||||||
'duration': int_or_none(video_data.get('duration')),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class Revision3IE(InfoExtractor):
|
|
||||||
IE_NAME = 'revision'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
|
|
||||||
'md5': 'd94a72d85d0a829766de4deb8daaf7df',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '71089',
|
|
||||||
'display_id': 'technobuffalo/5-google-predictions-for-2016',
|
|
||||||
'ext': 'webm',
|
|
||||||
'title': '5 Google Predictions for 2016',
|
|
||||||
'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.',
|
|
||||||
'upload_date': '20151228',
|
|
||||||
'timestamp': 1451325600,
|
|
||||||
'duration': 187,
|
|
||||||
'uploader': 'TechnoBuffalo',
|
|
||||||
'uploader_id': 'technobuffalo',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# Show
|
|
||||||
'url': 'http://revision3.com/variant',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
# Tag
|
|
||||||
'url': 'http://revision3.com/vr',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
_PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
domain, display_id = re.match(self._VALID_URL, url).groups()
|
|
||||||
site = domain.split('.')[0]
|
|
||||||
page_info = self._download_json(
|
|
||||||
self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
|
|
||||||
|
|
||||||
page_data = page_info['data']
|
|
||||||
page_type = page_data['type']
|
|
||||||
if page_type in ('episode', 'embed'):
|
|
||||||
show_data = page_data['show']['data']
|
|
||||||
page_id = compat_str(page_data['id'])
|
|
||||||
video_id = compat_str(page_data['video']['data']['id'])
|
|
||||||
|
|
||||||
preference = qualities(['mini', 'small', 'medium', 'large'])
|
|
||||||
thumbnails = [{
|
|
||||||
'url': image_url,
|
|
||||||
'id': image_id,
|
|
||||||
'preference': preference(image_id)
|
|
||||||
} for image_id, image_url in page_data.get('images', {}).items()]
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': page_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': unescapeHTML(page_data['name']),
|
|
||||||
'description': unescapeHTML(page_data.get('summary')),
|
|
||||||
'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
|
|
||||||
'author': page_data.get('author'),
|
|
||||||
'uploader': show_data.get('name'),
|
|
||||||
'uploader_id': show_data.get('slug'),
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'extractor_key': site,
|
|
||||||
}
|
|
||||||
|
|
||||||
if page_type == 'embed':
|
|
||||||
info.update({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': page_data['video']['data']['embed'],
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
|
||||||
info.update({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': 'revision3:%s' % video_id,
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
else:
|
|
||||||
list_data = page_info[page_type]['data']
|
|
||||||
episodes_data = page_info['episodes']['data']
|
|
||||||
num_episodes = page_info['meta']['totalEpisodes']
|
|
||||||
processed_episodes = 0
|
|
||||||
entries = []
|
|
||||||
page_num = 1
|
|
||||||
while True:
|
|
||||||
entries.extend([{
|
|
||||||
'_type': 'url',
|
|
||||||
'url': 'http://%s%s' % (domain, episode['path']),
|
|
||||||
'id': compat_str(episode['id']),
|
|
||||||
'ie_key': 'Revision3',
|
|
||||||
'extractor_key': site,
|
|
||||||
} for episode in episodes_data])
|
|
||||||
processed_episodes += len(episodes_data)
|
|
||||||
if processed_episodes == num_episodes:
|
|
||||||
break
|
|
||||||
page_num += 1
|
|
||||||
episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % (
|
|
||||||
domain, display_id + '/' + compat_str(page_num), domain),
|
|
||||||
display_id)['episodes']['data']
|
|
||||||
|
|
||||||
return self.playlist_result(
|
|
||||||
entries, compat_str(list_data['id']),
|
|
||||||
list_data.get('name'), list_data.get('summary'))
|
|
@ -1,8 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
@ -18,7 +16,6 @@ from ..utils import (
|
|||||||
|
|
||||||
class RoosterTeethIE(InfoExtractor):
|
class RoosterTeethIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
|
||||||
_LOGIN_URL = 'https://roosterteeth.com/login'
|
|
||||||
_NETRC_MACHINE = 'roosterteeth'
|
_NETRC_MACHINE = 'roosterteeth'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||||
@ -53,48 +50,40 @@ class RoosterTeethIE(InfoExtractor):
|
|||||||
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/'
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
username, password = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
login_page = self._download_webpage(
|
try:
|
||||||
self._LOGIN_URL, None,
|
self._download_json(
|
||||||
note='Downloading login page',
|
'https://auth.roosterteeth.com/oauth/token',
|
||||||
errnote='Unable to download login page')
|
None, 'Logging in', data=urlencode_postdata({
|
||||||
|
'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
|
||||||
login_form = self._hidden_inputs(login_page)
|
'grant_type': 'password',
|
||||||
|
|
||||||
login_form.update({
|
|
||||||
'username': username,
|
'username': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
})
|
}))
|
||||||
|
except ExtractorError as e:
|
||||||
login_request = self._download_webpage(
|
msg = 'Unable to login'
|
||||||
self._LOGIN_URL, None,
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||||
note='Logging in',
|
resp = self._parse_json(e.cause.read().decode(), None, fatal=False)
|
||||||
data=urlencode_postdata(login_form),
|
if resp:
|
||||||
headers={
|
error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
|
||||||
'Referer': self._LOGIN_URL,
|
|
||||||
})
|
|
||||||
|
|
||||||
if not any(re.search(p, login_request) for p in (
|
|
||||||
r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
|
|
||||||
r'>Sign Out<')):
|
|
||||||
error = self._html_search_regex(
|
|
||||||
r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
|
|
||||||
login_request, 'alert', default=None, group='error')
|
|
||||||
if error:
|
if error:
|
||||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
msg += ': ' + error
|
||||||
raise ExtractorError('Unable to log in')
|
self.report_warning(msg)
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
|
if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'):
|
||||||
|
return
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id
|
api_episode_url = self._EPISODE_BASE_URL + display_id
|
||||||
|
|
||||||
try:
|
try:
|
||||||
m3u8_url = self._download_json(
|
m3u8_url = self._download_json(
|
||||||
|
144
youtube_dl/extractor/scte.py
Normal file
144
youtube_dl/extractor/scte.py
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
decode_packed_codes,
|
||||||
|
ExtractorError,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SCTEBaseIE(InfoExtractor):
|
||||||
|
_LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
|
||||||
|
_NETRC_MACHINE = 'scte'
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
login_popup = self._download_webpage(
|
||||||
|
self._LOGIN_URL, None, 'Downloading login popup')
|
||||||
|
|
||||||
|
def is_logged(webpage):
|
||||||
|
return any(re.search(p, webpage) for p in (
|
||||||
|
r'class=["\']welcome\b', r'>Sign Out<'))
|
||||||
|
|
||||||
|
# already logged in
|
||||||
|
if is_logged(login_popup):
|
||||||
|
return
|
||||||
|
|
||||||
|
login_form = self._hidden_inputs(login_popup)
|
||||||
|
|
||||||
|
login_form.update({
|
||||||
|
'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
|
||||||
|
'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
|
||||||
|
'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
|
||||||
|
})
|
||||||
|
|
||||||
|
response = self._download_webpage(
|
||||||
|
self._LOGIN_URL, None, 'Logging in',
|
||||||
|
data=urlencode_postdata(login_form))
|
||||||
|
|
||||||
|
if '|pageRedirect|' not in response and not is_logged(response):
|
||||||
|
error = self._html_search_regex(
|
||||||
|
r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
|
||||||
|
response, 'error message', default=None)
|
||||||
|
if error:
|
||||||
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
|
||||||
|
class SCTEIE(SCTEBaseIE):
|
||||||
|
_VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Introduction to DOCSIS Engineering Professional',
|
||||||
|
'id': '31484',
|
||||||
|
},
|
||||||
|
'playlist_count': 5,
|
||||||
|
'skip': 'Requires account credentials',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
|
||||||
|
|
||||||
|
context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
|
||||||
|
content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
|
||||||
|
context = decode_packed_codes(self._download_webpage(
|
||||||
|
'%smobile/data.js' % content_base, video_id))
|
||||||
|
|
||||||
|
data = self._parse_xml(
|
||||||
|
self._search_regex(
|
||||||
|
r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for asset in data.findall('.//asset'):
|
||||||
|
asset_url = asset.get('url')
|
||||||
|
if not asset_url or not asset_url.endswith('.mp4'):
|
||||||
|
continue
|
||||||
|
asset_id = self._search_regex(
|
||||||
|
r'video_([^_]+)_', asset_url, 'asset id', default=None)
|
||||||
|
if not asset_id:
|
||||||
|
continue
|
||||||
|
entries.append({
|
||||||
|
'id': asset_id,
|
||||||
|
'title': title,
|
||||||
|
'url': content_base + asset_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
return self.playlist_result(entries, video_id, title)
|
||||||
|
|
||||||
|
|
||||||
|
class SCTECourseIE(SCTEBaseIE):
|
||||||
|
_VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://learning.scte.org/course/view.php?id=3639',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://learning.scte.org/course/view.php?id=3073',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
course_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, course_id)
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<h1>(.+?)</h1>', webpage, 'title', default=None)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'''(?x)
|
||||||
|
<a[^>]+
|
||||||
|
href=(["\'])
|
||||||
|
(?P<url>
|
||||||
|
https?://learning\.scte\.org/mod/
|
||||||
|
(?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
|
||||||
|
\bid=\d+
|
||||||
|
)
|
||||||
|
''',
|
||||||
|
webpage):
|
||||||
|
item_url = mobj.group('url')
|
||||||
|
if item_url == url:
|
||||||
|
continue
|
||||||
|
ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
|
||||||
|
else SCTECourseIE.ie_key())
|
||||||
|
entries.append(self.url_result(item_url, ie=ie))
|
||||||
|
|
||||||
|
return self.playlist_result(entries, course_id, title)
|
@ -4,34 +4,37 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
get_element_by_class,
|
||||||
|
strip_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SeekerIE(InfoExtractor):
|
class SeekerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# player.loadRevision3Item
|
|
||||||
'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
|
'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
|
||||||
'md5': '30c1dc4030cc715cf05b423d0947ac18',
|
'md5': '897d44bbe0d8986a2ead96de565a92db',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '76243',
|
'id': 'Elrn3gnY',
|
||||||
'ext': 'webm',
|
'ext': 'mp4',
|
||||||
'title': 'Should Trump Be Required To Release His Tax Returns?',
|
'title': 'Should Trump Be Required To Release His Tax Returns?',
|
||||||
'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?',
|
'description': 'md5:41efa8cfa8d627841045eec7b018eb45',
|
||||||
'uploader': 'Seeker Daily',
|
'timestamp': 1490090165,
|
||||||
'uploader_id': 'seekerdaily',
|
'upload_date': '20170321',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
|
'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
|
||||||
'playlist': [
|
'playlist': [
|
||||||
{
|
{
|
||||||
'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
|
'md5': '0497b9f20495174be73ae136949707d2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '67558',
|
'id': 'FihYQ8AE',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Pros & Cons Of Zoos',
|
'title': 'The Pros & Cons Of Zoos',
|
||||||
'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
|
'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c',
|
||||||
'uploader': 'DNews',
|
'timestamp': 1490039133,
|
||||||
'uploader_id': 'dnews',
|
'upload_date': '20170320',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -45,13 +48,11 @@ class SeekerIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id, article_id = re.match(self._VALID_URL, url).groups()
|
display_id, article_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
|
entries = []
|
||||||
if mobj:
|
for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
|
||||||
playlist_type, playlist_id = mobj.groups()
|
entries.append(self.url_result(
|
||||||
return self.url_result(
|
'jwplatform:' + jwp_id, 'JWPlatform', jwp_id))
|
||||||
'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id)
|
|
||||||
else:
|
|
||||||
entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall(
|
|
||||||
r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)]
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage))
|
entries, article_id,
|
||||||
|
self._og_search_title(webpage),
|
||||||
|
strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage))
|
||||||
|
@ -276,7 +276,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
if secret_token:
|
if secret_token:
|
||||||
query['secret_token'] = secret_token
|
query['secret_token'] = secret_token
|
||||||
|
|
||||||
if info.get('downloadable'):
|
if info.get('downloadable') and info.get('has_downloads_left'):
|
||||||
format_url = update_url_query(
|
format_url = update_url_query(
|
||||||
info.get('download_url') or track_base_url + '/download', query)
|
info.get('download_url') or track_base_url + '/download', query)
|
||||||
format_urls.add(format_url)
|
format_urls.add(format_url)
|
||||||
|
@ -4,15 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_parse_qs,
|
|
||||||
compat_urllib_parse_urlparse
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
extract_attributes,
|
compat_str,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_or_none,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -20,20 +15,20 @@ class STVPlayerIE(InfoExtractor):
|
|||||||
IE_NAME = 'stv:player'
|
IE_NAME = 'stv:player'
|
||||||
_VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
|
_VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/',
|
'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/',
|
||||||
'md5': '2ad867d4afd641fa14187596e0fbc91b',
|
'md5': '5adf9439c31d554f8be0707c7abe7e0a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6016487034001',
|
'id': '5333973339001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20190321',
|
'upload_date': '20170301',
|
||||||
'title': 'Interview with the cast ahead of new Victoria',
|
'title': '60 seconds on set with Laura Norton',
|
||||||
'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.',
|
'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let\'s find out!",
|
||||||
'timestamp': 1553179628,
|
'timestamp': 1488388054,
|
||||||
'uploader_id': '1486976045',
|
'uploader_id': '1486976045',
|
||||||
},
|
},
|
||||||
'skip': 'this resource is unavailable outside of the UK',
|
'skip': 'this resource is unavailable outside of the UK',
|
||||||
}
|
}
|
||||||
_PUBLISHER_ID = '1486976045'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s'
|
||||||
_PTYPE_MAP = {
|
_PTYPE_MAP = {
|
||||||
'episode': 'episodes',
|
'episode': 'episodes',
|
||||||
'video': 'shortform',
|
'video': 'shortform',
|
||||||
@ -41,31 +36,14 @@ class STVPlayerIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
ptype, video_id = re.match(self._VALID_URL, url).groups()
|
ptype, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex(
|
|
||||||
r'itemprop="embedURL"[^>]+href="([^"]+)',
|
|
||||||
webpage, 'embed URL', default=None)).query)
|
|
||||||
publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID
|
|
||||||
|
|
||||||
player_attr = extract_attributes(self._search_regex(
|
|
||||||
r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {}
|
|
||||||
|
|
||||||
info = {}
|
|
||||||
duration = ref_id = series = video_id = None
|
|
||||||
api_ref_id = player_attr.get('data-player-api-refid')
|
|
||||||
if api_ref_id:
|
|
||||||
resp = self._download_json(
|
resp = self._download_json(
|
||||||
'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id),
|
'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id),
|
||||||
api_ref_id, fatal=False)
|
video_id)
|
||||||
if resp:
|
|
||||||
result = resp.get('results') or {}
|
result = resp['results']
|
||||||
video = result.get('video') or {}
|
video = result['video']
|
||||||
video_id = str_or_none(video.get('id'))
|
video_id = compat_str(video['id'])
|
||||||
ref_id = video.get('guid')
|
|
||||||
duration = video.get('length')
|
|
||||||
programme = result.get('programme') or {}
|
|
||||||
series = programme.get('name') or programme.get('shortName')
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
_subtitles = result.get('_subtitles') or {}
|
_subtitles = result.get('_subtitles') or {}
|
||||||
for ext, sub_url in _subtitles.items():
|
for ext, sub_url in _subtitles.items():
|
||||||
@ -73,22 +51,17 @@ class STVPlayerIE(InfoExtractor):
|
|||||||
'ext': 'vtt' if ext == 'webvtt' else ext,
|
'ext': 'vtt' if ext == 'webvtt' else ext,
|
||||||
'url': sub_url,
|
'url': sub_url,
|
||||||
})
|
})
|
||||||
info.update({
|
|
||||||
|
programme = result.get('programme') or {}
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'id': video_id,
|
||||||
|
'url': self.BRIGHTCOVE_URL_TEMPLATE % video_id,
|
||||||
'description': result.get('summary'),
|
'description': result.get('summary'),
|
||||||
|
'duration': float_or_none(video.get('length'), 1000),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'view_count': int_or_none(result.get('views')),
|
'view_count': int_or_none(result.get('views')),
|
||||||
})
|
'series': programme.get('name') or programme.get('shortName'),
|
||||||
if not video_id:
|
|
||||||
video_id = qs.get('videoId', [None])[0] or self._search_regex(
|
|
||||||
r'<link\s+itemprop="url"\s+href="(\d+)"',
|
|
||||||
webpage, 'video id', default=None) or 'ref:' + (ref_id or player_attr['data-refid'])
|
|
||||||
|
|
||||||
info.update({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'duration': float_or_none(duration or player_attr.get('data-duration'), 1000),
|
|
||||||
'id': video_id,
|
|
||||||
'ie_key': 'BrightcoveNew',
|
'ie_key': 'BrightcoveNew',
|
||||||
'series': series or player_attr.get('data-programme-name'),
|
}
|
||||||
'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id),
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
@ -84,6 +84,19 @@ class TeamcocoIE(TurnerBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
_RECORD_TEMPL = '''id
|
||||||
|
title
|
||||||
|
teaser
|
||||||
|
publishOn
|
||||||
|
thumb {
|
||||||
|
preview
|
||||||
|
}
|
||||||
|
tags {
|
||||||
|
name
|
||||||
|
}
|
||||||
|
duration
|
||||||
|
turnerMediaId
|
||||||
|
turnerMediaAuthToken'''
|
||||||
|
|
||||||
def _graphql_call(self, query_template, object_type, object_id):
|
def _graphql_call(self, query_template, object_type, object_id):
|
||||||
find_object = 'find' + object_type
|
find_object = 'find' + object_type
|
||||||
@ -98,35 +111,35 @@ class TeamcocoIE(TurnerBaseIE):
|
|||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
response = self._graphql_call('''{
|
response = self._graphql_call('''{
|
||||||
%s(slug: "%s") {
|
%%s(slug: "%%s") {
|
||||||
... on RecordSlug {
|
... on RecordSlug {
|
||||||
record {
|
record {
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
... on PageSlug {
|
||||||
|
child {
|
||||||
id
|
id
|
||||||
title
|
|
||||||
teaser
|
|
||||||
publishOn
|
|
||||||
thumb {
|
|
||||||
preview
|
|
||||||
}
|
|
||||||
file {
|
|
||||||
url
|
|
||||||
}
|
|
||||||
tags {
|
|
||||||
name
|
|
||||||
}
|
|
||||||
duration
|
|
||||||
turnerMediaId
|
|
||||||
turnerMediaAuthToken
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
... on NotFoundSlug {
|
... on NotFoundSlug {
|
||||||
status
|
status
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}''', 'Slug', display_id)
|
}''' % self._RECORD_TEMPL, 'Slug', display_id)
|
||||||
if response.get('status'):
|
if response.get('status'):
|
||||||
raise ExtractorError('This video is no longer available.', expected=True)
|
raise ExtractorError('This video is no longer available.', expected=True)
|
||||||
|
|
||||||
|
child = response.get('child')
|
||||||
|
if child:
|
||||||
|
record = self._graphql_call('''{
|
||||||
|
%%s(id: "%%s") {
|
||||||
|
... on Video {
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}''' % self._RECORD_TEMPL, 'Record', child['id'])
|
||||||
|
else:
|
||||||
record = response['record']
|
record = response['record']
|
||||||
video_id = record['id']
|
video_id = record['id']
|
||||||
|
|
||||||
@ -150,25 +163,21 @@ class TeamcocoIE(TurnerBaseIE):
|
|||||||
'accessTokenType': 'jws',
|
'accessTokenType': 'jws',
|
||||||
}))
|
}))
|
||||||
else:
|
else:
|
||||||
d = self._download_json(
|
video_sources = self._download_json(
|
||||||
'https://teamcoco.com/_truman/d/' + video_id,
|
'https://teamcoco.com/_truman/d/' + video_id,
|
||||||
video_id, fatal=False) or {}
|
video_id)['meta']['src']
|
||||||
video_sources = d.get('meta') or {}
|
if isinstance(video_sources, dict):
|
||||||
if not video_sources:
|
video_sources = video_sources.values()
|
||||||
video_sources = self._graphql_call('''{
|
|
||||||
%s(id: "%s") {
|
|
||||||
src
|
|
||||||
}
|
|
||||||
}''', 'RecordVideoSource', video_id) or {}
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
|
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
|
||||||
for format_id, src in video_sources.get('src', {}).items():
|
for src in video_sources:
|
||||||
if not isinstance(src, dict):
|
if not isinstance(src, dict):
|
||||||
continue
|
continue
|
||||||
src_url = src.get('src')
|
src_url = src.get('src')
|
||||||
if not src_url:
|
if not src_url:
|
||||||
continue
|
continue
|
||||||
|
format_id = src.get('label')
|
||||||
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
|
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
|
||||||
if format_id == 'hls' or ext == 'm3u8':
|
if format_id == 'hls' or ext == 'm3u8':
|
||||||
# compat_urllib_parse.urljoin does not work here
|
# compat_urllib_parse.urljoin does not work here
|
||||||
@ -190,9 +199,6 @@ class TeamcocoIE(TurnerBaseIE):
|
|||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'quality': get_quality(format_id),
|
'quality': get_quality(format_id),
|
||||||
})
|
})
|
||||||
if not formats:
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
record['file']['url'], video_id, 'mp4', fatal=False)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
info['formats'] = formats
|
info['formats'] = formats
|
||||||
|
|
||||||
|
@ -4,21 +4,25 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
remove_end,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TelegraafIE(InfoExtractor):
|
class TelegraafIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
|
_VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
|
'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '24353229',
|
'id': 'gaMItuoSeUg2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Tikibad ontruimd wegens brand',
|
'title': 'Historisch scheepswrak slaat na 100 jaar los',
|
||||||
'description': 'md5:05ca046ff47b931f9b04855015e163a4',
|
'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 33,
|
'duration': 55,
|
||||||
|
'timestamp': 1572805527,
|
||||||
|
'upload_date': '20191103',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@ -27,23 +31,30 @@ class TelegraafIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
article_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
video_id = self._download_json(
|
||||||
|
'https://www.telegraaf.nl/graphql', article_id, query={
|
||||||
|
'query': '''{
|
||||||
|
article(uid: %s) {
|
||||||
|
videos {
|
||||||
|
videoId
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}''' % article_id,
|
||||||
|
})['data']['article']['videos'][0]['videoId']
|
||||||
|
|
||||||
player_url = self._html_search_regex(
|
item = self._download_json(
|
||||||
r'<iframe[^>]+src="([^"]+")', webpage, 'player URL')
|
'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id,
|
||||||
player_page = self._download_webpage(
|
video_id)['items'][0]
|
||||||
player_url, video_id, note='Download player webpage')
|
title = item['title']
|
||||||
playlist_url = self._search_regex(
|
|
||||||
r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
|
|
||||||
playlist_data = self._download_json(playlist_url, video_id)
|
|
||||||
|
|
||||||
item = playlist_data['items'][0]
|
|
||||||
formats = []
|
formats = []
|
||||||
locations = item['locations']
|
locations = item.get('locations') or {}
|
||||||
for location in locations.get('adaptive', []):
|
for location in locations.get('adaptive', []):
|
||||||
manifest_url = location['src']
|
manifest_url = location.get('src')
|
||||||
|
if not manifest_url:
|
||||||
|
continue
|
||||||
ext = determine_ext(manifest_url)
|
ext = determine_ext(manifest_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
@ -54,25 +65,25 @@ class TelegraafIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
self.report_warning('Unknown adaptive format %s' % ext)
|
self.report_warning('Unknown adaptive format %s' % ext)
|
||||||
for location in locations.get('progressive', []):
|
for location in locations.get('progressive', []):
|
||||||
|
src = try_get(location, lambda x: x['sources'][0]['src'])
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
label = location.get('label')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': location['sources'][0]['src'],
|
'url': src,
|
||||||
'width': location.get('width'),
|
'width': int_or_none(location.get('width')),
|
||||||
'height': location.get('height'),
|
'height': int_or_none(location.get('height')),
|
||||||
'format_id': 'http-%s' % location['label'],
|
'format_id': 'http' + ('-%s' % label if label else ''),
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' - VIDEO')
|
|
||||||
description = self._og_search_description(webpage)
|
|
||||||
duration = item.get('duration')
|
|
||||||
thumbnail = item.get('poster')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': item.get('description'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': duration,
|
'duration': int_or_none(item.get('duration')),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': item.get('poster'),
|
||||||
|
'timestamp': parse_iso8601(item.get('datecreated'), ' '),
|
||||||
}
|
}
|
||||||
|
@ -344,9 +344,8 @@ class TwitchVodIE(TwitchItemBaseIE):
|
|||||||
info['subtitles'] = {
|
info['subtitles'] = {
|
||||||
'rechat': [{
|
'rechat': [{
|
||||||
'url': update_url_query(
|
'url': update_url_query(
|
||||||
'https://rechat.twitch.tv/rechat-messages', {
|
'https://api.twitch.tv/v5/videos/%s/comments' % item_id, {
|
||||||
'video_id': 'v%s' % item_id,
|
'client_id': self._CLIENT_ID,
|
||||||
'start': info['timestamp'],
|
|
||||||
}),
|
}),
|
||||||
'ext': 'json',
|
'ext': 'json',
|
||||||
}],
|
}],
|
||||||
@ -644,7 +643,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
|||||||
|
|
||||||
class TwitchClipsIE(TwitchBaseIE):
|
class TwitchClipsIE(TwitchBaseIE):
|
||||||
IE_NAME = 'twitch:clips'
|
IE_NAME = 'twitch:clips'
|
||||||
_VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
|
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
|
||||||
@ -667,6 +666,9 @@ class TwitchClipsIE(TwitchBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
|
'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -4,32 +4,67 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
remove_end,
|
|
||||||
try_get,
|
try_get,
|
||||||
|
strip_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
from .periscope import PeriscopeIE
|
from .periscope import (
|
||||||
|
PeriscopeBaseIE,
|
||||||
|
PeriscopeIE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TwitterBaseIE(InfoExtractor):
|
class TwitterBaseIE(InfoExtractor):
|
||||||
|
_API_BASE = 'https://api.twitter.com/1.1/'
|
||||||
|
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?twitter\.com/'
|
||||||
|
_GUEST_TOKEN = None
|
||||||
|
|
||||||
|
def _extract_variant_formats(self, variant, video_id):
|
||||||
|
variant_url = variant.get('url')
|
||||||
|
if not variant_url:
|
||||||
|
return []
|
||||||
|
elif '.m3u8' in variant_url:
|
||||||
|
return self._extract_m3u8_formats(
|
||||||
|
variant_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False)
|
||||||
|
else:
|
||||||
|
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
|
||||||
|
f = {
|
||||||
|
'url': variant_url,
|
||||||
|
'format_id': 'http' + ('-%d' % tbr if tbr else ''),
|
||||||
|
'tbr': tbr,
|
||||||
|
}
|
||||||
|
self._search_dimensions_in_video_url(f, variant_url)
|
||||||
|
return [f]
|
||||||
|
|
||||||
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
|
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
|
||||||
vmap_data = self._download_xml(vmap_url, video_id)
|
vmap_data = self._download_xml(vmap_url, video_id)
|
||||||
video_url = xpath_text(vmap_data, './/MediaFile').strip()
|
formats = []
|
||||||
if determine_ext(video_url) == 'm3u8':
|
urls = []
|
||||||
return self._extract_m3u8_formats(
|
for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
|
||||||
video_url, video_id, ext='mp4', m3u8_id='hls',
|
video_variant.attrib['url'] = compat_urllib_parse_unquote(
|
||||||
entry_protocol='m3u8_native')
|
video_variant.attrib['url'])
|
||||||
return [{
|
urls.append(video_variant.attrib['url'])
|
||||||
'url': video_url,
|
formats.extend(self._extract_variant_formats(
|
||||||
}]
|
video_variant.attrib, video_id))
|
||||||
|
video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
|
||||||
|
if video_url not in urls:
|
||||||
|
formats.extend(self._extract_variant_formats({'url': video_url}, video_id))
|
||||||
|
return formats
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _search_dimensions_in_video_url(a_format, video_url):
|
def _search_dimensions_in_video_url(a_format, video_url):
|
||||||
@ -40,10 +75,30 @@ class TwitterBaseIE(InfoExtractor):
|
|||||||
'height': int(m.group('height')),
|
'height': int(m.group('height')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def _call_api(self, path, video_id, query={}):
|
||||||
|
headers = {
|
||||||
|
'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
|
||||||
|
}
|
||||||
|
if not self._GUEST_TOKEN:
|
||||||
|
self._GUEST_TOKEN = self._download_json(
|
||||||
|
self._API_BASE + 'guest/activate.json', video_id,
|
||||||
|
'Downloading guest token', data=b'',
|
||||||
|
headers=headers)['guest_token']
|
||||||
|
headers['x-guest-token'] = self._GUEST_TOKEN
|
||||||
|
try:
|
||||||
|
return self._download_json(
|
||||||
|
self._API_BASE + path, video_id, headers=headers, query=query)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
raise ExtractorError(self._parse_json(
|
||||||
|
e.cause.read().decode(),
|
||||||
|
video_id)['errors'][0]['message'], expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
class TwitterCardIE(TwitterBaseIE):
|
|
||||||
|
class TwitterCardIE(InfoExtractor):
|
||||||
IE_NAME = 'twitter:card'
|
IE_NAME = 'twitter:card'
|
||||||
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?P<path>cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
|
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
|
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
|
||||||
@ -51,19 +106,28 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '560070183650213889',
|
'id': '560070183650213889',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Twitter web player',
|
'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
|
||||||
|
'uploader': 'Twitter',
|
||||||
|
'uploader_id': 'Twitter',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 30.033,
|
'duration': 30.033,
|
||||||
|
'timestamp': 1422366112,
|
||||||
|
'upload_date': '20150127',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
|
'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
|
||||||
'md5': '7ee2a553b63d1bccba97fbed97d9e1c8',
|
'md5': '7137eca597f72b9abbe61e5ae0161399',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '623160978427936768',
|
'id': '623160978427936768',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Twitter web player',
|
'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
|
||||||
'thumbnail': r're:^https?://.*$',
|
'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
|
||||||
|
'uploader': 'NASA',
|
||||||
|
'uploader_id': 'NASA',
|
||||||
|
'timestamp': 1437408129,
|
||||||
|
'upload_date': '20150720',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -75,7 +139,7 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
'title': 'Ubuntu 11.10 Overview',
|
'title': 'Ubuntu 11.10 Overview',
|
||||||
'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
|
'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
|
||||||
'upload_date': '20111013',
|
'upload_date': '20111013',
|
||||||
'uploader': 'OMG! Ubuntu!',
|
'uploader': 'OMG! UBUNTU!',
|
||||||
'uploader_id': 'omgubuntu',
|
'uploader_id': 'omgubuntu',
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
'add_ie': ['Youtube'],
|
||||||
@ -99,190 +163,30 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '705235433198714880',
|
'id': '705235433198714880',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Twitter web player',
|
'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
|
||||||
'thumbnail': r're:^https?://.*',
|
'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
|
||||||
|
'uploader': 'Brent Yarina',
|
||||||
|
'uploader_id': 'BTNBrentYarina',
|
||||||
|
'timestamp': 1456976204,
|
||||||
|
'upload_date': '20160303',
|
||||||
},
|
},
|
||||||
|
'skip': 'This content is no longer available.',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/i/videos/752274308186120192',
|
'url': 'https://twitter.com/i/videos/752274308186120192',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
_API_BASE = 'https://api.twitter.com/1.1'
|
|
||||||
|
|
||||||
def _parse_media_info(self, media_info, video_id):
|
|
||||||
formats = []
|
|
||||||
for media_variant in media_info.get('variants', []):
|
|
||||||
media_url = media_variant['url']
|
|
||||||
if media_url.endswith('.m3u8'):
|
|
||||||
formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
|
|
||||||
elif media_url.endswith('.mpd'):
|
|
||||||
formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
|
|
||||||
else:
|
|
||||||
tbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000)
|
|
||||||
a_format = {
|
|
||||||
'url': media_url,
|
|
||||||
'format_id': 'http-%d' % tbr if tbr else 'http',
|
|
||||||
'tbr': tbr,
|
|
||||||
}
|
|
||||||
# Reported bitRate may be zero
|
|
||||||
if not a_format['tbr']:
|
|
||||||
del a_format['tbr']
|
|
||||||
|
|
||||||
self._search_dimensions_in_video_url(a_format, media_url)
|
|
||||||
|
|
||||||
formats.append(a_format)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_mobile_formats(self, username, video_id):
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
'https://mobile.twitter.com/%s/status/%s' % (username, video_id),
|
|
||||||
video_id, 'Downloading mobile webpage',
|
|
||||||
headers={
|
|
||||||
# A recent mobile UA is necessary for `gt` cookie
|
|
||||||
'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0',
|
|
||||||
})
|
|
||||||
main_script_url = self._html_search_regex(
|
|
||||||
r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL')
|
|
||||||
main_script = self._download_webpage(
|
|
||||||
main_script_url, video_id, 'Downloading main script')
|
|
||||||
bearer_token = self._search_regex(
|
|
||||||
r'BEARER_TOKEN\s*:\s*"([^"]+)"',
|
|
||||||
main_script, 'bearer token')
|
|
||||||
# https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id
|
|
||||||
api_data = self._download_json(
|
|
||||||
'%s/statuses/show/%s.json' % (self._API_BASE, video_id),
|
|
||||||
video_id, 'Downloading API data',
|
|
||||||
headers={
|
|
||||||
'Authorization': 'Bearer ' + bearer_token,
|
|
||||||
})
|
|
||||||
media_info = try_get(api_data, lambda o: o['extended_entities']['media'][0]['video_info']) or {}
|
|
||||||
return self._parse_media_info(media_info, video_id)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path, video_id = re.search(self._VALID_URL, url).groups()
|
status_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
config = None
|
'https://twitter.com/statuses/' + status_id,
|
||||||
formats = []
|
TwitterIE.ie_key(), status_id)
|
||||||
duration = None
|
|
||||||
|
|
||||||
urls = [url]
|
|
||||||
if path.startswith('cards/'):
|
|
||||||
urls.append('https://twitter.com/i/videos/' + video_id)
|
|
||||||
|
|
||||||
for u in urls:
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
u, video_id, headers={'Referer': 'https://twitter.com/'})
|
|
||||||
|
|
||||||
iframe_url = self._html_search_regex(
|
|
||||||
r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
|
|
||||||
webpage, 'video iframe', default=None)
|
|
||||||
if iframe_url:
|
|
||||||
return self.url_result(iframe_url)
|
|
||||||
|
|
||||||
config = self._parse_json(self._html_search_regex(
|
|
||||||
r'data-(?:player-)?config="([^"]+)"', webpage,
|
|
||||||
'data player config', default='{}'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
if config.get('source_type') == 'vine':
|
|
||||||
return self.url_result(config['player_url'], 'Vine')
|
|
||||||
|
|
||||||
periscope_url = PeriscopeIE._extract_url(webpage)
|
|
||||||
if periscope_url:
|
|
||||||
return self.url_result(periscope_url, PeriscopeIE.ie_key())
|
|
||||||
|
|
||||||
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
|
|
||||||
|
|
||||||
if video_url:
|
|
||||||
if determine_ext(video_url) == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
|
|
||||||
else:
|
|
||||||
f = {
|
|
||||||
'url': video_url,
|
|
||||||
}
|
|
||||||
|
|
||||||
self._search_dimensions_in_video_url(f, video_url)
|
|
||||||
|
|
||||||
formats.append(f)
|
|
||||||
|
|
||||||
vmap_url = config.get('vmapUrl') or config.get('vmap_url')
|
|
||||||
if vmap_url:
|
|
||||||
formats.extend(
|
|
||||||
self._extract_formats_from_vmap_url(vmap_url, video_id))
|
|
||||||
|
|
||||||
media_info = None
|
|
||||||
|
|
||||||
for entity in config.get('status', {}).get('entities', []):
|
|
||||||
if 'mediaInfo' in entity:
|
|
||||||
media_info = entity['mediaInfo']
|
|
||||||
|
|
||||||
if media_info:
|
|
||||||
formats.extend(self._parse_media_info(media_info, video_id))
|
|
||||||
duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
|
|
||||||
|
|
||||||
username = config.get('user', {}).get('screen_name')
|
|
||||||
if username:
|
|
||||||
formats.extend(self._extract_mobile_formats(username, video_id))
|
|
||||||
|
|
||||||
if formats:
|
|
||||||
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
|
||||||
thumbnail = config.get('posterImageUrl') or config.get('image_src')
|
|
||||||
duration = float_or_none(config.get('duration'), scale=1000) or duration
|
|
||||||
break
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
headers = {
|
|
||||||
'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
|
|
||||||
'Referer': url,
|
|
||||||
}
|
|
||||||
ct0 = self._get_cookies(url).get('ct0')
|
|
||||||
if ct0:
|
|
||||||
headers['csrf_token'] = ct0.value
|
|
||||||
guest_token = self._download_json(
|
|
||||||
'%s/guest/activate.json' % self._API_BASE, video_id,
|
|
||||||
'Downloading guest token', data=b'',
|
|
||||||
headers=headers)['guest_token']
|
|
||||||
headers['x-guest-token'] = guest_token
|
|
||||||
self._set_cookie('api.twitter.com', 'gt', guest_token)
|
|
||||||
config = self._download_json(
|
|
||||||
'%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id),
|
|
||||||
video_id, headers=headers)
|
|
||||||
track = config['track']
|
|
||||||
vmap_url = track.get('vmapUrl')
|
|
||||||
if vmap_url:
|
|
||||||
formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
|
|
||||||
else:
|
|
||||||
playback_url = track['playbackUrl']
|
|
||||||
if determine_ext(playback_url) == 'm3u8':
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
playback_url, video_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
|
||||||
else:
|
|
||||||
formats = [{
|
|
||||||
'url': playback_url,
|
|
||||||
}]
|
|
||||||
title = 'Twitter web player'
|
|
||||||
thumbnail = config.get('posterImage')
|
|
||||||
duration = float_or_none(track.get('durationMs'), scale=1000)
|
|
||||||
|
|
||||||
self._remove_duplicate_formats(formats)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class TwitterIE(InfoExtractor):
|
class TwitterIE(TwitterBaseIE):
|
||||||
IE_NAME = 'twitter'
|
IE_NAME = 'twitter'
|
||||||
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)'
|
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
|
||||||
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
|
|
||||||
_TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
||||||
@ -291,10 +195,13 @@ class TwitterIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
|
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
|
'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
|
||||||
'uploader': 'FREE THE NIPPLE',
|
'uploader': 'FREE THE NIPPLE',
|
||||||
'uploader_id': 'freethenipple',
|
'uploader_id': 'freethenipple',
|
||||||
'duration': 12.922,
|
'duration': 12.922,
|
||||||
|
'timestamp': 1442188653,
|
||||||
|
'upload_date': '20150913',
|
||||||
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
|
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
|
||||||
@ -316,19 +223,23 @@ class TwitterIE(InfoExtractor):
|
|||||||
'id': '665052190608723968',
|
'id': '665052190608723968',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
|
'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
|
||||||
'description': 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."',
|
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
|
||||||
'uploader_id': 'starwars',
|
'uploader_id': 'starwars',
|
||||||
'uploader': 'Star Wars',
|
'uploader': 'Star Wars',
|
||||||
|
'timestamp': 1447395772,
|
||||||
|
'upload_date': '20151113',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
|
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '705235433198714880',
|
'id': '705235433198714880',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.',
|
'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
|
||||||
'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."',
|
'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
|
||||||
'uploader_id': 'BTNBrentYarina',
|
'uploader_id': 'BTNBrentYarina',
|
||||||
'uploader': 'Brent Yarina',
|
'uploader': 'Brent Yarina',
|
||||||
|
'timestamp': 1456976204,
|
||||||
|
'upload_date': '20160303',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# The same video as https://twitter.com/i/videos/tweet/705235433198714880
|
# The same video as https://twitter.com/i/videos/tweet/705235433198714880
|
||||||
@ -340,12 +251,14 @@ class TwitterIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '700207533655363584',
|
'id': '700207533655363584',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel',
|
'title': 'Simon Vertugo - BEAT PROD: @suhmeduh #Damndaniel',
|
||||||
'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'uploader': 'JG',
|
'uploader': 'Simon Vertugo',
|
||||||
'uploader_id': 'jaydingeer',
|
'uploader_id': 'simonvertugo',
|
||||||
'duration': 30.0,
|
'duration': 30.0,
|
||||||
|
'timestamp': 1455777459,
|
||||||
|
'upload_date': '20160218',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
|
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
|
||||||
@ -353,10 +266,9 @@ class TwitterIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MIOxnrUteUd',
|
'id': 'MIOxnrUteUd',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Vince Mancini - Vine of the day',
|
'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
|
||||||
'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
|
'uploader': 'TAKUMA',
|
||||||
'uploader': 'Vince Mancini',
|
'uploader_id': '1004126642786242560',
|
||||||
'uploader_id': 'Filmdrunk',
|
|
||||||
'timestamp': 1402826626,
|
'timestamp': 1402826626,
|
||||||
'upload_date': '20140615',
|
'upload_date': '20140615',
|
||||||
},
|
},
|
||||||
@ -367,21 +279,22 @@ class TwitterIE(InfoExtractor):
|
|||||||
'id': '719944021058060289',
|
'id': '719944021058060289',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
|
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
|
||||||
'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
|
'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
|
||||||
'uploader_id': 'captainamerica',
|
'uploader_id': 'CaptainAmerica',
|
||||||
'uploader': 'Captain America',
|
'uploader': 'Captain America',
|
||||||
'duration': 3.17,
|
'duration': 3.17,
|
||||||
|
'timestamp': 1460483005,
|
||||||
|
'upload_date': '20160412',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
|
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1zqKVVlkqLaKB',
|
'id': '1zqKVVlkqLaKB',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence',
|
'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
|
||||||
'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"',
|
|
||||||
'upload_date': '20160923',
|
'upload_date': '20160923',
|
||||||
'uploader_id': 'OPP_HSD',
|
'uploader_id': '1PmKqpJdOJQoY',
|
||||||
'uploader': 'Sgt Kerry Schmidt',
|
'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
|
||||||
'timestamp': 1474613214,
|
'timestamp': 1474613214,
|
||||||
},
|
},
|
||||||
'add_ie': ['Periscope'],
|
'add_ie': ['Periscope'],
|
||||||
@ -392,10 +305,12 @@ class TwitterIE(InfoExtractor):
|
|||||||
'id': '852138619213144067',
|
'id': '852138619213144067',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
|
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
|
||||||
'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"',
|
'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
|
||||||
'uploader': 'عالم الأخبار',
|
'uploader': 'عالم الأخبار',
|
||||||
'uploader_id': 'news_al3alm',
|
'uploader_id': 'news_al3alm',
|
||||||
'duration': 277.4,
|
'duration': 277.4,
|
||||||
|
'timestamp': 1492000653,
|
||||||
|
'upload_date': '20170412',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
||||||
@ -404,10 +319,12 @@ class TwitterIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
|
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"',
|
'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
|
||||||
'uploader': 'Préfet de Guadeloupe',
|
'uploader': 'Préfet de Guadeloupe',
|
||||||
'uploader_id': 'Prefet971',
|
'uploader_id': 'Prefet971',
|
||||||
'duration': 47.48,
|
'duration': 47.48,
|
||||||
|
'timestamp': 1505803395,
|
||||||
|
'upload_date': '20170919',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
@ -420,10 +337,12 @@ class TwitterIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:.*?Shep is on a roll today.*?',
|
'title': 're:.*?Shep is on a roll today.*?',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'description': 'md5:63b036c228772523ae1924d5f8e5ed6b',
|
'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
|
||||||
'uploader': 'Lis Power',
|
'uploader': 'Lis Power',
|
||||||
'uploader_id': 'LisPower1',
|
'uploader_id': 'LisPower1',
|
||||||
'duration': 111.278,
|
'duration': 111.278,
|
||||||
|
'timestamp': 1527623489,
|
||||||
|
'upload_date': '20180529',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
@ -435,89 +354,164 @@ class TwitterIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
|
'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'description': 'md5:66d493500c013e3e2d434195746a7f78',
|
'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
|
||||||
'uploader': 'Twitter',
|
'uploader': 'Twitter',
|
||||||
'uploader_id': 'Twitter',
|
'uploader_id': 'Twitter',
|
||||||
'duration': 61.567,
|
'duration': 61.567,
|
||||||
|
'timestamp': 1548184644,
|
||||||
|
'upload_date': '20190122',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# not available in Periscope
|
||||||
|
'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1vOGwqejwoWxB',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
|
||||||
|
'uploader': 'Vivi',
|
||||||
|
'uploader_id': '1eVjYOLGkGrQL',
|
||||||
|
},
|
||||||
|
'add_ie': ['TwitterBroadcast'],
|
||||||
|
}, {
|
||||||
|
# Twitch Clip Embed
|
||||||
|
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
twid = self._match_id(url)
|
||||||
twid = mobj.group('id')
|
status = self._call_api(
|
||||||
|
'statuses/show/%s.json' % twid, twid, {
|
||||||
webpage, urlh = self._download_webpage_handle(
|
'cards_platform': 'Web-12',
|
||||||
self._TEMPLATE_STATUSES_URL % twid, twid)
|
'include_cards': 1,
|
||||||
|
'include_reply_count': 1,
|
||||||
if 'twitter.com/account/suspended' in urlh.geturl():
|
'include_user_entities': 0,
|
||||||
raise ExtractorError('Account suspended by Twitter.', expected=True)
|
'tweet_mode': 'extended',
|
||||||
|
})
|
||||||
user_id = None
|
|
||||||
|
|
||||||
redirect_mobj = re.match(self._VALID_URL, urlh.geturl())
|
|
||||||
if redirect_mobj:
|
|
||||||
user_id = redirect_mobj.group('user_id')
|
|
||||||
|
|
||||||
if not user_id:
|
|
||||||
user_id = mobj.group('user_id')
|
|
||||||
|
|
||||||
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
|
||||||
|
|
||||||
title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
|
|
||||||
|
|
||||||
|
title = description = status['full_text'].replace('\n', ' ')
|
||||||
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
|
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
|
||||||
title = re.sub(r'\s+(https?://[^ ]+)', '', title)
|
title = re.sub(r'\s+(https?://[^ ]+)', '', title)
|
||||||
|
user = status.get('user') or {}
|
||||||
|
uploader = user.get('name')
|
||||||
|
if uploader:
|
||||||
|
title = '%s - %s' % (uploader, title)
|
||||||
|
uploader_id = user.get('screen_name')
|
||||||
|
|
||||||
|
tags = []
|
||||||
|
for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
|
||||||
|
hashtag_text = hashtag.get('text')
|
||||||
|
if not hashtag_text:
|
||||||
|
continue
|
||||||
|
tags.append(hashtag_text)
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'uploader_id': user_id,
|
'id': twid,
|
||||||
'uploader': username,
|
'title': title,
|
||||||
'webpage_url': url,
|
'description': description,
|
||||||
'description': '%s on Twitter: "%s"' % (username, description),
|
'uploader': uploader,
|
||||||
'title': username + ' - ' + title,
|
'timestamp': unified_timestamp(status.get('created_at')),
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None,
|
||||||
|
'like_count': int_or_none(status.get('favorite_count')),
|
||||||
|
'repost_count': int_or_none(status.get('retweet_count')),
|
||||||
|
'comment_count': int_or_none(status.get('reply_count')),
|
||||||
|
'age_limit': 18 if status.get('possibly_sensitive') else 0,
|
||||||
|
'tags': tags,
|
||||||
}
|
}
|
||||||
|
|
||||||
mobj = re.search(r'''(?x)
|
media = try_get(status, lambda x: x['extended_entities']['media'][0])
|
||||||
<video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
|
if media and media.get('type') != 'photo':
|
||||||
<source[^>]+video-src="(?P<url>[^"]+)"
|
video_info = media.get('video_info') or {}
|
||||||
''', webpage)
|
|
||||||
|
|
||||||
if mobj:
|
formats = []
|
||||||
more_info = mobj.group('more_info')
|
for variant in video_info.get('variants', []):
|
||||||
height = int_or_none(self._search_regex(
|
formats.extend(self._extract_variant_formats(variant, twid))
|
||||||
r'data-height="(\d+)"', more_info, 'height', fatal=False))
|
self._sort_formats(formats)
|
||||||
width = int_or_none(self._search_regex(
|
|
||||||
r'data-width="(\d+)"', more_info, 'width', fatal=False))
|
thumbnails = []
|
||||||
thumbnail = self._search_regex(
|
media_url = media.get('media_url_https') or media.get('media_url')
|
||||||
r'poster="([^"]+)"', more_info, 'poster', fatal=False)
|
if media_url:
|
||||||
info.update({
|
def add_thumbnail(name, size):
|
||||||
'id': twid,
|
thumbnails.append({
|
||||||
'url': mobj.group('url'),
|
'id': name,
|
||||||
'height': height,
|
'url': update_url_query(media_url, {'name': name}),
|
||||||
'width': width,
|
'width': int_or_none(size.get('w') or size.get('width')),
|
||||||
'thumbnail': thumbnail,
|
'height': int_or_none(size.get('h') or size.get('height')),
|
||||||
})
|
})
|
||||||
return info
|
for name, size in media.get('sizes', {}).items():
|
||||||
|
add_thumbnail(name, size)
|
||||||
|
add_thumbnail('orig', media.get('original_info') or {})
|
||||||
|
|
||||||
twitter_card_url = None
|
info.update({
|
||||||
if 'class="PlayableMedia' in webpage:
|
'formats': formats,
|
||||||
twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid)
|
'thumbnails': thumbnails,
|
||||||
|
'duration': float_or_none(video_info.get('duration_millis'), 1000),
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
twitter_card_iframe_url = self._search_regex(
|
card = status.get('card')
|
||||||
r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1',
|
if card:
|
||||||
webpage, 'Twitter card iframe URL', default=None, group='url')
|
binding_values = card['binding_values']
|
||||||
if twitter_card_iframe_url:
|
|
||||||
twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url)
|
def get_binding_value(k):
|
||||||
|
o = binding_values.get(k) or {}
|
||||||
|
return try_get(o, lambda x: x[x['type'].lower() + '_value'])
|
||||||
|
|
||||||
|
card_name = card['name'].split(':')[-1]
|
||||||
|
if card_name == 'amplify':
|
||||||
|
formats = self._extract_formats_from_vmap_url(
|
||||||
|
get_binding_value('amplify_url_vmap'),
|
||||||
|
get_binding_value('amplify_content_id') or twid)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for suffix in ('_small', '', '_large', '_x_large', '_original'):
|
||||||
|
image = get_binding_value('player_image' + suffix) or {}
|
||||||
|
image_url = image.get('url')
|
||||||
|
if not image_url or '/player-placeholder' in image_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'id': suffix[1:] if suffix else 'medium',
|
||||||
|
'url': image_url,
|
||||||
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
if twitter_card_url:
|
|
||||||
info.update({
|
info.update({
|
||||||
'_type': 'url_transparent',
|
'formats': formats,
|
||||||
'ie_key': 'TwitterCard',
|
'thumbnails': thumbnails,
|
||||||
'url': twitter_card_url,
|
'duration': int_or_none(get_binding_value(
|
||||||
|
'content_duration_seconds')),
|
||||||
|
})
|
||||||
|
elif card_name == 'player':
|
||||||
|
info.update({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': get_binding_value('player_url'),
|
||||||
|
})
|
||||||
|
elif card_name == 'periscope_broadcast':
|
||||||
|
info.update({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': get_binding_value('url') or get_binding_value('player_url'),
|
||||||
|
'ie_key': PeriscopeIE.ie_key(),
|
||||||
|
})
|
||||||
|
elif card_name == 'broadcast':
|
||||||
|
info.update({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': get_binding_value('broadcast_url'),
|
||||||
|
'ie_key': TwitterBroadcastIE.ie_key(),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unsupported Twitter Card.')
|
||||||
|
else:
|
||||||
|
expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
|
||||||
|
if not expanded_url:
|
||||||
|
raise ExtractorError("There's no video in this tweet.")
|
||||||
|
info.update({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': expanded_url,
|
||||||
})
|
})
|
||||||
return info
|
return info
|
||||||
|
|
||||||
raise ExtractorError('There\'s no video in this tweet.')
|
|
||||||
|
|
||||||
|
|
||||||
class TwitterAmplifyIE(TwitterBaseIE):
|
class TwitterAmplifyIE(TwitterBaseIE):
|
||||||
IE_NAME = 'twitter:amplify'
|
IE_NAME = 'twitter:amplify'
|
||||||
@ -573,3 +567,27 @@ class TwitterAmplifyIE(TwitterBaseIE):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||||
|
IE_NAME = 'twitter:broadcast'
|
||||||
|
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
broadcast_id = self._match_id(url)
|
||||||
|
broadcast = self._call_api(
|
||||||
|
'broadcasts/show.json', broadcast_id,
|
||||||
|
{'ids': broadcast_id})['broadcasts'][broadcast_id]
|
||||||
|
info = self._parse_broadcast_data(broadcast, broadcast_id)
|
||||||
|
media_key = broadcast['media_key']
|
||||||
|
source = self._call_api(
|
||||||
|
'live_video_stream/status/' + media_key, media_key)['source']
|
||||||
|
m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
|
||||||
|
if '/live_video_stream/geoblocked/' in m3u8_url:
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
|
||||||
|
m3u8_url).query).get('type', [None])[0]
|
||||||
|
state, width, height = self._extract_common_format_info(broadcast)
|
||||||
|
info['formats'] = self._extract_pscp_m3u8_formats(
|
||||||
|
m3u8_url, broadcast_id, m3u8_id, state, width, height)
|
||||||
|
return info
|
||||||
|
@ -634,14 +634,15 @@ class VKWallPostIE(VKBaseIE):
|
|||||||
if not a.url:
|
if not a.url:
|
||||||
continue
|
continue
|
||||||
title = unescapeHTML(a.title)
|
title = unescapeHTML(a.title)
|
||||||
|
performer = unescapeHTML(a.performer)
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': '%s_%s' % (a.owner_id, a.id),
|
'id': '%s_%s' % (a.owner_id, a.id),
|
||||||
'url': self._unmask_url(a.url, a.ads['vk_id']),
|
'url': self._unmask_url(a.url, a.ads['vk_id']),
|
||||||
'title': '%s - %s' % (a.performer, title) if a.performer else title,
|
'title': '%s - %s' % (performer, title) if performer else title,
|
||||||
'thumbnail': a.cover_url.split(',') if a.cover_url else None,
|
'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
|
||||||
'duration': a.duration,
|
'duration': int_or_none(a.duration),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'artist': a.performer,
|
'artist': performer,
|
||||||
'track': title,
|
'track': title,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'protocol': 'm3u8',
|
'protocol': 'm3u8',
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2019.10.29'
|
__version__ = '2019.11.05'
|
||||||
|
Loading…
Reference in New Issue
Block a user