1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-12-25 15:57:55 +01:00

Merge pull request #9 from ytdl-org/master

update
This commit is contained in:
tsia 2019-11-19 18:46:56 +01:00 committed by GitHub
commit 9599ce0bc3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
51 changed files with 1612 additions and 2201 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've verified that I'm running youtube-dl version **2019.11.05**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.10.29 [debug] youtube-dl version 2019.11.05
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've verified that I'm running youtube-dl version **2019.11.05**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've verified that I'm running youtube-dl version **2019.11.05**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've verified that I'm running youtube-dl version **2019.11.05**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.10.29 [debug] youtube-dl version 2019.11.05
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2019.10.29** - [ ] I've verified that I'm running youtube-dl version **2019.11.05**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@ -21,6 +21,12 @@ matrix:
- python: 3.7 - python: 3.7
dist: xenial dist: xenial
env: YTDL_TEST_SET=download env: YTDL_TEST_SET=download
- python: 3.8
dist: xenial
env: YTDL_TEST_SET=core
- python: 3.8
dist: xenial
env: YTDL_TEST_SET=download
- python: 3.8-dev - python: 3.8-dev
dist: xenial dist: xenial
env: YTDL_TEST_SET=core env: YTDL_TEST_SET=core

View File

@ -1,3 +1,47 @@
version 2019.11.05
Extractors
+ [scte] Add support for learning.scte.org (#22975)
+ [msn] Add support for Vidible and AOL embeds (#22195, #22227)
* [myspass] Fix video URL extraction and improve metadata extraction (#22448)
* [jamendo] Improve extraction
* Fix album extraction (#18564)
* Improve metadata extraction (#18565, #21379)
* [mediaset] Relax URL guid matching (#18352)
+ [mediaset] Extract unprotected M3U and MPD manifests (#17204)
* [telegraaf] Fix extraction
+ [bellmedia] Add support for marilyn.ca videos (#22193)
* [stv] Fix extraction (#22928)
- [iconosquare] Remove extractor
- [keek] Remove extractor
- [gameone] Remove extractor (#21778)
- [flipagram] Remove extractor
- [bambuser] Remove extractor
* [wistia] Reduce embed extraction false positives
+ [wistia] Add support for inline embeds (#22931)
- [go90] Remove extractor
* [kakao] Remove raw request
+ [kakao] Extract format total bitrate
* [daum] Fix VOD and Clip extracton (#15015)
* [kakao] Improve extraction
+ Add support for embed URLs
+ Add support for Kakao Legacy vid based embed URLs
* Only extract fields used for extraction
* Strip description and extract tags
* [mixcloud] Fix cloudcast data extraction (#22821)
* [yahoo] Improve extraction
+ Add support for live streams (#3597, #3779, #22178)
* Bypass cookie consent page for european domains (#16948, #22576)
+ Add generic support for embeds (#20332)
* [tv2] Fix and improve extraction (#22787)
+ [tv2dk] Add support for TV2 DK sites
* [onet] Improve extraction …
+ Add support for onet100.vod.pl
+ Extract m3u8 formats
* Correct audio only format info
* [fox9] Fix extraction
version 2019.10.29 version 2019.10.29
Core Core

View File

@ -752,8 +752,8 @@ As a last resort, you can also uninstall the version installed by your package m
Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html): Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
``` ```
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
sudo chmod a+x /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl
hash -r hash -r
``` ```

View File

@ -76,8 +76,6 @@
- **awaan:video** - **awaan:video**
- **AZMedien**: AZ Medien videos - **AZMedien**: AZ Medien videos
- **BaiduVideo**: 百度视频 - **BaiduVideo**: 百度视频
- **bambuser**
- **bambuser:channel**
- **Bandcamp** - **Bandcamp**
- **Bandcamp:album** - **Bandcamp:album**
- **Bandcamp:weekly** - **Bandcamp:weekly**
@ -284,12 +282,12 @@
- **FiveThirtyEight** - **FiveThirtyEight**
- **FiveTV** - **FiveTV**
- **Flickr** - **Flickr**
- **Flipagram**
- **Folketinget**: Folketinget (ft.dk; Danish parliament) - **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **FootyRoom** - **FootyRoom**
- **Formula1** - **Formula1**
- **FOX** - **FOX**
- **FOX9** - **FOX9**
- **FOX9News**
- **Foxgay** - **Foxgay**
- **foxnews**: Fox News and Fox Business Video - **foxnews**: Fox News and Fox Business Video
- **foxnews:article** - **foxnews:article**
@ -315,8 +313,6 @@
- **FXNetworks** - **FXNetworks**
- **Gaia** - **Gaia**
- **GameInformer** - **GameInformer**
- **GameOne**
- **gameone:playlist**
- **GameSpot** - **GameSpot**
- **GameStar** - **GameStar**
- **Gaskrank** - **Gaskrank**
@ -331,7 +327,6 @@
- **Globo** - **Globo**
- **GloboArticle** - **GloboArticle**
- **Go** - **Go**
- **Go90**
- **GodTube** - **GodTube**
- **Golem** - **Golem**
- **GoogleDrive** - **GoogleDrive**
@ -366,7 +361,6 @@
- **Hungama** - **Hungama**
- **HungamaSong** - **HungamaSong**
- **Hypem** - **Hypem**
- **Iconosquare**
- **ign.com** - **ign.com**
- **imdb**: Internet Movie Database trailers - **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists - **imdb:list**: Internet Movie Database lists
@ -406,7 +400,6 @@
- **Kankan** - **Kankan**
- **Karaoketv** - **Karaoketv**
- **KarriereVideos** - **KarriereVideos**
- **keek**
- **KeezMovies** - **KeezMovies**
- **Ketnet** - **Ketnet**
- **KhanAcademy** - **KhanAcademy**
@ -777,6 +770,8 @@
- **Screencast** - **Screencast**
- **ScreencastOMatic** - **ScreencastOMatic**
- **scrippsnetworks:watch** - **scrippsnetworks:watch**
- **SCTE**
- **SCTECourse**
- **Seeker** - **Seeker**
- **SenateISVP** - **SenateISVP**
- **SendtoNews** - **SendtoNews**
@ -926,6 +921,7 @@
- **TV2** - **TV2**
- **tv2.hu** - **tv2.hu**
- **TV2Article** - **TV2Article**
- **TV2DK**
- **TV4**: tv4.se and tv4play.se - **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+ - **TV5MondePlus**: TV5MONDE+
- **TVA** - **TVA**

View File

@ -1,95 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
)
from ..utils import (
ExtractorError,
qualities,
)
class AddAnimeIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
_TESTS = [{
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
'md5': '72954ea10bc979ab5e2eb288b21425a0',
'info_dict': {
'id': '24MR3YO5SAS9',
'ext': 'mp4',
'description': 'One Piece 606',
'title': 'One Piece 606',
},
'skip': 'Video is gone',
}, {
'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
try:
webpage = self._download_webpage(url, video_id)
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError) or \
ee.cause.code != 503:
raise
redir_webpage = ee.cause.read().decode('utf-8')
action = self._search_regex(
r'<form id="challenge-form" action="([^"]+)"',
redir_webpage, 'Redirect form')
vc = self._search_regex(
r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
redir_webpage, 'redirect vc value')
av = re.search(
r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
redir_webpage)
if av is None:
raise ExtractorError('Cannot find redirect math task')
av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
parsed_url = compat_urllib_parse_urlparse(url)
av_val = av_res + len(parsed_url.netloc)
confirm_url = (
parsed_url.scheme + '://' + parsed_url.netloc
+ action + '?'
+ compat_urllib_parse_urlencode({
'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
self._download_webpage(
confirm_url, video_id,
note='Confirming after redirect')
webpage = self._download_webpage(url, video_id)
FORMATS = ('normal', 'hq')
quality = qualities(FORMATS)
formats = []
for format_id in FORMATS:
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
video_url = self._search_regex(rex, webpage, 'video file URLx',
fatal=False)
if not video_url:
continue
formats.append({
'format_id': format_id,
'url': video_url,
'quality': quality(format_id),
})
self._sort_formats(formats)
video_title = self._og_search_title(webpage)
video_description = self._og_search_description(webpage)
return {
'_type': 'video',
'id': video_id,
'formats': formats,
'title': video_title,
'description': video_description
}

View File

@ -1,142 +0,0 @@
from __future__ import unicode_literals
import re
import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
sanitized_Request,
urlencode_postdata,
)
class BambuserIE(InfoExtractor):
IE_NAME = 'bambuser'
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
_API_KEY = '005f64509e19a868399060af746a00aa'
_LOGIN_URL = 'https://bambuser.com/user'
_NETRC_MACHINE = 'bambuser'
_TEST = {
'url': 'http://bambuser.com/v/4050584',
# MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388
# 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
'info_dict': {
'id': '4050584',
'ext': 'flv',
'title': 'Education engineering days - lightning talks',
'duration': 3741,
'uploader': 'pixelversity',
'uploader_id': '344706',
'timestamp': 1382976692,
'upload_date': '20131028',
'view_count': int,
},
'params': {
# It doesn't respect the 'Range' header, it would download the whole video
# caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59
'skip_download': True,
},
}
def _login(self):
username, password = self._get_login_info()
if username is None:
return
login_form = {
'form_id': 'user_login',
'op': 'Log in',
'name': username,
'pass': password,
}
request = sanitized_Request(
self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in')
login_error = self._html_search_regex(
r'(?s)<div class="messages error">(.+?)</div>',
response, 'login error', default=None)
if login_error:
raise ExtractorError(
'Unable to login: %s' % login_error, expected=True)
def _real_initialize(self):
self._login()
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_json(
'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
% (self._API_KEY, video_id), video_id)
error = info.get('error')
if error:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
result = info['result']
return {
'id': video_id,
'title': result['title'],
'url': result['url'],
'thumbnail': result.get('preview'),
'duration': int_or_none(result.get('length')),
'uploader': result.get('username'),
'uploader_id': compat_str(result.get('owner', {}).get('uid')),
'timestamp': int_or_none(result.get('created')),
'fps': float_or_none(result.get('framerate')),
'view_count': int_or_none(result.get('views_total')),
'comment_count': int_or_none(result.get('comment_count')),
}
class BambuserChannelIE(InfoExtractor):
IE_NAME = 'bambuser:channel'
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request
_STEP = 50
_TEST = {
'url': 'http://bambuser.com/channel/pixelversity',
'info_dict': {
'title': 'pixelversity',
},
'playlist_mincount': 60,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')
urls = []
last_id = ''
for i in itertools.count(1):
req_url = (
'http://bambuser.com/xhr-api/index.php?username={user}'
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id)
req = sanitized_Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
data = self._download_json(
req, user, 'Downloading page %d' % i)
results = data['result']
if not results:
break
last_id = results[-1]['vid']
urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
return {
'_type': 'playlist',
'title': user,
'entries': urls,
}

View File

@ -22,7 +22,8 @@ class BellMediaIE(InfoExtractor):
bravo| bravo|
mtv| mtv|
space| space|
etalk etalk|
marilyn
)\.ca| )\.ca|
much\.com much\.com
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})''' )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
@ -70,6 +71,7 @@ class BellMediaIE(InfoExtractor):
'animalplanet': 'aniplan', 'animalplanet': 'aniplan',
'etalk': 'ctv', 'etalk': 'ctv',
'bnnbloomberg': 'bnn', 'bnnbloomberg': 'bnn',
'marilyn': 'ctv_marilyn',
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,74 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
)
class ComCarCoffIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
_TESTS = [{
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
'info_dict': {
'id': '2494164',
'ext': 'mp4',
'upload_date': '20141127',
'timestamp': 1417107600,
'duration': 1232,
'title': 'Happy Thanksgiving Miranda',
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
},
'params': {
'skip_download': 'requires ffmpeg',
}
}]
def _real_extract(self, url):
display_id = self._match_id(url)
if not display_id:
display_id = 'comediansincarsgettingcoffee.com'
webpage = self._download_webpage(url, display_id)
full_data = self._parse_json(
self._search_regex(
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
display_id)['videoData']
display_id = full_data['activeVideo']['video']
video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
video_id = compat_str(video_data['mediaId'])
title = video_data['title']
formats = self._extract_m3u8_formats(
video_data['mediaUrl'], video_id, 'mp4')
self._sort_formats(formats)
thumbnails = [{
'url': video_data['images']['thumb'],
}, {
'url': video_data['images']['poster'],
}]
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
video_data.get('pubDate'))
duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
video_data.get('duration'))
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': video_data.get('description'),
'timestamp': timestamp,
'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episode')),
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
}

View File

@ -1455,14 +1455,14 @@ class InfoExtractor(object):
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(), transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True, m3u8_id=None): fatal=True, m3u8_id=None, data=None, headers={}, query={}):
manifest = self._download_xml( manifest = self._download_xml(
manifest_url, video_id, 'Downloading f4m manifest', manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest', 'Unable to download f4m manifest',
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244) # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
transform_source=transform_source, transform_source=transform_source,
fatal=fatal) fatal=fatal, data=data, headers=headers, query=query)
if manifest is False: if manifest is False:
return [] return []
@ -1586,12 +1586,13 @@ class InfoExtractor(object):
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None, entry_protocol='m3u8', preference=None,
m3u8_id=None, note=None, errnote=None, m3u8_id=None, note=None, errnote=None,
fatal=True, live=False): fatal=True, live=False, data=None, headers={},
query={}):
res = self._download_webpage_handle( res = self._download_webpage_handle(
m3u8_url, video_id, m3u8_url, video_id,
note=note or 'Downloading m3u8 information', note=note or 'Downloading m3u8 information',
errnote=errnote or 'Failed to download m3u8 information', errnote=errnote or 'Failed to download m3u8 information',
fatal=fatal) fatal=fatal, data=data, headers=headers, query=query)
if res is False: if res is False:
return [] return []
@ -2009,12 +2010,12 @@ class InfoExtractor(object):
}) })
return entries return entries
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}): def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}):
res = self._download_xml_handle( res = self._download_xml_handle(
mpd_url, video_id, mpd_url, video_id,
note=note or 'Downloading MPD manifest', note=note or 'Downloading MPD manifest',
errnote=errnote or 'Failed to download MPD manifest', errnote=errnote or 'Failed to download MPD manifest',
fatal=fatal) fatal=fatal, data=data, headers=headers, query=query)
if res is False: if res is False:
return [] return []
mpd_doc, urlh = res mpd_doc, urlh = res
@ -2317,12 +2318,12 @@ class InfoExtractor(object):
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats return formats
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle( res = self._download_xml_handle(
ism_url, video_id, ism_url, video_id,
note=note or 'Downloading ISM manifest', note=note or 'Downloading ISM manifest',
errnote=errnote or 'Failed to download ISM manifest', errnote=errnote or 'Failed to download ISM manifest',
fatal=fatal) fatal=fatal, data=data, headers=headers, query=query)
if res is False: if res is False:
return [] return []
ism_doc, urlh = res ism_doc, urlh = res
@ -2689,7 +2690,7 @@ class InfoExtractor(object):
entry = { entry = {
'id': this_video_id, 'id': this_video_id,
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')), 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
'description': video_data.get('description'), 'description': clean_html(video_data.get('description')),
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))), 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
'timestamp': int_or_none(video_data.get('pubdate')), 'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),

View File

@ -1,154 +0,0 @@
from __future__ import unicode_literals
import base64
import json
import random
import re
from .common import InfoExtractor
from ..aes import (
aes_cbc_decrypt,
aes_cbc_encrypt,
)
from ..compat import compat_b64decode
from ..utils import (
bytes_to_intlist,
bytes_to_long,
extract_attributes,
ExtractorError,
intlist_to_bytes,
js_to_json,
int_or_none,
long_to_bytes,
pkcs1pad,
)
class DaisukiMottoIE(InfoExtractor):
_VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P<id>[0-9a-zA-Z]{3})'
_TEST = {
'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428',
'info_dict': {
'id': 'V2e',
'ext': 'mp4',
'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!',
'subtitles': {
'mul': [{
'ext': 'ttml',
}],
},
},
'params': {
'skip_download': True, # AES-encrypted HLS stream
},
}
# The public key in PEM format can be found in clientlibs_anime_watch.min.js
_RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
flashvars = self._parse_json(self._search_regex(
r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
video_id, transform_source=js_to_json)
iv = [0] * 16
data = {}
for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'):
data[key] = flashvars.get(key, '')
encrypted_rtn = None
# Some AES keys are rejected. Try it with different AES keys
for idx in range(5):
aes_key = [random.randint(0, 254) for _ in range(32)]
padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128))
n, e = self._RSA_KEY
encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
init_data = self._download_json(
'http://motto.daisuki.net/fastAPI/bgn/init/',
video_id, query={
's': flashvars.get('s', ''),
'c': flashvars.get('ss3_prm', ''),
'e': url,
'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
bytes_to_intlist(json.dumps(data)),
aes_key, iv))).decode('ascii'),
'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
}, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
if 'rtn' in init_data:
encrypted_rtn = init_data['rtn']
break
self._sleep(5, video_id)
if encrypted_rtn is None:
raise ExtractorError('Failed to fetch init data')
rtn = self._parse_json(
intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
compat_b64decode(encrypted_rtn)),
aes_key, iv)).decode('utf-8').rstrip('\0'),
video_id)
title = rtn['title_str']
formats = self._extract_m3u8_formats(
rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
subtitles = {}
caption_url = rtn.get('caption_url')
if caption_url:
# mul: multiple languages
subtitles['mul'] = [{
'url': caption_url,
'ext': 'ttml',
}]
return {
'id': video_id,
'title': title,
'formats': formats,
'subtitles': subtitles,
}
class DaisukiMottoPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://motto\.daisuki\.net/(?P<id>information)/'
_TEST = {
'url': 'http://motto.daisuki.net/information/',
'info_dict': {
'title': 'DRAGON BALL SUPER',
},
'playlist_mincount': 117,
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
entries = []
for li in re.findall(r'(<li[^>]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage):
attr = extract_attributes(li)
ad_id = attr.get('data-ad_id')
product_id = attr.get('data-product_id')
if ad_id and product_id:
episode_id = attr.get('data-chapter')
entries.append({
'_type': 'url_transparent',
'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id),
'episode_id': episode_id,
'episode_number': int_or_none(episode_id),
'ie_key': 'DaisukiMotto',
})
return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER')

View File

@ -146,6 +146,11 @@ class DPlayIE(InfoExtractor):
video = self._download_json( video = self._download_json(
disco_base + 'content/videos/' + display_id, display_id, disco_base + 'content/videos/' + display_id, display_id,
headers=headers, query={ headers=headers, query={
'fields[channel]': 'name',
'fields[image]': 'height,src,width',
'fields[show]': 'name',
'fields[tag]': 'name',
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
'include': 'images,primaryChannel,show,tags' 'include': 'images,primaryChannel,show,tags'
}) })
video_id = video['data']['id'] video_id = video['data']['id']
@ -226,7 +231,6 @@ class DPlayIE(InfoExtractor):
'series': series, 'series': series,
'season_number': int_or_none(info.get('seasonNumber')), 'season_number': int_or_none(info.get('seasonNumber')),
'episode_number': int_or_none(info.get('episodeNumber')), 'episode_number': int_or_none(info.get('episodeNumber')),
'age_limit': int_or_none(info.get('minimum_age')),
'creator': creator, 'creator': creator,
'tags': tags, 'tags': tags,
'thumbnails': thumbnails, 'thumbnails': thumbnails,

View File

@ -17,6 +17,7 @@ from ..utils import (
float_or_none, float_or_none,
mimetype2ext, mimetype2ext,
str_or_none, str_or_none,
try_get,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
url_or_none, url_or_none,
@ -24,7 +25,14 @@ from ..utils import (
class DRTVIE(InfoExtractor): class DRTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' _VALID_URL = r'''(?x)
https?://
(?:
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
)
(?P<id>[\da-z_-]+)
'''
_GEO_BYPASS = False _GEO_BYPASS = False
_GEO_COUNTRIES = ['DK'] _GEO_COUNTRIES = ['DK']
IE_NAME = 'drtv' IE_NAME = 'drtv'
@ -83,6 +91,26 @@ class DRTVIE(InfoExtractor):
}, { }, {
'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
'info_dict': {
'id': '00951930010',
'ext': 'mp4',
'title': 'Bonderøven (1:8)',
'description': 'md5:3cf18fc0d3b205745d4505f896af8121',
'timestamp': 1546542000,
'upload_date': '20190103',
'duration': 2576.6,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
'only_matching': True,
}, {
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -100,13 +128,32 @@ class DRTVIE(InfoExtractor):
webpage, 'video id', default=None) webpage, 'video id', default=None)
if not video_id: if not video_id:
video_id = compat_urllib_parse_unquote(self._search_regex( video_id = self._search_regex(
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)', r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
webpage, 'urn')) webpage, 'urn', default=None)
if video_id:
video_id = compat_urllib_parse_unquote(video_id)
_PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard'
query = {'expanded': 'true'}
if video_id:
programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
else:
programcard_url = _PROGRAMCARD_BASE
page = self._parse_json(
self._search_regex(
r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
'data'), '1')['cache']['page']
page = page[list(page.keys())[0]]
item = try_get(
page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
dict)
video_id = item['customId'].split(':')[-1]
query['productionnumber'] = video_id
data = self._download_json( data = self._download_json(
'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id, programcard_url, video_id, 'Downloading video JSON', query=query)
video_id, 'Downloading video JSON', query={'expanded': 'true'})
title = str_or_none(data.get('Title')) or re.sub( title = str_or_none(data.get('Title')) or re.sub(
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '', r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',

View File

@ -18,7 +18,6 @@ from .acast import (
ACastIE, ACastIE,
ACastChannelIE, ACastChannelIE,
) )
from .addanime import AddAnimeIE
from .adn import ADNIE from .adn import ADNIE
from .adobeconnect import AdobeConnectIE from .adobeconnect import AdobeConnectIE
from .adobetv import ( from .adobetv import (
@ -80,7 +79,6 @@ from .awaan import (
) )
from .azmedien import AZMedienIE from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE from .baidu import BaiduVideoIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
from .bbc import ( from .bbc import (
BBCCoUkIE, BBCCoUkIE,
@ -224,7 +222,6 @@ from .comedycentral import (
ComedyCentralTVIE, ComedyCentralTVIE,
ToshIE, ToshIE,
) )
from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .commonprotocols import ( from .commonprotocols import (
MmsIE, MmsIE,
@ -255,10 +252,6 @@ from .dailymotion import (
DailymotionPlaylistIE, DailymotionPlaylistIE,
DailymotionUserIE, DailymotionUserIE,
) )
from .daisuki import (
DaisukiMottoIE,
DaisukiMottoPlaylistIE,
)
from .daum import ( from .daum import (
DaumIE, DaumIE,
DaumClipIE, DaumClipIE,
@ -360,7 +353,6 @@ from .firsttv import FirstTVIE
from .fivemin import FiveMinIE from .fivemin import FiveMinIE
from .fivetv import FiveTVIE from .fivetv import FiveTVIE
from .flickr import FlickrIE from .flickr import FlickrIE
from .flipagram import FlipagramIE
from .folketinget import FolketingetIE from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE from .footyroom import FootyRoomIE
from .formula1 import Formula1IE from .formula1 import Formula1IE
@ -407,10 +399,6 @@ from .fusion import FusionIE
from .fxnetworks import FXNetworksIE from .fxnetworks import FXNetworksIE
from .gaia import GaiaIE from .gaia import GaiaIE
from .gameinformer import GameInformerIE from .gameinformer import GameInformerIE
from .gameone import (
GameOneIE,
GameOnePlaylistIE,
)
from .gamespot import GameSpotIE from .gamespot import GameSpotIE
from .gamestar import GameStarIE from .gamestar import GameStarIE
from .gaskrank import GaskrankIE from .gaskrank import GaskrankIE
@ -465,7 +453,6 @@ from .hungama import (
HungamaSongIE, HungamaSongIE,
) )
from .hypem import HypemIE from .hypem import HypemIE
from .iconosquare import IconosquareIE
from .ign import ( from .ign import (
IGNIE, IGNIE,
OneUPIE, OneUPIE,
@ -524,8 +511,8 @@ from .keezmovies import KeezMoviesIE
from .ketnet import KetnetIE from .ketnet import KetnetIE
from .khanacademy import KhanAcademyIE from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .keek import KeekIE
from .konserthusetplay import KonserthusetPlayIE from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE from .krasview import KrasViewIE
@ -640,7 +627,6 @@ from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyIE,
MicrosoftVirtualAcademyCourseIE, MicrosoftVirtualAcademyCourseIE,
) )
from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE from .minoto import MinotoIE
from .miomio import MioMioIE from .miomio import MioMioIE
@ -650,7 +636,6 @@ from .mixcloud import (
MixcloudIE, MixcloudIE,
MixcloudUserIE, MixcloudUserIE,
MixcloudPlaylistIE, MixcloudPlaylistIE,
MixcloudStreamIE,
) )
from .mlb import MLBIE from .mlb import MLBIE
from .mnet import MnetIE from .mnet import MnetIE
@ -944,10 +929,6 @@ from .rentv import (
from .restudy import RestudyIE from .restudy import RestudyIE
from .reuters import ReutersIE from .reuters import ReutersIE
from .reverbnation import ReverbNationIE from .reverbnation import ReverbNationIE
from .revision3 import (
Revision3EmbedIE,
Revision3IE,
)
from .rice import RICEIE from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE from .rmcdecouverte import RMCDecouverteIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
@ -992,6 +973,10 @@ from .sbs import SBSIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE from .screencastomatic import ScreencastOMaticIE
from .scrippsnetworks import ScrippsNetworksWatchIE from .scrippsnetworks import ScrippsNetworksWatchIE
from .scte import (
SCTEIE,
SCTECourseIE,
)
from .seeker import SeekerIE from .seeker import SeekerIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
@ -1249,6 +1234,7 @@ from .twitter import (
TwitterCardIE, TwitterCardIE,
TwitterIE, TwitterIE,
TwitterAmplifyIE, TwitterAmplifyIE,
TwitterBroadcastIE,
) )
from .udemy import ( from .udemy import (
UdemyIE, UdemyIE,

View File

@ -334,7 +334,7 @@ class FacebookIE(InfoExtractor):
if not video_data: if not video_data:
server_js_data = self._parse_json( server_js_data = self._parse_json(
self._search_regex( self._search_regex(
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)', r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
webpage, 'js data', default='{}'), webpage, 'js data', default='{}'),
video_id, transform_source=js_to_json, fatal=False) video_id, transform_source=js_to_json, fatal=False)
video_data = extract_from_jsmods_instances(server_js_data) video_data = extract_from_jsmods_instances(server_js_data)

View File

@ -1,115 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
float_or_none,
try_get,
unified_timestamp,
)
class FlipagramIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://flipagram.com/f/nyvTSJMKId',
'md5': '888dcf08b7ea671381f00fab74692755',
'info_dict': {
'id': 'nyvTSJMKId',
'ext': 'mp4',
'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
'duration': 35.571,
'timestamp': 1461244995,
'upload_date': '20160421',
'uploader': 'kitty juria',
'uploader_id': 'sjuria101',
'creator': 'kitty juria',
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
'comments': list,
'formats': 'mincount:2',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = self._parse_json(
self._search_regex(
r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
video_id)
flipagram = video_data['flipagram']
video = flipagram['video']
json_ld = self._search_json_ld(webpage, video_id, default={})
title = json_ld.get('title') or flipagram['captionText']
description = json_ld.get('description') or flipagram.get('captionText')
formats = [{
'url': video['url'],
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
'filesize': int_or_none(video_data.get('size')),
}]
preview_url = try_get(
flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
if preview_url:
formats.append({
'url': preview_url,
'ext': 'm4a',
'vcodec': 'none',
})
self._sort_formats(formats)
counts = flipagram.get('counts', {})
user = flipagram.get('user', {})
video_data = flipagram.get('video', {})
thumbnails = [{
'url': self._proto_relative_url(cover['url']),
'width': int_or_none(cover.get('width')),
'height': int_or_none(cover.get('height')),
'filesize': int_or_none(cover.get('size')),
} for cover in flipagram.get('covers', []) if cover.get('url')]
# Note that this only retrieves comments that are initially loaded.
# For videos with large amounts of comments, most won't be retrieved.
comments = []
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
text = comment.get('comment')
if not text or not isinstance(text, list):
continue
comments.append({
'author': comment.get('user', {}).get('name'),
'author_id': comment.get('user', {}).get('username'),
'id': comment.get('id'),
'text': text[0],
'timestamp': unified_timestamp(comment.get('created')),
})
return {
'id': video_id,
'title': title,
'description': description,
'duration': float_or_none(flipagram.get('duration'), 1000),
'thumbnails': thumbnails,
'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
'uploader': user.get('name'),
'uploader_id': user.get('username'),
'creator': user.get('name'),
'view_count': int_or_none(counts.get('plays')),
'like_count': int_or_none(counts.get('likes')),
'repost_count': int_or_none(counts.get('reflips')),
'comment_count': int_or_none(counts.get('comments')),
'comments': comments,
'formats': formats,
}

View File

@ -1,134 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
xpath_with_ns,
parse_iso8601,
float_or_none,
int_or_none,
)
NAMESPACE_MAP = {
'media': 'http://search.yahoo.com/mrss/',
}
# URL prefix to download the mp4 files directly instead of streaming via rtmp
# Credits go to XBox-Maniac
# http://board.jdownloader.org/showpost.php?p=185835&postcount=31
RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
class GameOneIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://www.gameone.de/tv/288',
'md5': '136656b7fb4c9cb4a8e2d500651c499b',
'info_dict': {
'id': '288',
'ext': 'mp4',
'title': 'Game One - Folge 288',
'duration': 1238,
'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
'age_limit': 16,
'upload_date': '20140513',
'timestamp': 1399980122,
}
},
{
'url': 'http://gameone.de/tv/220',
'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
'info_dict': {
'id': '220',
'ext': 'mp4',
'upload_date': '20120918',
'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
'timestamp': 1347971451,
'title': 'Game One - Folge 220',
'duration': 896.62,
'age_limit': 16,
}
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
og_video = self._og_search_video_url(webpage, secure=False)
description = self._html_search_meta('description', webpage)
age_limit = int(
self._search_regex(
r'age=(\d+)',
self._html_search_meta(
'age-de-meta-label',
webpage),
'age_limit',
'0'))
mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')
mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss')
title = mrss.find('.//item/title').text
thumbnail = mrss.find('.//item/image').get('url')
timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ')
content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP))
content_url = content.get('url')
content = self._download_xml(
content_url,
video_id,
'Downloading media:content')
rendition_items = content.findall('.//rendition')
duration = float_or_none(rendition_items[0].get('duration'))
formats = [
{
'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
'width': int_or_none(r.get('width')),
'height': int_or_none(r.get('height')),
'tbr': int_or_none(r.get('bitrate')),
}
for r in rendition_items
]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
'description': description,
'age_limit': age_limit,
'timestamp': timestamp,
}
class GameOnePlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$'
IE_NAME = 'gameone:playlist'
_TEST = {
'url': 'http://www.gameone.de/tv',
'info_dict': {
'title': 'GameOne',
},
'playlist_mincount': 294,
}
def _real_extract(self, url):
webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
entries = [
self.url_result('http://www.gameone.de/tv/%d' %
video_id, 'GameOne')
for video_id in range(max_id, 0, -1)]
return {
'_type': 'playlist',
'title': 'GameOne',
'entries': entries,
}

View File

@ -119,6 +119,7 @@ from .viqeo import ViqeoIE
from .expressen import ExpressenIE from .expressen import ExpressenIE
from .zype import ZypeIE from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .kinja import KinjaEmbedIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1487,16 +1488,18 @@ class GenericIE(InfoExtractor):
'timestamp': 1432570283, 'timestamp': 1432570283,
}, },
}, },
# OnionStudios embed # Kinja embed
{ {
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
'info_dict': { 'info_dict': {
'id': '2855', 'id': '106351',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Dont Understand Bitcoin? This Man Will Mumble An Explanation At You', 'title': 'Dont Understand Bitcoin? This Man Will Mumble An Explanation At You',
'description': 'Migrated from OnionStudios',
'thumbnail': r're:^https?://.*\.jpe?g$', 'thumbnail': r're:^https?://.*\.jpe?g$',
'uploader': 'ClickHole', 'uploader': 'clickhole',
'uploader_id': 'clickhole', 'upload_date': '20150527',
'timestamp': 1432744860,
} }
}, },
# SnagFilms embed # SnagFilms embed
@ -2894,6 +2897,12 @@ class GenericIE(InfoExtractor):
if senate_isvp_url: if senate_isvp_url:
return self.url_result(senate_isvp_url, 'SenateISVP') return self.url_result(senate_isvp_url, 'SenateISVP')
# Look for Kinja embeds
kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
if kinja_embed_urls:
return self.playlist_from_matches(
kinja_embed_urls, video_id, video_title)
# Look for OnionStudios embeds # Look for OnionStudios embeds
onionstudios_url = OnionStudiosIE._extract_url(webpage) onionstudios_url = OnionStudiosIE._extract_url(webpage)
if onionstudios_url: if onionstudios_url:

View File

@ -118,6 +118,7 @@ class HotStarIE(HotStarBaseIE):
if video_data.get('drmProtected'): if video_data.get('drmProtected'):
raise ExtractorError('This video is DRM protected.', expected=True) raise ExtractorError('This video is DRM protected.', expected=True)
headers = {'Referer': url}
formats = [] formats = []
geo_restricted = False geo_restricted = False
playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets'] playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
@ -137,10 +138,11 @@ class HotStarIE(HotStarBaseIE):
if 'package:hls' in tags or ext == 'm3u8': if 'package:hls' in tags or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')) entry_protocol='m3u8_native',
m3u8_id='hls', headers=headers))
elif 'package:dash' in tags or ext == 'mpd': elif 'package:dash' in tags or ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash')) format_url, video_id, mpd_id='dash', headers=headers))
elif ext == 'f4m': elif ext == 'f4m':
# produce broken files # produce broken files
pass pass
@ -158,6 +160,9 @@ class HotStarIE(HotStarBaseIE):
self.raise_geo_restricted(countries=['IN']) self.raise_geo_restricted(countries=['IN'])
self._sort_formats(formats) self._sort_formats(formats)
for f in formats:
f.setdefault('http_headers', {}).update(headers)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,

View File

@ -1,85 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
get_element_by_id,
remove_end,
)
class IconosquareIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
_TEST = {
'url': 'http://statigr.am/p/522207370455279102_24101272',
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
'info_dict': {
'id': '522207370455279102_24101272',
'ext': 'mp4',
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
'timestamp': 1376471991,
'upload_date': '20130814',
'uploader': 'aguynamedpatrick',
'uploader_id': '24101272',
'comment_count': int,
'like_count': int,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
media = self._parse_json(
get_element_by_id('mediaJson', webpage),
video_id)
formats = [{
'url': f['url'],
'format_id': format_id,
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height'))
} for format_id, f in media['videos'].items()]
self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')
timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
description = media.get('caption', {}).get('text')
uploader = media.get('user', {}).get('username')
uploader_id = media.get('user', {}).get('id')
comment_count = int_or_none(media.get('comments', {}).get('count'))
like_count = int_or_none(media.get('likes', {}).get('count'))
thumbnails = [{
'url': t['url'],
'id': thumbnail_id,
'width': int_or_none(t.get('width')),
'height': int_or_none(t.get('height'))
} for thumbnail_id, t in media.get('images', {}).items()]
comments = [{
'id': comment.get('id'),
'text': comment['text'],
'timestamp': int_or_none(comment.get('created_time')),
'author': comment.get('from', {}).get('full_name'),
'author_id': comment.get('from', {}).get('username'),
} for comment in media.get('comments', {}).get('data', []) if 'text' in comment]
return {
'id': video_id,
'title': title,
'description': description,
'thumbnails': thumbnails,
'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'comment_count': comment_count,
'like_count': like_count,
'formats': formats,
'comments': comments,
}

View File

@ -18,6 +18,8 @@ class IviIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
_GEO_BYPASS = False _GEO_BYPASS = False
_GEO_COUNTRIES = ['RU'] _GEO_COUNTRIES = ['RU']
_LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
_LIGHT_URL = 'https://api.ivi.ru/light/'
_TESTS = [ _TESTS = [
# Single movie # Single movie
@ -80,48 +82,77 @@ class IviIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
data = { data = json.dumps({
'method': 'da.content.get', 'method': 'da.content.get',
'params': [ 'params': [
video_id, { video_id, {
'site': 's183', 'site': 's%d',
'referrer': 'http://www.ivi.ru/watch/%s' % video_id, 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
'contentid': video_id 'contentid': video_id
} }
] ]
}).encode()
try:
from Crypto.Cipher import Blowfish
from Crypto.Hash import CMAC
timestamp = self._download_json(
self._LIGHT_URL, video_id,
'Downloading timestamp JSON', data=json.dumps({
'method': 'da.timestamp.get',
'params': []
}).encode())['result']
data = data % 353
query = {
'ts': timestamp,
'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + data, Blowfish).hexdigest(),
} }
except ImportError:
data = data % 183
query = {}
video_json = self._download_json( video_json = self._download_json(
'http://api.digitalaccess.ru/api/json/', video_id, self._LIGHT_URL, video_id,
'Downloading video JSON', data=json.dumps(data)) 'Downloading video JSON', data=data, query=query)
if 'error' in video_json: error = video_json.get('error')
error = video_json['error'] if error:
origin = error['origin'] origin = error.get('origin')
message = error.get('message') or error.get('user_message')
extractor_msg = 'Unable to download video %s'
if origin == 'NotAllowedForLocation': if origin == 'NotAllowedForLocation':
self.raise_geo_restricted( self.raise_geo_restricted(message, self._GEO_COUNTRIES)
msg=error['message'], countries=self._GEO_COUNTRIES)
elif origin == 'NoRedisValidData': elif origin == 'NoRedisValidData':
raise ExtractorError('Video %s does not exist' % video_id, expected=True) extractor_msg = 'Video %s does not exist'
elif message:
if 'недоступен для просмотра на площадке s183' in message:
raise ExtractorError( raise ExtractorError(
'Unable to download video %s: %s' % (video_id, error['message']), 'pycryptodome not found. Please install it.',
expected=True) expected=True)
extractor_msg += ': ' + message
raise ExtractorError(extractor_msg % video_id, expected=True)
result = video_json['result'] result = video_json['result']
title = result['title']
quality = qualities(self._KNOWN_FORMATS) quality = qualities(self._KNOWN_FORMATS)
formats = [{ formats = []
'url': x['url'], for f in result.get('files', []):
'format_id': x.get('content_format'), f_url = f.get('url')
'quality': quality(x.get('content_format')), content_format = f.get('content_format')
} for x in result['files'] if x.get('url')] if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format:
continue
formats.append({
'url': f_url,
'format_id': content_format,
'quality': quality(content_format),
'filesize': int_or_none(f.get('size_in_bytes')),
})
self._sort_formats(formats) self._sort_formats(formats)
title = result['title']
duration = int_or_none(result.get('duration'))
compilation = result.get('compilation') compilation = result.get('compilation')
episode = title if compilation else None episode = title if compilation else None
@ -158,7 +189,7 @@ class IviIE(InfoExtractor):
'episode_number': episode_number, 'episode_number': episode_number,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'description': description, 'description': description,
'duration': duration, 'duration': int_or_none(result.get('duration')),
'formats': formats, 'formats': formats,
} }

View File

@ -1,38 +1,26 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import hashlib
import random
from ..compat import compat_urlparse from ..compat import compat_str
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_duration from ..utils import (
clean_html,
int_or_none,
try_get,
)
class JamendoBaseIE(InfoExtractor): class JamendoIE(InfoExtractor):
def _extract_meta(self, webpage, fatal=True):
title = self._og_search_title(
webpage, default=None) or self._search_regex(
r'<title>([^<]+)', webpage,
'title', default=None)
if title:
title = self._search_regex(
r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None)
if not title:
title = self._html_search_meta(
'name', webpage, 'title', fatal=fatal)
mobj = re.search(r'(.+) - (.+)', title or '')
artist, second = mobj.groups() if mobj else [None] * 2
return title, artist, second
class JamendoIE(JamendoBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
licensing\.jamendo\.com/[^/]+| licensing\.jamendo\.com/[^/]+|
(?:www\.)?jamendo\.com (?:www\.)?jamendo\.com
) )
/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+) /track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))?
''' '''
_TESTS = [{ _TESTS = [{
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
@ -45,7 +33,9 @@ class JamendoIE(JamendoBaseIE):
'artist': 'Maya Filipič', 'artist': 'Maya Filipič',
'track': 'Stories from Emona I', 'track': 'Stories from Emona I',
'duration': 210, 'duration': 210,
'thumbnail': r're:^https?://.*\.jpg' 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1217438117,
'upload_date': '20080730',
} }
}, { }, {
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock', 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
@ -53,15 +43,20 @@ class JamendoIE(JamendoBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._VALID_URL_RE.match(url) track_id, display_id = self._VALID_URL_RE.match(url).groups()
track_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage( webpage = self._download_webpage(
'https://www.jamendo.com/track/%s/%s' % (track_id, display_id), 'https://www.jamendo.com/track/' + track_id, track_id)
display_id) models = self._parse_json(self._html_search_regex(
r"data-bundled-models='([^']+)",
title, artist, track = self._extract_meta(webpage) webpage, 'bundled models'), track_id)
track = models['track']['models'][0]
title = track_name = track['name']
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
artist = get_model('artist')
artist_name = artist.get('name')
if artist_name:
title = '%s - %s' % (artist_name, title)
album = get_model('album')
formats = [{ formats = [{
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
@ -77,31 +72,58 @@ class JamendoIE(JamendoBaseIE):
))] ))]
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = self._html_search_meta( urls = []
'image', webpage, 'thumbnail', fatal=False) thumbnails = []
duration = parse_duration(self._search_regex( for _, covers in track.get('cover', {}).items():
r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']', for cover_id, cover_url in covers.items():
webpage, 'duration', fatal=False)) if not cover_url or cover_url in urls:
continue
urls.append(cover_url)
size = int_or_none(cover_id.lstrip('size'))
thumbnails.append({
'id': cover_id,
'url': cover_url,
'width': size,
'height': size,
})
tags = []
for tag in track.get('tags', []):
tag_name = tag.get('name')
if not tag_name:
continue
tags.append(tag_name)
stats = track.get('stats') or {}
return { return {
'id': track_id, 'id': track_id,
'display_id': display_id, 'display_id': display_id,
'thumbnail': thumbnail, 'thumbnails': thumbnails,
'title': title, 'title': title,
'duration': duration, 'description': track.get('description'),
'artist': artist, 'duration': int_or_none(track.get('duration')),
'track': track, 'artist': artist_name,
'formats': formats 'track': track_name,
'album': album.get('name'),
'formats': formats,
'license': '-'.join(track.get('licenseCC', [])) or None,
'timestamp': int_or_none(track.get('dateCreated')),
'view_count': int_or_none(stats.get('listenedAll')),
'like_count': int_or_none(stats.get('favorited')),
'average_rating': int_or_none(stats.get('averageNote')),
'tags': tags,
} }
class JamendoAlbumIE(JamendoBaseIE): class JamendoAlbumIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'https://www.jamendo.com/album/121486/duck-on-cover', 'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
'info_dict': { 'info_dict': {
'id': '121486', 'id': '121486',
'title': 'Shearer - Duck On Cover' 'title': 'Duck On Cover',
'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239',
}, },
'playlist': [{ 'playlist': [{
'md5': 'e1a2fcb42bda30dfac990212924149a8', 'md5': 'e1a2fcb42bda30dfac990212924149a8',
@ -111,6 +133,8 @@ class JamendoAlbumIE(JamendoBaseIE):
'title': 'Shearer - Warmachine', 'title': 'Shearer - Warmachine',
'artist': 'Shearer', 'artist': 'Shearer',
'track': 'Warmachine', 'track': 'Warmachine',
'timestamp': 1368089771,
'upload_date': '20130509',
} }
}, { }, {
'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'md5': '1f358d7b2f98edfe90fd55dac0799d50',
@ -120,6 +144,8 @@ class JamendoAlbumIE(JamendoBaseIE):
'title': 'Shearer - Without Your Ghost', 'title': 'Shearer - Without Your Ghost',
'artist': 'Shearer', 'artist': 'Shearer',
'track': 'Without Your Ghost', 'track': 'Without Your Ghost',
'timestamp': 1368089771,
'upload_date': '20130509',
} }
}], }],
'params': { 'params': {
@ -127,24 +153,35 @@ class JamendoAlbumIE(JamendoBaseIE):
} }
} }
def _call_api(self, resource, resource_id):
path = '/api/%ss' % resource
rand = compat_str(random.random())
return self._download_json(
'https://www.jamendo.com' + path, resource_id, query={
'id[]': resource_id,
}, headers={
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
})[0]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._VALID_URL_RE.match(url) album_id = self._match_id(url)
album_id = mobj.group('id') album = self._call_api('album', album_id)
album_name = album.get('name')
webpage = self._download_webpage(url, mobj.group('display_id')) entries = []
for track in album.get('tracks', []):
title, artist, album = self._extract_meta(webpage, fatal=False) track_id = track.get('id')
if not track_id:
entries = [{ continue
track_id = compat_str(track_id)
entries.append({
'_type': 'url_transparent', '_type': 'url_transparent',
'url': compat_urlparse.urljoin(url, m.group('path')), 'url': 'https://www.jamendo.com/track/' + track_id,
'ie_key': JamendoIE.ie_key(), 'ie_key': JamendoIE.ie_key(),
'id': self._search_regex( 'id': track_id,
r'/track/(\d+)', m.group('path'), 'track id', default=None), 'album': album_name,
'artist': artist, })
'album': album,
} for m in re.finditer(
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
webpage)]
return self.playlist_result(entries, album_id, title) return self.playlist_result(
entries, album_id, album_name,
clean_html(try_get(album, lambda x: x['description']['en'], compat_str)))

View File

@ -1,39 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class KeekIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'
IE_NAME = 'keek'
_TEST = {
'url': 'https://www.keek.com/keek/NODfbab',
'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
'info_dict': {
'id': 'NODfbab',
'ext': 'mp4',
'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896',
'uploader': 'ytdl',
'uploader_id': 'eGT5bab',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return {
'id': video_id,
'url': self._og_search_video_url(webpage),
'ext': 'mp4',
'title': self._og_search_description(webpage).strip(),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': self._search_regex(
r'data-username=(["\'])(?P<uploader>.+?)\1', webpage,
'uploader', fatal=False, group='uploader'),
'uploader_id': self._search_regex(
r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', webpage,
'uploader id', fatal=False, group='uploader_id'),
}

View File

@ -0,0 +1,221 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
int_or_none,
parse_iso8601,
strip_or_none,
try_get,
unescapeHTML,
urljoin,
)
class KinjaEmbedIE(InfoExtractor):
IENAME = 'kinja:embed'
_DOMAIN_REGEX = r'''(?:[^.]+\.)?
(?:
avclub|
clickhole|
deadspin|
gizmodo|
jalopnik|
jezebel|
kinja|
kotaku|
lifehacker|
splinternews|
the(?:inventory|onion|root|takeout)
)\.com'''
_COMMON_REGEX = r'''/
(?:
ajax/inset|
embed/video
)/iframe\?.*?\bid='''
_VALID_URL = r'''(?x)https?://%s%s
(?P<type>
fb|
imgur|
instagram|
jwp(?:layer)?-video|
kinjavideo|
mcp|
megaphone|
ooyala|
soundcloud(?:-playlist)?|
tumblr-post|
twitch-stream|
twitter|
ustream-channel|
vimeo|
vine|
youtube-(?:list|video)
)-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
_TESTS = [{
'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
'only_matching': True,
}, {
'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
'only_matching': True,
}]
_JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
_PROVIDER_MAP = {
'fb': ('facebook.com/video.php?v=', 'Facebook'),
'imgur': ('imgur.com/', 'Imgur'),
'instagram': ('instagram.com/p/', 'Instagram'),
'jwplayer-video': _JWPLATFORM_PROVIDER,
'jwp-video': _JWPLATFORM_PROVIDER,
'megaphone': ('player.megaphone.fm/', 'Generic'),
'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
'twitch-stream': ('twitch.tv/', 'TwitchStream'),
'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
'vimeo': ('vimeo.com/', 'Vimeo'),
'vine': ('vine.co/v/', 'Vine'),
'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
'youtube-video': ('youtube.com/embed/', 'Youtube'),
}
@staticmethod
def _extract_urls(webpage, url):
return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
webpage)]
def _real_extract(self, url):
video_type, video_id = re.match(self._VALID_URL, url).groups()
provider = self._PROVIDER_MAP.get(video_type)
if provider:
video_id = compat_urllib_parse_unquote(video_id)
if video_type == 'tumblr-post':
video_id, blog = video_id.split('-', 1)
result_url = provider[0] % (blog, video_id)
elif video_type == 'youtube-list':
video_id, playlist_id = video_id.split('/')
result_url = provider[0] % (video_id, playlist_id)
else:
if video_type == 'ooyala':
video_id = video_id.split('/')[0]
result_url = provider[0] + video_id
return self.url_result('http://' + result_url, provider[1])
if video_type == 'kinjavideo':
data = self._download_json(
'https://kinja.com/api/core/video/views/videoById',
video_id, query={'videoId': video_id})['data']
title = data['title']
formats = []
for k in ('signedPlaylist', 'streaming'):
m3u8_url = data.get(k + 'Url')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
thumbnail = None
poster = data.get('poster') or {}
poster_id = poster.get('id')
if poster_id:
thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')
return {
'id': video_id,
'title': title,
'description': strip_or_none(data.get('description')),
'formats': formats,
'tags': data.get('tags'),
'timestamp': int_or_none(try_get(
data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
'thumbnail': thumbnail,
'uploader': data.get('network'),
}
else:
video_data = self._download_json(
'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
video_id)['videoMetadata']
iptc = video_data['photoVideoMetadataIPTC']
title = iptc['title']['en']
fmg = video_data.get('photoVideoMetadata_fmg') or {}
tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
data = self._download_json(
tvss_domain + '/api/v3/video-auth/url-signature-tokens',
video_id, query={'mcpids': video_id})['data'][0]
formats = []
rendition_url = data.get('renditionUrl')
if rendition_url:
formats = self._extract_m3u8_formats(
rendition_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
fallback_rendition_url = data.get('fallbackRenditionUrl')
if fallback_rendition_url:
formats.append({
'format_id': 'fallback',
'tbr': int_or_none(self._search_regex(
r'_(\d+)\.mp4', fallback_rendition_url,
'bitrate', default=None)),
'url': fallback_rendition_url,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
'uploader': fmg.get('network'),
'duration': int_or_none(iptc.get('fileDuration')),
'formats': formats,
'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
'timestamp': parse_iso8601(iptc.get('dateReleased')),
}

View File

@ -5,24 +5,27 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
compat_str,
int_or_none, int_or_none,
unified_strdate, parse_iso8601,
) )
class LnkGoIE(InfoExtractor): class LnkGoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162', 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
'info_dict': { 'info_dict': {
'id': '46712', 'id': '10809',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Yra kaip yra', 'title': "Put'ka: Trys Klausimai",
'upload_date': '20150107', 'upload_date': '20161216',
'description': 'md5:d82a5e36b775b7048617f263a0e3475e', 'description': 'Seniai matytas Putka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
'age_limit': 7, 'age_limit': 18,
'duration': 3019, 'duration': 117,
'thumbnail': r're:^https?://.*\.jpg$' 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1481904000,
}, },
'params': { 'params': {
'skip_download': True, # HLS download 'skip_download': True, # HLS download
@ -30,20 +33,21 @@ class LnkGoIE(InfoExtractor):
}, { }, {
'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2', 'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
'info_dict': { 'info_dict': {
'id': '47289', 'id': '10467',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nėrdas: Kompiuterio Valymas', 'title': 'Nėrdas: Kompiuterio Valymas',
'upload_date': '20150113', 'upload_date': '20150113',
'description': 'md5:7352d113a242a808676ff17e69db6a69', 'description': 'md5:7352d113a242a808676ff17e69db6a69',
'age_limit': 18, 'age_limit': 18,
'duration': 346, 'duration': 346,
'thumbnail': r're:^https?://.*\.jpg$' 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421164800,
}, },
'params': { 'params': {
'skip_download': True, # HLS download 'skip_download': True, # HLS download
}, },
}, { }, {
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai', 'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
'only_matching': True, 'only_matching': True,
}] }]
_AGE_LIMITS = { _AGE_LIMITS = {
@ -51,66 +55,34 @@ class LnkGoIE(InfoExtractor):
'N-14': 14, 'N-14': 14,
'S': 18, 'S': 18,
} }
_M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage( video_info = self._download_json(
url, display_id, 'Downloading player webpage') 'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0'),
display_id)['videoConfig']['videoInfo']
video_id = self._search_regex(
r'data-ep="([^"]+)"', webpage, 'video ID')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
upload_date = unified_strdate(self._search_regex(
r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
thumbnail_w = int_or_none(
self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
thumbnail_h = int_or_none(
self._og_search_property('image:height', webpage, 'thumbnail height', fatal=False))
thumbnail = {
'url': self._og_search_thumbnail(webpage),
}
if thumbnail_w and thumbnail_h:
thumbnail.update({
'width': thumbnail_w,
'height': thumbnail_h,
})
config = self._parse_json(self._search_regex(
r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id)
if config.get('pGeo'):
self.report_warning(
'This content might not be available in your country due to copyright reasons')
formats = [{
'format_id': 'hls',
'ext': 'mp4',
'url': config['EpisodeVideoLink_HLS'],
}]
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink'])
if m:
formats.append({
'format_id': 'rtmp',
'ext': 'flv',
'url': m.group('url'),
'play_path': m.group('play_path'),
'page_url': url,
})
video_id = compat_str(video_info['id'])
title = video_info['title']
prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
formats = self._extract_m3u8_formats(
self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats) self._sort_formats(formats)
poster_image = video_info.get('posterImage')
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'thumbnails': [thumbnail], 'thumbnail': 'https://lnk.lt/all-images/' + poster_image if poster_image else None,
'duration': int_or_none(config.get('VideoTime')), 'duration': int_or_none(video_info.get('duration')),
'description': description, 'description': clean_html(video_info.get('htmlDescription')),
'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0), 'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
'upload_date': upload_date, 'timestamp': parse_iso8601(video_info.get('airDate')),
'view_count': int_or_none(video_info.get('viewsCount')),
} }

View File

@ -27,7 +27,7 @@ class MediasetIE(ThePlatformBaseIE):
(?:video|on-demand)/(?:[^/]+/)+[^/]+_| (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
player/index\.html\?.*?\bprogramGuid= player/index\.html\?.*?\bprogramGuid=
) )
)(?P<id>[0-9A-Z]{16}) )(?P<id>[0-9A-Z]{16,})
''' '''
_TESTS = [{ _TESTS = [{
# full episode # full episode
@ -62,7 +62,6 @@ class MediasetIE(ThePlatformBaseIE):
'uploader': 'Canale 5', 'uploader': 'Canale 5',
'uploader_id': 'C5', 'uploader_id': 'C5',
}, },
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, { }, {
# clip # clip
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680', 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
@ -78,6 +77,18 @@ class MediasetIE(ThePlatformBaseIE):
}, { }, {
'url': 'mediaset:FAFU000000665924', 'url': 'mediaset:FAFU000000665924',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.mediasetplay.mediaset.it/video/mediasethaacuoreilfuturo/palmieri-alicudi-lisola-dei-tre-bambini-felici--un-decreto-per-alicudi-e-tutte-le-microscuole_FD00000000102295',
'only_matching': True,
}, {
'url': 'https://www.mediasetplay.mediaset.it/video/cherryseason/anticipazioni-degli-episodi-del-23-ottobre_F306837101005C02',
'only_matching': True,
}, {
'url': 'https://www.mediasetplay.mediaset.it/video/tg5/ambiente-onda-umana-per-salvare-il-pianeta_F309453601079D01',
'only_matching': True,
}, {
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -109,6 +120,11 @@ class MediasetIE(ThePlatformBaseIE):
entries.append(embed_url) entries.append(embed_url)
return entries return entries
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
for video in smil.findall(self._xpath_ns('.//video', namespace)):
video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
return super()._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
def _real_extract(self, url): def _real_extract(self, url):
guid = self._match_id(url) guid = self._match_id(url)
tp_path = 'PR1GhC/media/guid/2702976343/' + guid tp_path = 'PR1GhC/media/guid/2702976343/' + guid
@ -118,14 +134,15 @@ class MediasetIE(ThePlatformBaseIE):
subtitles = {} subtitles = {}
first_e = None first_e = None
for asset_type in ('SD', 'HD'): for asset_type in ('SD', 'HD'):
for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'): # TODO: fixup ISM+none manifest URLs
for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
try: try:
tp_formats, tp_subtitles = self._extract_theplatform_smil( tp_formats, tp_subtitles = self._extract_theplatform_smil(
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), { update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
'mbr': 'true', 'mbr': 'true',
'formats': f, 'formats': f,
'assetTypes': asset_type, 'assetTypes': asset_type,
}), guid, 'Downloading %s %s SMIL data' % (f, asset_type)) }), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type))
except ExtractorError as e: except ExtractorError as e:
if not first_e: if not first_e:
first_e = e first_e = e

View File

@ -1,70 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_filesize,
sanitized_Request,
urlencode_postdata,
)
class MinhatecaIE(InfoExtractor):
_VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
_TEST = {
'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
'info_dict': {
'id': '125848331',
'ext': 'mp4',
'title': 'youtube-dl test video',
'thumbnail': r're:^https?://.*\.jpg$',
'filesize_approx': 1530000,
'duration': 9,
'view_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
token = self._html_search_regex(
r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
webpage, 'request token')
token_data = [
('fileId', video_id),
('__RequestVerificationToken', token),
]
req = sanitized_Request(
'http://minhateca.com.br/action/License/Download',
data=urlencode_postdata(token_data))
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
data = self._download_json(
req, video_id, note='Downloading metadata')
video_url = data['redirectUrl']
title_str = self._html_search_regex(
r'<h1.*?>(.*?)</h1>', webpage, 'title')
title, _, ext = title_str.rpartition('.')
filesize_approx = parse_filesize(self._html_search_regex(
r'<p class="fileSize">(.*?)</p>',
webpage, 'file size approximation', fatal=False))
duration = parse_duration(self._html_search_regex(
r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex(
r'<p class="downloadsCounter">([0-9]+)</p>',
webpage, 'view count', fatal=False))
return {
'id': video_id,
'url': video_url,
'title': title,
'ext': ext,
'filesize_approx': filesize_approx,
'duration': duration,
'view_count': view_count,
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import functools
import itertools import itertools
import re import re
@ -11,28 +10,37 @@ from ..compat import (
compat_ord, compat_ord,
compat_str, compat_str,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urlparse,
compat_zip compat_zip
) )
from ..utils import ( from ..utils import (
clean_html,
ExtractorError,
int_or_none, int_or_none,
OnDemandPagedList, parse_iso8601,
str_to_int, strip_or_none,
try_get, try_get,
urljoin,
) )
class MixcloudIE(InfoExtractor): class MixcloudBaseIE(InfoExtractor):
def _call_api(self, object_type, object_fields, display_id, username, slug=None):
lookup_key = object_type + 'Lookup'
return self._download_json(
'https://www.mixcloud.com/graphql', display_id, query={
'query': '''{
%s(lookup: {username: "%s"%s}) {
%s
}
}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields)
})['data'][lookup_key]
class MixcloudIE(MixcloudBaseIE):
_VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)' _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
IE_NAME = 'mixcloud' IE_NAME = 'mixcloud'
_TESTS = [{ _TESTS = [{
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
'info_dict': { 'info_dict': {
'id': 'dholbach-cryptkeeper', 'id': 'dholbach_cryptkeeper',
'ext': 'm4a', 'ext': 'm4a',
'title': 'Cryptkeeper', 'title': 'Cryptkeeper',
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
@ -40,11 +48,13 @@ class MixcloudIE(InfoExtractor):
'uploader_id': 'dholbach', 'uploader_id': 'dholbach',
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'view_count': int, 'view_count': int,
'timestamp': 1321359578,
'upload_date': '20111115',
}, },
}, { }, {
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
'info_dict': { 'info_dict': {
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', 'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Caribou 7 inch Vinyl Mix & Chat', 'title': 'Caribou 7 inch Vinyl Mix & Chat',
'description': 'md5:2b8aec6adce69f9d41724647c65875e8', 'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
@ -52,11 +62,14 @@ class MixcloudIE(InfoExtractor):
'uploader_id': 'gillespeterson', 'uploader_id': 'gillespeterson',
'thumbnail': 're:https?://.*', 'thumbnail': 're:https?://.*',
'view_count': int, 'view_count': int,
'timestamp': 1422987057,
'upload_date': '20150203',
}, },
}, { }, {
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
'only_matching': True, 'only_matching': True,
}] }]
_DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'
@staticmethod @staticmethod
def _decrypt_xor_cipher(key, ciphertext): def _decrypt_xor_cipher(key, ciphertext):
@ -66,115 +79,66 @@ class MixcloudIE(InfoExtractor):
for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) username, slug = re.match(self._VALID_URL, url).groups()
uploader = mobj.group(1) username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
cloudcast_name = mobj.group(2) track_id = '%s_%s' % (username, slug)
track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
webpage = self._download_webpage(url, track_id) cloudcast = self._call_api('cloudcast', '''audioLength
comments(first: 100) {
edges {
node {
comment
created
user {
displayName
username
}
}
}
totalCount
}
description
favorites {
totalCount
}
featuringArtistList
isExclusive
name
owner {
displayName
url
username
}
picture(width: 1024, height: 1024) {
url
}
plays
publishDate
reposts {
totalCount
}
streamInfo {
dashUrl
hlsUrl
url
}
tags {
tag {
name
}
}''', track_id, username, slug)
# Legacy path title = cloudcast['name']
encrypted_play_info = self._search_regex(
r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
if encrypted_play_info is not None: stream_info = cloudcast['streamInfo']
# Decode
encrypted_play_info = compat_b64decode(encrypted_play_info)
else:
# New path
full_info_json = self._parse_json(self._html_search_regex(
r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
webpage, 'play info'), 'play info')
for item in full_info_json:
item_data = try_get(item, [
lambda x: x['cloudcast']['data']['cloudcastLookup'],
lambda x: x['cloudcastLookup']['data']['cloudcastLookup'],
], dict)
if try_get(item_data, lambda x: x['streamInfo']['url']):
info_json = item_data
break
else:
raise ExtractorError('Failed to extract matching stream info')
message = self._html_search_regex(
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
webpage, 'error message', default=None)
js_url = self._search_regex(
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
webpage, 'js url')
js = self._download_webpage(js_url, track_id, 'Downloading JS')
# Known plaintext attack
if encrypted_play_info:
kps = ['{"stream_url":']
kpa_target = encrypted_play_info
else:
kps = ['https://', 'http://']
kpa_target = compat_b64decode(info_json['streamInfo']['url'])
for kp in kps:
partial_key = self._decrypt_xor_cipher(kpa_target, kp)
for quote in ["'", '"']:
key = self._search_regex(
r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
js, 'encryption key', default=None)
if key is not None:
break
else:
continue
break
else:
raise ExtractorError('Failed to extract encryption key')
if encrypted_play_info is not None:
play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
if message and 'stream_url' not in play_info:
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
song_url = play_info['stream_url']
formats = [{
'format_id': 'normal',
'url': song_url
}]
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
thumbnail = self._proto_relative_url(self._html_search_regex(
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
uploader = self._html_search_regex(
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
uploader_id = self._search_regex(
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>',
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
webpage, 'play count', default=None))
else:
title = info_json['name']
thumbnail = urljoin(
'https://thumbnailer.mixcloud.com/unsafe/600x600/',
try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
uploader = try_get(info_json, lambda x: x['owner']['displayName'])
uploader_id = try_get(info_json, lambda x: x['owner']['username'])
description = try_get(info_json, lambda x: x['description'])
view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
stream_info = info_json['streamInfo']
formats = [] formats = []
def decrypt_url(f_url):
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
decrypted_url = self._decrypt_xor_cipher(k, f_url)
if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
return decrypted_url
for url_key in ('url', 'hlsUrl', 'dashUrl'): for url_key in ('url', 'hlsUrl', 'dashUrl'):
format_url = stream_info.get(url_key) format_url = stream_info.get(url_key)
if not format_url: if not format_url:
continue continue
decrypted = decrypt_url(compat_b64decode(format_url)) decrypted = self._decrypt_xor_cipher(
if not decrypted: self._DECRYPTION_KEY, compat_b64decode(format_url))
continue
if url_key == 'hlsUrl': if url_key == 'hlsUrl':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
decrypted, track_id, 'mp4', entry_protocol='m3u8_native', decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
@ -191,52 +155,117 @@ class MixcloudIE(InfoExtractor):
'http_chunk_size': 5242880, 'http_chunk_size': 5242880,
}, },
}) })
if not formats and cloudcast.get('isExclusive'):
self.raise_login_required()
self._sort_formats(formats) self._sort_formats(formats)
comments = []
for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
node = edge.get('node') or {}
text = strip_or_none(node.get('comment'))
if not text:
continue
user = node.get('user') or {}
comments.append({
'author': user.get('displayName'),
'author_id': user.get('username'),
'text': text,
'timestamp': parse_iso8601(node.get('created')),
})
tags = []
for t in cloudcast.get('tags'):
tag = try_get(t, lambda x: x['tag']['name'], compat_str)
if not tag:
tags.append(tag)
get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
owner = cloudcast.get('owner') or {}
return { return {
'id': track_id, 'id': track_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'description': description, 'description': cloudcast.get('description'),
'thumbnail': thumbnail, 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
'uploader': uploader, 'uploader': owner.get('displayName'),
'uploader_id': uploader_id, 'timestamp': parse_iso8601(cloudcast.get('publishDate')),
'view_count': view_count, 'uploader_id': owner.get('username'),
'uploader_url': owner.get('url'),
'duration': int_or_none(cloudcast.get('audioLength')),
'view_count': int_or_none(cloudcast.get('plays')),
'like_count': get_count('favorites'),
'repost_count': get_count('reposts'),
'comment_count': get_count('comments'),
'comments': comments,
'tags': tags,
'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
} }
class MixcloudPlaylistBaseIE(InfoExtractor): class MixcloudPlaylistBaseIE(MixcloudBaseIE):
_PAGE_SIZE = 24 def _get_cloudcast(self, node):
return node
def _find_urls_in_page(self, page): def _get_playlist_title(self, title, slug):
for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page): return title
yield self.url_result(
compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)),
MixcloudIE.ie_key())
def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None): def _real_extract(self, url):
real_page_number = real_page_number or current_page + 1 username, slug = re.match(self._VALID_URL, url).groups()
return self._download_webpage( username = compat_urllib_parse_unquote(username)
'https://www.mixcloud.com/%s/' % path, video_id, if not slug:
note='Download %s (page %d)' % (page_name, current_page + 1), slug = 'uploads'
errnote='Unable to download %s' % page_name, else:
query={'page': real_page_number, 'list': 'main', '_ajax': '1'}, slug = compat_urllib_parse_unquote(slug)
headers={'X-Requested-With': 'XMLHttpRequest'}) playlist_id = '%s_%s' % (username, slug)
def _tracks_page_func(self, page, video_id, page_name, current_page): is_playlist_type = self._ROOT_TYPE == 'playlist'
resp = self._fetch_tracks_page(page, video_id, page_name, current_page) playlist_type = 'items' if is_playlist_type else slug
list_filter = ''
for item in self._find_urls_in_page(resp): has_next_page = True
yield item entries = []
while has_next_page:
playlist = self._call_api(
self._ROOT_TYPE, '''%s
%s
%s(first: 100%s) {
edges {
node {
%s
}
}
pageInfo {
endCursor
hasNextPage
}
}''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
playlist_id, username, slug if is_playlist_type else None)
def _get_user_description(self, page_content): items = playlist.get(playlist_type) or {}
return self._html_search_regex( for edge in items.get('edges', []):
r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>', cloudcast = self._get_cloudcast(edge.get('node') or {})
page_content, 'user description', fatal=False) cloudcast_url = cloudcast.get('url')
if not cloudcast_url:
continue
entries.append(self.url_result(
cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
page_info = items['pageInfo']
has_next_page = page_info['hasNextPage']
list_filter = ', after: "%s"' % page_info['endCursor']
return self.playlist_result(
entries, playlist_id,
self._get_playlist_title(playlist[self._TITLE_KEY], slug),
playlist.get(self._DESCRIPTION_KEY))
class MixcloudUserIE(MixcloudPlaylistBaseIE): class MixcloudUserIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$' _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'
IE_NAME = 'mixcloud:user' IE_NAME = 'mixcloud:user'
_TESTS = [{ _TESTS = [{
@ -244,68 +273,58 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'Daniel Holbach (uploads)',
'description': 'md5:def36060ac8747b3aabca54924897e47', 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
}, },
'playlist_mincount': 11, 'playlist_mincount': 36,
}, { }, {
'url': 'http://www.mixcloud.com/dholbach/uploads/', 'url': 'http://www.mixcloud.com/dholbach/uploads/',
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'Daniel Holbach (uploads)',
'description': 'md5:def36060ac8747b3aabca54924897e47', 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
}, },
'playlist_mincount': 11, 'playlist_mincount': 36,
}, { }, {
'url': 'http://www.mixcloud.com/dholbach/favorites/', 'url': 'http://www.mixcloud.com/dholbach/favorites/',
'info_dict': { 'info_dict': {
'id': 'dholbach_favorites', 'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)', 'title': 'Daniel Holbach (favorites)',
'description': 'md5:def36060ac8747b3aabca54924897e47', 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
}, },
'params': { # 'params': {
'playlist_items': '1-100', # 'playlist_items': '1-100',
}, # },
'playlist_mincount': 100, 'playlist_mincount': 396,
}, { }, {
'url': 'http://www.mixcloud.com/dholbach/listens/', 'url': 'http://www.mixcloud.com/dholbach/listens/',
'info_dict': { 'info_dict': {
'id': 'dholbach_listens', 'id': 'dholbach_listens',
'title': 'Daniel Holbach (listens)', 'title': 'Daniel Holbach (listens)',
'description': 'md5:def36060ac8747b3aabca54924897e47', 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
}, },
'params': { # 'params': {
'playlist_items': '1-100', # 'playlist_items': '1-100',
# },
'playlist_mincount': 1623,
'skip': 'Large list',
}, {
'url': 'https://www.mixcloud.com/FirstEar/stream/',
'info_dict': {
'id': 'FirstEar_stream',
'title': 'First Ear (stream)',
'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
}, },
'playlist_mincount': 100, 'playlist_mincount': 271,
}] }]
def _real_extract(self, url): _TITLE_KEY = 'displayName'
mobj = re.match(self._VALID_URL, url) _DESCRIPTION_KEY = 'biog'
user_id = mobj.group('user') _ROOT_TYPE = 'user'
list_type = mobj.group('type') _NODE_TEMPLATE = '''slug
url'''
# if only a profile URL was supplied, default to download all uploads def _get_playlist_title(self, title, slug):
if list_type is None: return '%s (%s)' % (title, slug)
list_type = 'uploads'
video_id = '%s_%s' % (user_id, list_type)
profile = self._download_webpage(
'https://www.mixcloud.com/%s/' % user_id, video_id,
note='Downloading user profile',
errnote='Unable to download user profile')
username = self._og_search_title(profile)
description = self._get_user_description(profile)
entries = OnDemandPagedList(
functools.partial(
self._tracks_page_func,
'%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
self._PAGE_SIZE)
return self.playlist_result(
entries, video_id, '%s (%s)' % (username, list_type), description)
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
@ -313,87 +332,20 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
IE_NAME = 'mixcloud:playlist' IE_NAME = 'mixcloud:playlist'
_TESTS = [{ _TESTS = [{
'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
'info_dict': {
'id': 'RedBullThre3style_tokyo-finalists-2015',
'title': 'National Champions 2015',
'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
},
'playlist_mincount': 16,
}, {
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/', 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('user')
playlist_id = mobj.group('playlist')
video_id = '%s_%s' % (user_id, playlist_id)
webpage = self._download_webpage(
url, user_id,
note='Downloading playlist page',
errnote='Unable to download playlist page')
title = self._html_search_regex(
r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
webpage, 'playlist title',
default=None) or self._og_search_title(webpage, fatal=False)
description = self._get_user_description(webpage)
entries = OnDemandPagedList(
functools.partial(
self._tracks_page_func,
'%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
self._PAGE_SIZE)
return self.playlist_result(entries, video_id, title, description)
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
IE_NAME = 'mixcloud:stream'
_TEST = {
'url': 'https://www.mixcloud.com/FirstEar/stream/',
'info_dict': { 'info_dict': {
'id': 'FirstEar', 'id': 'maxvibes_jazzcat-on-ness-radio',
'title': 'First Ear', 'title': 'Ness Radio sessions',
'description': 'Curators of good music\nfirstearmusic.com',
}, },
'playlist_mincount': 192, 'playlist_mincount': 59,
} }]
_TITLE_KEY = 'name'
_DESCRIPTION_KEY = 'description'
_ROOT_TYPE = 'playlist'
_NODE_TEMPLATE = '''cloudcast {
slug
url
}'''
def _real_extract(self, url): def _get_cloudcast(self, node):
user_id = self._match_id(url) return node.get('cloudcast') or {}
webpage = self._download_webpage(url, user_id)
entries = []
prev_page_url = None
def _handle_page(page):
entries.extend(self._find_urls_in_page(page))
return self._search_regex(
r'm-next-page-url="([^"]+)"', page,
'next page URL', default=None)
next_page_url = _handle_page(webpage)
for idx in itertools.count(0):
if not next_page_url or prev_page_url == next_page_url:
break
prev_page_url = next_page_url
current_page = int(self._search_regex(
r'\?page=(\d+)', next_page_url, 'next page number'))
next_page_url = _handle_page(self._fetch_tracks_page(
'%s/stream' % user_id, user_id, 'stream', idx,
real_page_number=current_page))
username = self._og_search_title(webpage)
description = self._get_user_description(webpage)
return self.playlist_result(entries, user_id, username, description)

View File

@ -41,6 +41,14 @@ class MSNIE(InfoExtractor):
}, { }, {
'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-raped-woman-comment/vi-AAhvzW6', 'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-raped-woman-comment/vi-AAhvzW6',
'only_matching': True, 'only_matching': True,
}, {
# Vidible(AOL) Embed
'url': 'https://www.msn.com/en-us/video/animals/yellowstone-park-staffers-catch-deer-engaged-in-behavior-they-cant-explain/vi-AAGfdg1',
'only_matching': True,
}, {
# Dailymotion Embed
'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -61,6 +69,18 @@ class MSNIE(InfoExtractor):
webpage, 'error', group='error')) webpage, 'error', group='error'))
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
player_name = video.get('playerName')
if player_name:
provider_id = video.get('providerId')
if provider_id:
if player_name == 'AOL':
return self.url_result(
'aol-video:' + provider_id, 'Aol', provider_id)
elif player_name == 'Dailymotion':
return self.url_result(
'https://www.dailymotion.com/video/' + provider_id,
'Dailymotion', provider_id)
title = video['title'] title = video['title']
formats = [] formats = []

View File

@ -1,73 +1,56 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import os.path
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_str
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, int_or_none,
parse_duration,
xpath_text,
) )
class MySpassIE(InfoExtractor): class MySpassIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?myspass\.de/.*' _VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
'md5': '0b49f4844a068f8b33f4b7c88405862b', 'md5': '0b49f4844a068f8b33f4b7c88405862b',
'info_dict': { 'info_dict': {
'id': '11741', 'id': '11741',
'ext': 'mp4', 'ext': 'mp4',
'description': 'Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?', 'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2', 'title': '17.02.2013 - Die Highlights, Teil 2',
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' video_id = self._match_id(url)
# video id is the last path element of the URL
# usually there is a trailing slash, so also try the second but last
url_path = compat_urllib_parse_urlparse(url).path
url_parent_path, video_id = os.path.split(url_path)
if not video_id:
_, video_id = os.path.split(url_parent_path)
# get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id
metadata = self._download_xml( metadata = self._download_xml(
metadata_url, video_id, transform_source=lambda s: s.strip()) 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id,
video_id)
# extract values from metadata title = xpath_text(metadata, 'title', fatal=True)
url_flv_el = metadata.find('url_flv') video_url = xpath_text(metadata, 'url_flv', 'download url', True)
if url_flv_el is None: video_id_int = int(video_id)
raise ExtractorError('Unable to extract download url') for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
video_url = url_flv_el.text group_int = int(group)
title_el = metadata.find('title') if group_int > video_id_int:
if title_el is None: video_url = video_url.replace(
raise ExtractorError('Unable to extract title') group, compat_str(group_int // video_id_int))
title = title_el.text
format_id_el = metadata.find('format_id')
if format_id_el is None:
format = 'mp4'
else:
format = format_id_el.text
description_el = metadata.find('description')
if description_el is not None:
description = description_el.text
else:
description = None
imagePreview_el = metadata.find('imagePreview')
if imagePreview_el is not None:
thumbnail = imagePreview_el.text
else:
thumbnail = None
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'title': title, 'title': title,
'format': format, 'thumbnail': xpath_text(metadata, 'imagePreview'),
'thumbnail': thumbnail, 'description': xpath_text(metadata, 'description'),
'description': description, 'duration': parse_duration(xpath_text(metadata, 'duration')),
'series': xpath_text(metadata, 'format'),
'season_number': int_or_none(xpath_text(metadata, 'season')),
'season_id': xpath_text(metadata, 'season_id'),
'episode': title,
'episode_number': int_or_none(xpath_text(metadata, 'episode')),
} }

View File

@ -108,7 +108,7 @@ class NexxIE(InfoExtractor):
@staticmethod @staticmethod
def _extract_domain_id(webpage): def _extract_domain_id(webpage):
mobj = re.search( mobj = re.search(
r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)', r'<script\b[^>]+\bsrc=["\'](?:https?:)?//(?:require|arc)\.nexx(?:\.cloud|cdn\.com)/(?:sdk/)?(?P<id>\d+)',
webpage) webpage)
return mobj.group('id') if mobj else None return mobj.group('id') if mobj else None
@ -123,7 +123,7 @@ class NexxIE(InfoExtractor):
domain_id = NexxIE._extract_domain_id(webpage) domain_id = NexxIE._extract_domain_id(webpage)
if domain_id: if domain_id:
for video_id in re.findall( for video_id in re.findall(
r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)', r'(?is)onPLAYReady.+?_play\.(?:init|(?:control\.)?addPlayer)\s*\(.+?\s*,\s*["\']?(\d+)',
webpage): webpage):
entries.append( entries.append(
'https://api.nexx.cloud/v3/%s/videos/byid/%s' 'https://api.nexx.cloud/v3/%s/videos/byid/%s'
@ -410,8 +410,8 @@ class NexxIE(InfoExtractor):
class NexxEmbedIE(InfoExtractor): class NexxEmbedIE(InfoExtractor):
_VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1', 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
'md5': '16746bfc28c42049492385c989b26c4a', 'md5': '16746bfc28c42049492385c989b26c4a',
'info_dict': { 'info_dict': {
@ -420,7 +420,6 @@ class NexxEmbedIE(InfoExtractor):
'title': 'Nervenkitzel Achterbahn', 'title': 'Nervenkitzel Achterbahn',
'alt_title': 'Karussellbauer in Deutschland', 'alt_title': 'Karussellbauer in Deutschland',
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
'release_year': 2005,
'creator': 'SPIEGEL TV', 'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2761, 'duration': 2761,
@ -431,7 +430,10 @@ class NexxEmbedIE(InfoExtractor):
'format': 'bestvideo', 'format': 'bestvideo',
'skip_download': True, 'skip_download': True,
}, },
} }, {
'url': 'https://embed.nexx.cloud/11888/video/DSRTO7UVOX06S7',
'only_matching': True,
}]
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):

View File

@ -3,9 +3,10 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
xpath_text,
int_or_none, int_or_none,
strip_or_none,
unescapeHTML,
xpath_text,
) )
@ -47,10 +48,10 @@ class NTVRuIE(InfoExtractor):
'duration': 1496, 'duration': 1496,
}, },
}, { }, {
'url': 'http://www.ntv.ru/kino/Koma_film', 'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/',
'md5': 'f825770930937aa7e5aca0dc0d29319a', 'md5': 'e9c7cde24d9d3eaed545911a04e6d4f4',
'info_dict': { 'info_dict': {
'id': '1007609', 'id': '1126480',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Остросюжетный фильм «Кома»', 'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома»', 'description': 'Остросюжетный фильм «Кома»',
@ -68,6 +69,10 @@ class NTVRuIE(InfoExtractor):
'thumbnail': r're:^http://.*\.jpg', 'thumbnail': r're:^http://.*\.jpg',
'duration': 2590, 'duration': 2590,
}, },
}, {
# Schemeless file URL
'url': 'https://www.ntv.ru/video/1797442',
'only_matching': True,
}] }]
_VIDEO_ID_REGEXES = [ _VIDEO_ID_REGEXES = [
@ -96,37 +101,31 @@ class NTVRuIE(InfoExtractor):
'http://www.ntv.ru/vi%s/' % video_id, 'http://www.ntv.ru/vi%s/' % video_id,
video_id, 'Downloading video XML') video_id, 'Downloading video XML')
title = clean_html(xpath_text(player, './data/title', 'title', fatal=True)) title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True)))
description = clean_html(xpath_text(player, './data/description', 'description'))
video = player.find('./data/video') video = player.find('./data/video')
video_id = xpath_text(video, './id', 'video id')
thumbnail = xpath_text(video, './splash', 'thumbnail')
duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
view_count = int_or_none(xpath_text(video, './views', 'view count'))
token = self._download_webpage(
'http://stat.ntv.ru/services/access/token',
video_id, 'Downloading access token')
formats = [] formats = []
for format_id in ['', 'hi', 'webm']: for format_id in ['', 'hi', 'webm']:
file_ = video.find('./%sfile' % format_id) file_ = xpath_text(video, './%sfile' % format_id)
if file_ is None: if not file_:
continue continue
size = video.find('./%ssize' % format_id) if file_.startswith('//'):
file_ = self._proto_relative_url(file_)
elif not file_.startswith('http'):
file_ = 'http://media.ntv.ru/vod/' + file_
formats.append({ formats.append({
'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token), 'url': file_,
'filesize': int_or_none(size.text if size is not None else None), 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)),
}) })
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': xpath_text(video, './id'),
'title': title, 'title': title,
'description': description, 'description': strip_or_none(unescapeHTML(xpath_text(player, './data/description'))),
'thumbnail': thumbnail, 'thumbnail': xpath_text(video, './splash'),
'duration': duration, 'duration': int_or_none(xpath_text(video, './totaltime')),
'view_count': view_count, 'view_count': int_or_none(xpath_text(video, './views')),
'formats': formats, 'formats': formats,
} }

View File

@ -4,12 +4,8 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..compat import compat_str
determine_ext, from ..utils import js_to_json
int_or_none,
float_or_none,
mimetype2ext,
)
class OnionStudiosIE(InfoExtractor): class OnionStudiosIE(InfoExtractor):
@ -17,14 +13,16 @@ class OnionStudiosIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937', 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
'md5': '719d1f8c32094b8c33902c17bcae5e34', 'md5': '5a118d466d62b5cd03647cf2c593977f',
'info_dict': { 'info_dict': {
'id': '2937', 'id': '3459881',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Hannibal charges forward, stops for a cocktail', 'title': 'Hannibal charges forward, stops for a cocktail',
'description': 'md5:545299bda6abf87e5ec666548c6a9448',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'The A.V. Club', 'uploader': 'a.v. club',
'uploader_id': 'the-av-club', 'upload_date': '20150619',
'timestamp': 1434728546,
}, },
}, { }, {
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true', 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
@ -44,38 +42,12 @@ class OnionStudiosIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( webpage = self._download_webpage(
'http://www.onionstudios.com/video/%s.json' % video_id, video_id) 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js',
video_id)
title = video_data['title'] mcp_id = compat_str(self._parse_json(self._search_regex(
r'window\.mcpMapping\s*=\s*({.+?});', webpage,
formats = [] 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id'])
for source in video_data.get('sources', []): return self.url_result(
source_url = source.get('url') 'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id,
if not source_url: 'KinjaEmbed', mcp_id)
continue
ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
else:
tbr = int_or_none(source.get('bitrate'))
formats.append({
'format_id': ext + ('-%d' % tbr if tbr else ''),
'url': source_url,
'width': int_or_none(source.get('width')),
'tbr': tbr,
'ext': ext,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': video_data.get('poster_url'),
'uploader': video_data.get('channel_name'),
'uploader_id': video_data.get('channel_slug'),
'duration': float_or_none(video_data.get('duration', 1000)),
'tags': video_data.get('tags'),
'formats': formats,
}

View File

@ -6,7 +6,11 @@ from ..utils import (
clean_html, clean_html,
determine_ext, determine_ext,
int_or_none, int_or_none,
KNOWN_EXTENSIONS,
mimetype2ext,
parse_iso8601, parse_iso8601,
str_or_none,
try_get,
) )
@ -24,6 +28,7 @@ class PatreonIE(InfoExtractor):
'thumbnail': 're:^https?://.*$', 'thumbnail': 're:^https?://.*$',
'timestamp': 1406473987, 'timestamp': 1406473987,
'upload_date': '20140727', 'upload_date': '20140727',
'uploader_id': '87145',
}, },
}, { }, {
'url': 'http://www.patreon.com/creation?hid=754133', 'url': 'http://www.patreon.com/creation?hid=754133',
@ -90,7 +95,13 @@ class PatreonIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
post = self._download_json( post = self._download_json(
'https://www.patreon.com/api/posts/' + video_id, video_id) 'https://www.patreon.com/api/posts/' + video_id, video_id, query={
'fields[media]': 'download_url,mimetype,size_bytes',
'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title',
'fields[user]': 'full_name,url',
'json-api-use-default-includes': 'false',
'include': 'media,user',
})
attributes = post['data']['attributes'] attributes = post['data']['attributes']
title = attributes['title'].strip() title = attributes['title'].strip()
image = attributes.get('image') or {} image = attributes.get('image') or {}
@ -104,33 +115,42 @@ class PatreonIE(InfoExtractor):
'comment_count': int_or_none(attributes.get('comment_count')), 'comment_count': int_or_none(attributes.get('comment_count')),
} }
def add_file(file_data):
file_url = file_data.get('url')
if file_url:
info.update({
'url': file_url,
'ext': determine_ext(file_data.get('name'), 'mp3'),
})
for i in post.get('included', []): for i in post.get('included', []):
i_type = i.get('type') i_type = i.get('type')
if i_type == 'attachment': if i_type == 'media':
add_file(i.get('attributes') or {}) media_attributes = i.get('attributes') or {}
download_url = media_attributes.get('download_url')
ext = mimetype2ext(media_attributes.get('mimetype'))
if download_url and ext in KNOWN_EXTENSIONS:
info.update({
'ext': ext,
'filesize': int_or_none(media_attributes.get('size_bytes')),
'url': download_url,
})
elif i_type == 'user': elif i_type == 'user':
user_attributes = i.get('attributes') user_attributes = i.get('attributes')
if user_attributes: if user_attributes:
info.update({ info.update({
'uploader': user_attributes.get('full_name'), 'uploader': user_attributes.get('full_name'),
'uploader_id': str_or_none(i.get('id')),
'uploader_url': user_attributes.get('url'), 'uploader_url': user_attributes.get('url'),
}) })
if not info.get('url'): if not info.get('url'):
add_file(attributes.get('post_file') or {}) embed_url = try_get(attributes, lambda x: x['embed']['url'])
if embed_url:
if not info.get('url'):
info.update({ info.update({
'_type': 'url', '_type': 'url',
'url': attributes['embed']['url'], 'url': embed_url,
})
if not info.get('url'):
post_file = attributes['post_file']
ext = determine_ext(post_file.get('name'))
if ext in KNOWN_EXTENSIONS:
info.update({
'ext': ext,
'url': post_file['url'],
}) })
return info return info

View File

@ -17,12 +17,54 @@ class PeriscopeBaseIE(InfoExtractor):
'https://api.periscope.tv/api/v2/%s' % method, 'https://api.periscope.tv/api/v2/%s' % method,
item_id, query=query) item_id, query=query)
def _parse_broadcast_data(self, broadcast, video_id):
title = broadcast['status']
uploader = broadcast.get('user_display_name') or broadcast.get('username')
title = '%s - %s' % (uploader, title) if uploader else title
is_live = broadcast.get('state').lower() == 'running'
thumbnails = [{
'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
return {
'id': broadcast.get('id') or video_id,
'title': self._live_title(title) if is_live else title,
'timestamp': parse_iso8601(broadcast.get('created_at')),
'uploader': uploader,
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
'thumbnails': thumbnails,
'view_count': int_or_none(broadcast.get('total_watched')),
'tags': broadcast.get('tags'),
'is_live': is_live,
}
@staticmethod
def _extract_common_format_info(broadcast):
return broadcast.get('state').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height'))
@staticmethod
def _add_width_and_height(f, width, height):
for key, val in (('width', width), ('height', height)):
if not f.get(key):
f[key] = val
def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True):
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native'
if state in ('ended', 'timed_out') else 'm3u8',
m3u8_id=format_id, fatal=fatal)
if len(m3u8_formats) == 1:
self._add_width_and_height(m3u8_formats[0], width, height)
return m3u8_formats
class PeriscopeIE(PeriscopeBaseIE): class PeriscopeIE(PeriscopeBaseIE):
IE_DESC = 'Periscope' IE_DESC = 'Periscope'
IE_NAME = 'periscope' IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
# Alive example URLs can be found here http://onperiscope.com/ # Alive example URLs can be found here https://www.periscope.tv/
_TESTS = [{ _TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
'md5': '65b57957972e503fcbbaeed8f4fa04ca', 'md5': '65b57957972e503fcbbaeed8f4fa04ca',
@ -61,21 +103,9 @@ class PeriscopeIE(PeriscopeBaseIE):
'accessVideoPublic', {'broadcast_id': token}, token) 'accessVideoPublic', {'broadcast_id': token}, token)
broadcast = stream['broadcast'] broadcast = stream['broadcast']
title = broadcast['status'] info = self._parse_broadcast_data(broadcast, token)
uploader = broadcast.get('user_display_name') or broadcast.get('username')
uploader_id = (broadcast.get('user_id') or broadcast.get('username'))
title = '%s - %s' % (uploader, title) if uploader else title
state = broadcast.get('state').lower() state = broadcast.get('state').lower()
if state == 'running':
title = self._live_title(title)
timestamp = parse_iso8601(broadcast.get('created_at'))
thumbnails = [{
'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
width = int_or_none(broadcast.get('width')) width = int_or_none(broadcast.get('width'))
height = int_or_none(broadcast.get('height')) height = int_or_none(broadcast.get('height'))
@ -92,32 +122,20 @@ class PeriscopeIE(PeriscopeBaseIE):
continue continue
video_urls.add(video_url) video_urls.add(video_url)
if format_id != 'rtmp': if format_id != 'rtmp':
m3u8_formats = self._extract_m3u8_formats( m3u8_formats = self._extract_pscp_m3u8_formats(
video_url, token, 'mp4', video_url, token, format_id, state, width, height, False)
entry_protocol='m3u8_native'
if state in ('ended', 'timed_out') else 'm3u8',
m3u8_id=format_id, fatal=False)
if len(m3u8_formats) == 1:
add_width_and_height(m3u8_formats[0])
formats.extend(m3u8_formats) formats.extend(m3u8_formats)
continue continue
rtmp_format = { rtmp_format = {
'url': video_url, 'url': video_url,
'ext': 'flv' if format_id == 'rtmp' else 'mp4', 'ext': 'flv' if format_id == 'rtmp' else 'mp4',
} }
add_width_and_height(rtmp_format) self._add_width_and_height(rtmp_format)
formats.append(rtmp_format) formats.append(rtmp_format)
self._sort_formats(formats) self._sort_formats(formats)
return { info['formats'] = formats
'id': broadcast.get('id') or token, return info
'title': title,
'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnails': thumbnails,
'formats': formats,
}
class PeriscopeUserIE(PeriscopeBaseIE): class PeriscopeUserIE(PeriscopeBaseIE):

View File

@ -1,170 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_iso8601,
unescapeHTML,
qualities,
)
class Revision3EmbedIE(InfoExtractor):
    """Extractor for embedded Revision3 / Seeker Network player URLs.

    Accepts both 'revision3:<type>:<id>' pseudo-URLs and the hosted
    '/player/embed?videoId=' pages.
    """
    IE_NAME = 'revision3:embed'
    _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
    _TEST = {
        'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
        'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
        'info_dict': {
            'id': '67558',
            'ext': 'mp4',
            'title': 'The Pros & Cons Of Zoos',
            'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
            'uploader_id': 'dnews',
            'uploader': 'DNews',
        }
    }
    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'

    def _real_extract(self, url):
        # Pull the playlist id and the (optional) lookup key out of the URL;
        # plain embed URLs default to a lookup by video id.
        url_match = re.match(self._VALID_URL, url)
        playlist_id = url_match.group('playlist_id')
        playlist_type = url_match.group('playlist_type') or 'video_id'

        video_data = self._download_json(
            'http://revision3.com/api/getPlaylist.json', playlist_id, query={
                'api_key': self._API_KEY,
                'codecs': 'h264,vp8,theora',
                playlist_type: playlist_id,
            })['items'][0]

        formats = []
        # The API groups download profiles by video codec.
        for vcodec, profiles in video_data['media'].items():
            for profile_id, profile in profiles.items():
                if profile_id == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        profile['url'], playlist_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False))
                    continue
                formats.append({
                    'url': profile['url'],
                    'format_id': '%s-%s' % (vcodec, profile_id),
                    'tbr': int_or_none(profile.get('bitrate')),
                    'vcodec': vcodec,
                })
        self._sort_formats(formats)

        show = video_data.get('show', {})
        return {
            'id': playlist_id,
            'title': unescapeHTML(video_data['title']),
            'description': unescapeHTML(video_data.get('summary')),
            'uploader': show.get('name'),
            'uploader_id': show.get('slug'),
            'duration': int_or_none(video_data.get('duration')),
            'formats': formats,
        }
class Revision3IE(InfoExtractor):
    """Extractor for revision3.com and animalist.com pages.

    A URL can point at a single episode, an embed page, or a listing
    page (show or tag); listings are expanded into a playlist of episode
    URLs that are re-dispatched through this same extractor.
    """
    IE_NAME = 'revision'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
    _TESTS = [{
        'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
        'md5': 'd94a72d85d0a829766de4deb8daaf7df',
        'info_dict': {
            'id': '71089',
            'display_id': 'technobuffalo/5-google-predictions-for-2016',
            'ext': 'webm',
            'title': '5 Google Predictions for 2016',
            'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.',
            'upload_date': '20151228',
            'timestamp': 1451325600,
            'duration': 187,
            'uploader': 'TechnoBuffalo',
            'uploader_id': 'technobuffalo',
        }
    }, {
        # Show
        'url': 'http://revision3.com/variant',
        'only_matching': True,
    }, {
        # Tag
        'url': 'http://revision3.com/vr',
        'only_matching': True,
    }]
    # Filled as (domain, path, domain) — the API expects the domain both in
    # the host and as a query parameter.
    _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'

    def _real_extract(self, url):
        domain, display_id = re.match(self._VALID_URL, url).groups()
        site = domain.split('.')[0]  # e.g. 'revision3' or 'animalist'
        page_info = self._download_json(
            self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)

        page_data = page_info['data']
        page_type = page_data['type']
        if page_type in ('episode', 'embed'):
            # Single-video page: build url_transparent metadata and hand the
            # actual media resolution off to another extractor.
            show_data = page_data['show']['data']
            page_id = compat_str(page_data['id'])
            video_id = compat_str(page_data['video']['data']['id'])
            # Prefer the larger named thumbnail sizes.
            preference = qualities(['mini', 'small', 'medium', 'large'])
            thumbnails = [{
                'url': image_url,
                'id': image_id,
                'preference': preference(image_id)
            } for image_id, image_url in page_data.get('images', {}).items()]

            info = {
                'id': page_id,
                'display_id': display_id,
                'title': unescapeHTML(page_data['name']),
                'description': unescapeHTML(page_data.get('summary')),
                # publishTime separates date and time with a space, hence
                # the explicit delimiter passed to parse_iso8601.
                'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
                'author': page_data.get('author'),
                'uploader': show_data.get('name'),
                'uploader_id': show_data.get('slug'),
                'thumbnails': thumbnails,
                'extractor_key': site,
            }

            if page_type == 'embed':
                # Embed pages carry the final player URL directly.
                info.update({
                    '_type': 'url_transparent',
                    'url': page_data['video']['data']['embed'],
                })
                return info

            # Regular episodes are resolved via the revision3: pseudo-URL
            # (handled by Revision3EmbedIE).
            info.update({
                '_type': 'url_transparent',
                'url': 'revision3:%s' % video_id,
            })
            return info
        else:
            # Listing page (show or tag): walk the paginated episode feed
            # until the advertised total number of episodes is collected.
            list_data = page_info[page_type]['data']
            episodes_data = page_info['episodes']['data']
            num_episodes = page_info['meta']['totalEpisodes']
            processed_episodes = 0
            entries = []
            page_num = 1
            while True:
                entries.extend([{
                    '_type': 'url',
                    'url': 'http://%s%s' % (domain, episode['path']),
                    'id': compat_str(episode['id']),
                    'ie_key': 'Revision3',
                    'extractor_key': site,
                } for episode in episodes_data])
                processed_episodes += len(episodes_data)
                if processed_episodes == num_episodes:
                    break
                page_num += 1
                # Subsequent pages are addressed as '<display_id>/<page>'.
                episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % (
                    domain, display_id + '/' + compat_str(page_num), domain),
                    display_id)['episodes']['data']

            return self.playlist_result(
                entries, compat_str(list_data['id']),
                list_data.get('name'), list_data.get('summary'))

View File

@ -1,8 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
@ -18,7 +16,6 @@ from ..utils import (
class RoosterTeethIE(InfoExtractor): class RoosterTeethIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
_LOGIN_URL = 'https://roosterteeth.com/login'
_NETRC_MACHINE = 'roosterteeth' _NETRC_MACHINE = 'roosterteeth'
_TESTS = [{ _TESTS = [{
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
@ -53,48 +50,40 @@ class RoosterTeethIE(InfoExtractor):
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'only_matching': True, 'only_matching': True,
}] }]
_EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/'
def _login(self): def _login(self):
username, password = self._get_login_info() username, password = self._get_login_info()
if username is None: if username is None:
return return
login_page = self._download_webpage( try:
self._LOGIN_URL, None, self._download_json(
note='Downloading login page', 'https://auth.roosterteeth.com/oauth/token',
errnote='Unable to download login page') None, 'Logging in', data=urlencode_postdata({
'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
login_form = self._hidden_inputs(login_page) 'grant_type': 'password',
login_form.update({
'username': username, 'username': username,
'password': password, 'password': password,
}) }))
except ExtractorError as e:
login_request = self._download_webpage( msg = 'Unable to login'
self._LOGIN_URL, None, if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
note='Logging in', resp = self._parse_json(e.cause.read().decode(), None, fatal=False)
data=urlencode_postdata(login_form), if resp:
headers={ error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
'Referer': self._LOGIN_URL,
})
if not any(re.search(p, login_request) for p in (
r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
r'>Sign Out<')):
error = self._html_search_regex(
r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
login_request, 'alert', default=None, group='error')
if error: if error:
raise ExtractorError('Unable to login: %s' % error, expected=True) msg += ': ' + error
raise ExtractorError('Unable to log in') self.report_warning(msg)
def _real_initialize(self): def _real_initialize(self):
if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'):
return
self._login() self._login()
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id api_episode_url = self._EPISODE_BASE_URL + display_id
try: try:
m3u8_url = self._download_json( m3u8_url = self._download_json(

View File

@ -0,0 +1,144 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
decode_packed_codes,
ExtractorError,
urlencode_postdata,
)
class SCTEBaseIE(InfoExtractor):
    """Shared login handling for learning.scte.org extractors."""
    _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
    _NETRC_MACHINE = 'scte'

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Sign in with configured credentials; no-op when none are set.

        Raises ExtractorError when the site rejects the credentials.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        def signed_in(page):
            # A signed-in page shows a "welcome" block and a sign-out link.
            return any(re.search(p, page) for p in (
                r'class=["\']welcome\b', r'>Sign Out<'))

        popup = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login popup')
        if signed_in(popup):
            # A valid session is already present (e.g. cookie jar).
            return

        # Re-submit the ASP.NET form with its hidden state fields intact.
        form = self._hidden_inputs(popup)
        form.update({
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
        })
        response = self._download_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(form))

        # Success shows either the ASP.NET partial-redirect marker or the
        # signed-in page itself; anything else is a login failure.
        if '|pageRedirect|' in response or signed_in(response):
            return
        error = self._html_search_regex(
            r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
            response, 'error message', default=None)
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')
class SCTEIE(SCTEBaseIE):
    """Extract the .mp4 assets of a single SCORM module on learning.scte.org."""
    # Fix: the query '?' must be escaped; unescaped, 'view\.php?' made the
    # final 'p' optional instead of matching the literal '?' separator.
    _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
        'info_dict': {
            'title': 'Introduction to DOCSIS Engineering Professional',
            'id': '31484',
        },
        'playlist_count': 5,
        'skip': 'Requires account credentials',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        # Fix: the third positional argument of _search_regex is the field
        # *name* used in error messages; the original passed video_id here,
        # yielding messages like "Unable to extract 31484".
        context_id = self._search_regex(
            r'context-(\d+)', webpage, 'context id')
        content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
        # data.js ships packed (p.a.c.k.e.r-style); unpack before parsing.
        context = decode_packed_codes(self._download_webpage(
            '%smobile/data.js' % content_base, video_id))

        # The unpacked JS embeds an XML document inside CreateData("...");
        # un-escape the \' sequences before XML parsing.
        data = self._parse_xml(
            self._search_regex(
                r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
            video_id)

        entries = []
        for asset in data.findall('.//asset'):
            asset_url = asset.get('url')
            # Only .mp4 assets are playable media; skip everything else.
            if not asset_url or not asset_url.endswith('.mp4'):
                continue
            asset_id = self._search_regex(
                r'video_([^_]+)_', asset_url, 'asset id', default=None)
            if not asset_id:
                continue
            entries.append({
                'id': asset_id,
                'title': title,
                'url': content_base + asset_url,
            })

        return self.playlist_result(entries, video_id, title)
class SCTECourseIE(SCTEBaseIE):
    """Expand an SCTE course page into its SCORM modules and sub-courses."""
    # Fix: escape the query '?' — unescaped, 'view\.php?' made the final 'p'
    # optional instead of matching the literal '?' separator.
    _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3639',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3073',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        title = self._search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', default=None)

        entries = []
        # Collect links to SCORM modules and nested sub-courses.
        # Same '\?' fix applied to the href pattern below.
        for mobj in re.finditer(
                r'''(?x)
                    <a[^>]+
                        href=(["\'])
                        (?P<url>
                            https?://learning\.scte\.org/mod/
                            (?P<kind>scorm|subcourse)/view\.php\?(?:(?!\1).)*?
                            \bid=\d+
                        )
                    ''',
                webpage):
            item_url = mobj.group('url')
            if item_url == url:
                # Skip self-references back to this very page.
                continue
            ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
                  else SCTECourseIE.ie_key())
            entries.append(self.url_result(item_url, ie=ie))

        return self.playlist_result(entries, course_id, title)

View File

@ -4,34 +4,37 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
get_element_by_class,
strip_or_none,
)
class SeekerIE(InfoExtractor): class SeekerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html' _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
_TESTS = [{ _TESTS = [{
# player.loadRevision3Item
'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
'md5': '30c1dc4030cc715cf05b423d0947ac18', 'md5': '897d44bbe0d8986a2ead96de565a92db',
'info_dict': { 'info_dict': {
'id': '76243', 'id': 'Elrn3gnY',
'ext': 'webm', 'ext': 'mp4',
'title': 'Should Trump Be Required To Release His Tax Returns?', 'title': 'Should Trump Be Required To Release His Tax Returns?',
'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?', 'description': 'md5:41efa8cfa8d627841045eec7b018eb45',
'uploader': 'Seeker Daily', 'timestamp': 1490090165,
'uploader_id': 'seekerdaily', 'upload_date': '20170321',
} }
}, { }, {
'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
'playlist': [ 'playlist': [
{ {
'md5': '83bcd157cab89ad7318dd7b8c9cf1306', 'md5': '0497b9f20495174be73ae136949707d2',
'info_dict': { 'info_dict': {
'id': '67558', 'id': 'FihYQ8AE',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Pros & Cons Of Zoos', 'title': 'The Pros & Cons Of Zoos',
'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', 'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c',
'uploader': 'DNews', 'timestamp': 1490039133,
'uploader_id': 'dnews', 'upload_date': '20170320',
}, },
} }
], ],
@ -45,13 +48,11 @@ class SeekerIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id, article_id = re.match(self._VALID_URL, url).groups() display_id, article_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage) entries = []
if mobj: for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
playlist_type, playlist_id = mobj.groups() entries.append(self.url_result(
return self.url_result( 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id))
'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id)
else:
entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall(
r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)]
return self.playlist_result( return self.playlist_result(
entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage)) entries, article_id,
self._og_search_title(webpage),
strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage))

View File

@ -276,7 +276,7 @@ class SoundcloudIE(InfoExtractor):
if secret_token: if secret_token:
query['secret_token'] = secret_token query['secret_token'] = secret_token
if info.get('downloadable'): if info.get('downloadable') and info.get('has_downloads_left'):
format_url = update_url_query( format_url = update_url_query(
info.get('download_url') or track_base_url + '/download', query) info.get('download_url') or track_base_url + '/download', query)
format_urls.add(format_url) format_urls.add(format_url)

View File

@ -4,15 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse
)
from ..utils import ( from ..utils import (
extract_attributes, compat_str,
float_or_none, float_or_none,
int_or_none, int_or_none,
str_or_none,
) )
@ -20,20 +15,20 @@ class STVPlayerIE(InfoExtractor):
IE_NAME = 'stv:player' IE_NAME = 'stv:player'
_VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})' _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
_TEST = { _TEST = {
'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/', 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/',
'md5': '2ad867d4afd641fa14187596e0fbc91b', 'md5': '5adf9439c31d554f8be0707c7abe7e0a',
'info_dict': { 'info_dict': {
'id': '6016487034001', 'id': '5333973339001',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20190321', 'upload_date': '20170301',
'title': 'Interview with the cast ahead of new Victoria', 'title': '60 seconds on set with Laura Norton',
'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.', 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let\'s find out!",
'timestamp': 1553179628, 'timestamp': 1488388054,
'uploader_id': '1486976045', 'uploader_id': '1486976045',
}, },
'skip': 'this resource is unavailable outside of the UK', 'skip': 'this resource is unavailable outside of the UK',
} }
_PUBLISHER_ID = '1486976045' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s'
_PTYPE_MAP = { _PTYPE_MAP = {
'episode': 'episodes', 'episode': 'episodes',
'video': 'shortform', 'video': 'shortform',
@ -41,31 +36,14 @@ class STVPlayerIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
ptype, video_id = re.match(self._VALID_URL, url).groups() ptype, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id)
qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex(
r'itemprop="embedURL"[^>]+href="([^"]+)',
webpage, 'embed URL', default=None)).query)
publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID
player_attr = extract_attributes(self._search_regex(
r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {}
info = {}
duration = ref_id = series = video_id = None
api_ref_id = player_attr.get('data-player-api-refid')
if api_ref_id:
resp = self._download_json( resp = self._download_json(
'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id), 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id),
api_ref_id, fatal=False) video_id)
if resp:
result = resp.get('results') or {} result = resp['results']
video = result.get('video') or {} video = result['video']
video_id = str_or_none(video.get('id')) video_id = compat_str(video['id'])
ref_id = video.get('guid')
duration = video.get('length')
programme = result.get('programme') or {}
series = programme.get('name') or programme.get('shortName')
subtitles = {} subtitles = {}
_subtitles = result.get('_subtitles') or {} _subtitles = result.get('_subtitles') or {}
for ext, sub_url in _subtitles.items(): for ext, sub_url in _subtitles.items():
@ -73,22 +51,17 @@ class STVPlayerIE(InfoExtractor):
'ext': 'vtt' if ext == 'webvtt' else ext, 'ext': 'vtt' if ext == 'webvtt' else ext,
'url': sub_url, 'url': sub_url,
}) })
info.update({
programme = result.get('programme') or {}
return {
'_type': 'url_transparent',
'id': video_id,
'url': self.BRIGHTCOVE_URL_TEMPLATE % video_id,
'description': result.get('summary'), 'description': result.get('summary'),
'duration': float_or_none(video.get('length'), 1000),
'subtitles': subtitles, 'subtitles': subtitles,
'view_count': int_or_none(result.get('views')), 'view_count': int_or_none(result.get('views')),
}) 'series': programme.get('name') or programme.get('shortName'),
if not video_id:
video_id = qs.get('videoId', [None])[0] or self._search_regex(
r'<link\s+itemprop="url"\s+href="(\d+)"',
webpage, 'video id', default=None) or 'ref:' + (ref_id or player_attr['data-refid'])
info.update({
'_type': 'url_transparent',
'duration': float_or_none(duration or player_attr.get('data-duration'), 1000),
'id': video_id,
'ie_key': 'BrightcoveNew', 'ie_key': 'BrightcoveNew',
'series': series or player_attr.get('data-programme-name'), }
'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id),
})
return info

View File

@ -84,6 +84,19 @@ class TeamcocoIE(TurnerBaseIE):
'only_matching': True, 'only_matching': True,
} }
] ]
_RECORD_TEMPL = '''id
title
teaser
publishOn
thumb {
preview
}
tags {
name
}
duration
turnerMediaId
turnerMediaAuthToken'''
def _graphql_call(self, query_template, object_type, object_id): def _graphql_call(self, query_template, object_type, object_id):
find_object = 'find' + object_type find_object = 'find' + object_type
@ -98,35 +111,35 @@ class TeamcocoIE(TurnerBaseIE):
display_id = self._match_id(url) display_id = self._match_id(url)
response = self._graphql_call('''{ response = self._graphql_call('''{
%s(slug: "%s") { %%s(slug: "%%s") {
... on RecordSlug { ... on RecordSlug {
record { record {
%s
}
}
... on PageSlug {
child {
id id
title
teaser
publishOn
thumb {
preview
}
file {
url
}
tags {
name
}
duration
turnerMediaId
turnerMediaAuthToken
} }
} }
... on NotFoundSlug { ... on NotFoundSlug {
status status
} }
} }
}''', 'Slug', display_id) }''' % self._RECORD_TEMPL, 'Slug', display_id)
if response.get('status'): if response.get('status'):
raise ExtractorError('This video is no longer available.', expected=True) raise ExtractorError('This video is no longer available.', expected=True)
child = response.get('child')
if child:
record = self._graphql_call('''{
%%s(id: "%%s") {
... on Video {
%s
}
}
}''' % self._RECORD_TEMPL, 'Record', child['id'])
else:
record = response['record'] record = response['record']
video_id = record['id'] video_id = record['id']
@ -150,25 +163,21 @@ class TeamcocoIE(TurnerBaseIE):
'accessTokenType': 'jws', 'accessTokenType': 'jws',
})) }))
else: else:
d = self._download_json( video_sources = self._download_json(
'https://teamcoco.com/_truman/d/' + video_id, 'https://teamcoco.com/_truman/d/' + video_id,
video_id, fatal=False) or {} video_id)['meta']['src']
video_sources = d.get('meta') or {} if isinstance(video_sources, dict):
if not video_sources: video_sources = video_sources.values()
video_sources = self._graphql_call('''{
%s(id: "%s") {
src
}
}''', 'RecordVideoSource', video_id) or {}
formats = [] formats = []
get_quality = qualities(['low', 'sd', 'hd', 'uhd']) get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
for format_id, src in video_sources.get('src', {}).items(): for src in video_sources:
if not isinstance(src, dict): if not isinstance(src, dict):
continue continue
src_url = src.get('src') src_url = src.get('src')
if not src_url: if not src_url:
continue continue
format_id = src.get('label')
ext = determine_ext(src_url, mimetype2ext(src.get('type'))) ext = determine_ext(src_url, mimetype2ext(src.get('type')))
if format_id == 'hls' or ext == 'm3u8': if format_id == 'hls' or ext == 'm3u8':
# compat_urllib_parse.urljoin does not work here # compat_urllib_parse.urljoin does not work here
@ -190,9 +199,6 @@ class TeamcocoIE(TurnerBaseIE):
'format_id': format_id, 'format_id': format_id,
'quality': get_quality(format_id), 'quality': get_quality(format_id),
}) })
if not formats:
formats = self._extract_m3u8_formats(
record['file']['url'], video_id, 'mp4', fatal=False)
self._sort_formats(formats) self._sort_formats(formats)
info['formats'] = formats info['formats'] = formats

View File

@ -4,21 +4,25 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
remove_end, int_or_none,
parse_iso8601,
try_get,
) )
class TelegraafIE(InfoExtractor): class TelegraafIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html' _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html', 'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
'info_dict': { 'info_dict': {
'id': '24353229', 'id': 'gaMItuoSeUg2',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Tikibad ontruimd wegens brand', 'title': 'Historisch scheepswrak slaat na 100 jaar los',
'description': 'md5:05ca046ff47b931f9b04855015e163a4', 'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg',
'duration': 33, 'duration': 55,
'timestamp': 1572805527,
'upload_date': '20191103',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -27,23 +31,30 @@ class TelegraafIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) article_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) video_id = self._download_json(
'https://www.telegraaf.nl/graphql', article_id, query={
'query': '''{
article(uid: %s) {
videos {
videoId
}
}
}''' % article_id,
})['data']['article']['videos'][0]['videoId']
player_url = self._html_search_regex( item = self._download_json(
r'<iframe[^>]+src="([^"]+")', webpage, 'player URL') 'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id,
player_page = self._download_webpage( video_id)['items'][0]
player_url, video_id, note='Download player webpage') title = item['title']
playlist_url = self._search_regex(
r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
playlist_data = self._download_json(playlist_url, video_id)
item = playlist_data['items'][0]
formats = [] formats = []
locations = item['locations'] locations = item.get('locations') or {}
for location in locations.get('adaptive', []): for location in locations.get('adaptive', []):
manifest_url = location['src'] manifest_url = location.get('src')
if not manifest_url:
continue
ext = determine_ext(manifest_url) ext = determine_ext(manifest_url)
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
@ -54,25 +65,25 @@ class TelegraafIE(InfoExtractor):
else: else:
self.report_warning('Unknown adaptive format %s' % ext) self.report_warning('Unknown adaptive format %s' % ext)
for location in locations.get('progressive', []): for location in locations.get('progressive', []):
src = try_get(location, lambda x: x['sources'][0]['src'])
if not src:
continue
label = location.get('label')
formats.append({ formats.append({
'url': location['sources'][0]['src'], 'url': src,
'width': location.get('width'), 'width': int_or_none(location.get('width')),
'height': location.get('height'), 'height': int_or_none(location.get('height')),
'format_id': 'http-%s' % location['label'], 'format_id': 'http' + ('-%s' % label if label else ''),
}) })
self._sort_formats(formats) self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' - VIDEO')
description = self._og_search_description(webpage)
duration = item.get('duration')
thumbnail = item.get('poster')
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': item.get('description'),
'formats': formats, 'formats': formats,
'duration': duration, 'duration': int_or_none(item.get('duration')),
'thumbnail': thumbnail, 'thumbnail': item.get('poster'),
'timestamp': parse_iso8601(item.get('datecreated'), ' '),
} }

View File

@ -344,9 +344,8 @@ class TwitchVodIE(TwitchItemBaseIE):
info['subtitles'] = { info['subtitles'] = {
'rechat': [{ 'rechat': [{
'url': update_url_query( 'url': update_url_query(
'https://rechat.twitch.tv/rechat-messages', { 'https://api.twitch.tv/v5/videos/%s/comments' % item_id, {
'video_id': 'v%s' % item_id, 'client_id': self._CLIENT_ID,
'start': info['timestamp'],
}), }),
'ext': 'json', 'ext': 'json',
}], }],
@ -644,7 +643,7 @@ class TwitchStreamIE(TwitchBaseIE):
class TwitchClipsIE(TwitchBaseIE): class TwitchClipsIE(TwitchBaseIE):
IE_NAME = 'twitch:clips' IE_NAME = 'twitch:clips'
_VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
@ -667,6 +666,9 @@ class TwitchClipsIE(TwitchBaseIE):
}, { }, {
'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan', 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -4,32 +4,67 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import (
compat_HTTPError,
compat_parse_qs,
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
determine_ext,
dict_get, dict_get,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
remove_end,
try_get, try_get,
strip_or_none,
unified_timestamp,
update_url_query,
xpath_text, xpath_text,
) )
from .periscope import PeriscopeIE from .periscope import (
PeriscopeBaseIE,
PeriscopeIE,
)
class TwitterBaseIE(InfoExtractor): class TwitterBaseIE(InfoExtractor):
_API_BASE = 'https://api.twitter.com/1.1/'
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?twitter\.com/'
_GUEST_TOKEN = None
def _extract_variant_formats(self, variant, video_id):
variant_url = variant.get('url')
if not variant_url:
return []
elif '.m3u8' in variant_url:
return self._extract_m3u8_formats(
variant_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
else:
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
f = {
'url': variant_url,
'format_id': 'http' + ('-%d' % tbr if tbr else ''),
'tbr': tbr,
}
self._search_dimensions_in_video_url(f, variant_url)
return [f]
def _extract_formats_from_vmap_url(self, vmap_url, video_id): def _extract_formats_from_vmap_url(self, vmap_url, video_id):
vmap_data = self._download_xml(vmap_url, video_id) vmap_data = self._download_xml(vmap_url, video_id)
video_url = xpath_text(vmap_data, './/MediaFile').strip() formats = []
if determine_ext(video_url) == 'm3u8': urls = []
return self._extract_m3u8_formats( for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
video_url, video_id, ext='mp4', m3u8_id='hls', video_variant.attrib['url'] = compat_urllib_parse_unquote(
entry_protocol='m3u8_native') video_variant.attrib['url'])
return [{ urls.append(video_variant.attrib['url'])
'url': video_url, formats.extend(self._extract_variant_formats(
}] video_variant.attrib, video_id))
video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
if video_url not in urls:
formats.extend(self._extract_variant_formats({'url': video_url}, video_id))
return formats
@staticmethod @staticmethod
def _search_dimensions_in_video_url(a_format, video_url): def _search_dimensions_in_video_url(a_format, video_url):
@ -40,10 +75,30 @@ class TwitterBaseIE(InfoExtractor):
'height': int(m.group('height')), 'height': int(m.group('height')),
}) })
def _call_api(self, path, video_id, query={}):
headers = {
'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
}
if not self._GUEST_TOKEN:
self._GUEST_TOKEN = self._download_json(
self._API_BASE + 'guest/activate.json', video_id,
'Downloading guest token', data=b'',
headers=headers)['guest_token']
headers['x-guest-token'] = self._GUEST_TOKEN
try:
return self._download_json(
self._API_BASE + path, video_id, headers=headers, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
raise ExtractorError(self._parse_json(
e.cause.read().decode(),
video_id)['errors'][0]['message'], expected=True)
raise
class TwitterCardIE(TwitterBaseIE):
class TwitterCardIE(InfoExtractor):
IE_NAME = 'twitter:card' IE_NAME = 'twitter:card'
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?P<path>cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)' _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
_TESTS = [ _TESTS = [
{ {
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@ -51,19 +106,28 @@ class TwitterCardIE(TwitterBaseIE):
'info_dict': { 'info_dict': {
'id': '560070183650213889', 'id': '560070183650213889',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Twitter web player', 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
'uploader': 'Twitter',
'uploader_id': 'Twitter',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 30.033, 'duration': 30.033,
'timestamp': 1422366112,
'upload_date': '20150127',
}, },
}, },
{ {
'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768', 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
'md5': '7ee2a553b63d1bccba97fbed97d9e1c8', 'md5': '7137eca597f72b9abbe61e5ae0161399',
'info_dict': { 'info_dict': {
'id': '623160978427936768', 'id': '623160978427936768',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Twitter web player', 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
'thumbnail': r're:^https?://.*$', 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
'uploader': 'NASA',
'uploader_id': 'NASA',
'timestamp': 1437408129,
'upload_date': '20150720',
}, },
}, },
{ {
@ -75,7 +139,7 @@ class TwitterCardIE(TwitterBaseIE):
'title': 'Ubuntu 11.10 Overview', 'title': 'Ubuntu 11.10 Overview',
'description': 'md5:a831e97fa384863d6e26ce48d1c43376', 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
'upload_date': '20111013', 'upload_date': '20111013',
'uploader': 'OMG! Ubuntu!', 'uploader': 'OMG! UBUNTU!',
'uploader_id': 'omgubuntu', 'uploader_id': 'omgubuntu',
}, },
'add_ie': ['Youtube'], 'add_ie': ['Youtube'],
@ -99,190 +163,30 @@ class TwitterCardIE(TwitterBaseIE):
'info_dict': { 'info_dict': {
'id': '705235433198714880', 'id': '705235433198714880',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Twitter web player', 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
'thumbnail': r're:^https?://.*', 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
'uploader': 'Brent Yarina',
'uploader_id': 'BTNBrentYarina',
'timestamp': 1456976204,
'upload_date': '20160303',
}, },
'skip': 'This content is no longer available.',
}, { }, {
'url': 'https://twitter.com/i/videos/752274308186120192', 'url': 'https://twitter.com/i/videos/752274308186120192',
'only_matching': True, 'only_matching': True,
}, },
] ]
_API_BASE = 'https://api.twitter.com/1.1'
def _parse_media_info(self, media_info, video_id):
formats = []
for media_variant in media_info.get('variants', []):
media_url = media_variant['url']
if media_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
elif media_url.endswith('.mpd'):
formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
else:
tbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000)
a_format = {
'url': media_url,
'format_id': 'http-%d' % tbr if tbr else 'http',
'tbr': tbr,
}
# Reported bitRate may be zero
if not a_format['tbr']:
del a_format['tbr']
self._search_dimensions_in_video_url(a_format, media_url)
formats.append(a_format)
return formats
def _extract_mobile_formats(self, username, video_id):
webpage = self._download_webpage(
'https://mobile.twitter.com/%s/status/%s' % (username, video_id),
video_id, 'Downloading mobile webpage',
headers={
# A recent mobile UA is necessary for `gt` cookie
'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0',
})
main_script_url = self._html_search_regex(
r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL')
main_script = self._download_webpage(
main_script_url, video_id, 'Downloading main script')
bearer_token = self._search_regex(
r'BEARER_TOKEN\s*:\s*"([^"]+)"',
main_script, 'bearer token')
# https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id
api_data = self._download_json(
'%s/statuses/show/%s.json' % (self._API_BASE, video_id),
video_id, 'Downloading API data',
headers={
'Authorization': 'Bearer ' + bearer_token,
})
media_info = try_get(api_data, lambda o: o['extended_entities']['media'][0]['video_info']) or {}
return self._parse_media_info(media_info, video_id)
def _real_extract(self, url): def _real_extract(self, url):
path, video_id = re.search(self._VALID_URL, url).groups() status_id = self._match_id(url)
return self.url_result(
config = None 'https://twitter.com/statuses/' + status_id,
formats = [] TwitterIE.ie_key(), status_id)
duration = None
urls = [url]
if path.startswith('cards/'):
urls.append('https://twitter.com/i/videos/' + video_id)
for u in urls:
webpage = self._download_webpage(
u, video_id, headers={'Referer': 'https://twitter.com/'})
iframe_url = self._html_search_regex(
r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
webpage, 'video iframe', default=None)
if iframe_url:
return self.url_result(iframe_url)
config = self._parse_json(self._html_search_regex(
r'data-(?:player-)?config="([^"]+)"', webpage,
'data player config', default='{}'),
video_id)
if config.get('source_type') == 'vine':
return self.url_result(config['player_url'], 'Vine')
periscope_url = PeriscopeIE._extract_url(webpage)
if periscope_url:
return self.url_result(periscope_url, PeriscopeIE.ie_key())
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
if video_url:
if determine_ext(video_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
else:
f = {
'url': video_url,
}
self._search_dimensions_in_video_url(f, video_url)
formats.append(f)
vmap_url = config.get('vmapUrl') or config.get('vmap_url')
if vmap_url:
formats.extend(
self._extract_formats_from_vmap_url(vmap_url, video_id))
media_info = None
for entity in config.get('status', {}).get('entities', []):
if 'mediaInfo' in entity:
media_info = entity['mediaInfo']
if media_info:
formats.extend(self._parse_media_info(media_info, video_id))
duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
username = config.get('user', {}).get('screen_name')
if username:
formats.extend(self._extract_mobile_formats(username, video_id))
if formats:
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
thumbnail = config.get('posterImageUrl') or config.get('image_src')
duration = float_or_none(config.get('duration'), scale=1000) or duration
break
if not formats:
headers = {
'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
'Referer': url,
}
ct0 = self._get_cookies(url).get('ct0')
if ct0:
headers['csrf_token'] = ct0.value
guest_token = self._download_json(
'%s/guest/activate.json' % self._API_BASE, video_id,
'Downloading guest token', data=b'',
headers=headers)['guest_token']
headers['x-guest-token'] = guest_token
self._set_cookie('api.twitter.com', 'gt', guest_token)
config = self._download_json(
'%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id),
video_id, headers=headers)
track = config['track']
vmap_url = track.get('vmapUrl')
if vmap_url:
formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
else:
playback_url = track['playbackUrl']
if determine_ext(playback_url) == 'm3u8':
formats = self._extract_m3u8_formats(
playback_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
else:
formats = [{
'url': playback_url,
}]
title = 'Twitter web player'
thumbnail = config.get('posterImage')
duration = float_or_none(track.get('durationMs'), scale=1000)
self._remove_duplicate_formats(formats)
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
class TwitterIE(InfoExtractor): class TwitterIE(TwitterBaseIE):
IE_NAME = 'twitter' IE_NAME = 'twitter'
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)' _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
_TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s'
_TESTS = [{ _TESTS = [{
'url': 'https://twitter.com/freethenipple/status/643211948184596480', 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
@ -291,10 +195,13 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"', 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
'uploader': 'FREE THE NIPPLE', 'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple', 'uploader_id': 'freethenipple',
'duration': 12.922, 'duration': 12.922,
'timestamp': 1442188653,
'upload_date': '20150913',
'age_limit': 18,
}, },
}, { }, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
@ -316,19 +223,23 @@ class TwitterIE(InfoExtractor):
'id': '665052190608723968', 'id': '665052190608723968',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.', 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
'description': 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
'uploader_id': 'starwars', 'uploader_id': 'starwars',
'uploader': 'Star Wars', 'uploader': 'Star Wars',
'timestamp': 1447395772,
'upload_date': '20151113',
}, },
}, { }, {
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880', 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
'info_dict': { 'info_dict': {
'id': '705235433198714880', 'id': '705235433198714880',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.', 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."', 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
'uploader_id': 'BTNBrentYarina', 'uploader_id': 'BTNBrentYarina',
'uploader': 'Brent Yarina', 'uploader': 'Brent Yarina',
'timestamp': 1456976204,
'upload_date': '20160303',
}, },
'params': { 'params': {
# The same video as https://twitter.com/i/videos/tweet/705235433198714880 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
@ -340,12 +251,14 @@ class TwitterIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '700207533655363584', 'id': '700207533655363584',
'ext': 'mp4', 'ext': 'mp4',
'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', 'title': 'Simon Vertugo - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'JG', 'uploader': 'Simon Vertugo',
'uploader_id': 'jaydingeer', 'uploader_id': 'simonvertugo',
'duration': 30.0, 'duration': 30.0,
'timestamp': 1455777459,
'upload_date': '20160218',
}, },
}, { }, {
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609', 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
@ -353,10 +266,9 @@ class TwitterIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'MIOxnrUteUd', 'id': 'MIOxnrUteUd',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Vince Mancini - Vine of the day', 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"', 'uploader': 'TAKUMA',
'uploader': 'Vince Mancini', 'uploader_id': '1004126642786242560',
'uploader_id': 'Filmdrunk',
'timestamp': 1402826626, 'timestamp': 1402826626,
'upload_date': '20140615', 'upload_date': '20140615',
}, },
@ -367,21 +279,22 @@ class TwitterIE(InfoExtractor):
'id': '719944021058060289', 'id': '719944021058060289',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.', 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
'uploader_id': 'captainamerica', 'uploader_id': 'CaptainAmerica',
'uploader': 'Captain America', 'uploader': 'Captain America',
'duration': 3.17, 'duration': 3.17,
'timestamp': 1460483005,
'upload_date': '20160412',
}, },
}, { }, {
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
'info_dict': { 'info_dict': {
'id': '1zqKVVlkqLaKB', 'id': '1zqKVVlkqLaKB',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence', 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"',
'upload_date': '20160923', 'upload_date': '20160923',
'uploader_id': 'OPP_HSD', 'uploader_id': '1PmKqpJdOJQoY',
'uploader': 'Sgt Kerry Schmidt', 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
'timestamp': 1474613214, 'timestamp': 1474613214,
}, },
'add_ie': ['Periscope'], 'add_ie': ['Periscope'],
@ -392,10 +305,12 @@ class TwitterIE(InfoExtractor):
'id': '852138619213144067', 'id': '852138619213144067',
'ext': 'mp4', 'ext': 'mp4',
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"', 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
'uploader': 'عالم الأخبار', 'uploader': 'عالم الأخبار',
'uploader_id': 'news_al3alm', 'uploader_id': 'news_al3alm',
'duration': 277.4, 'duration': 277.4,
'timestamp': 1492000653,
'upload_date': '20170412',
}, },
}, { }, {
'url': 'https://twitter.com/i/web/status/910031516746514432', 'url': 'https://twitter.com/i/web/status/910031516746514432',
@ -404,10 +319,12 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
'uploader': 'Préfet de Guadeloupe', 'uploader': 'Préfet de Guadeloupe',
'uploader_id': 'Prefet971', 'uploader_id': 'Prefet971',
'duration': 47.48, 'duration': 47.48,
'timestamp': 1505803395,
'upload_date': '20170919',
}, },
'params': { 'params': {
'skip_download': True, # requires ffmpeg 'skip_download': True, # requires ffmpeg
@ -420,10 +337,12 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 're:.*?Shep is on a roll today.*?', 'title': 're:.*?Shep is on a roll today.*?',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:63b036c228772523ae1924d5f8e5ed6b', 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
'uploader': 'Lis Power', 'uploader': 'Lis Power',
'uploader_id': 'LisPower1', 'uploader_id': 'LisPower1',
'duration': 111.278, 'duration': 111.278,
'timestamp': 1527623489,
'upload_date': '20180529',
}, },
'params': { 'params': {
'skip_download': True, # requires ffmpeg 'skip_download': True, # requires ffmpeg
@ -435,89 +354,164 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!', 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:66d493500c013e3e2d434195746a7f78', 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
'uploader': 'Twitter', 'uploader': 'Twitter',
'uploader_id': 'Twitter', 'uploader_id': 'Twitter',
'duration': 61.567, 'duration': 61.567,
'timestamp': 1548184644,
'upload_date': '20190122',
}, },
}, {
# not available in Periscope
'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
'info_dict': {
'id': '1vOGwqejwoWxB',
'ext': 'mp4',
'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
'uploader': 'Vivi',
'uploader_id': '1eVjYOLGkGrQL',
},
'add_ie': ['TwitterBroadcast'],
}, {
# Twitch Clip Embed
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) twid = self._match_id(url)
twid = mobj.group('id') status = self._call_api(
'statuses/show/%s.json' % twid, twid, {
webpage, urlh = self._download_webpage_handle( 'cards_platform': 'Web-12',
self._TEMPLATE_STATUSES_URL % twid, twid) 'include_cards': 1,
'include_reply_count': 1,
if 'twitter.com/account/suspended' in urlh.geturl(): 'include_user_entities': 0,
raise ExtractorError('Account suspended by Twitter.', expected=True) 'tweet_mode': 'extended',
})
user_id = None
redirect_mobj = re.match(self._VALID_URL, urlh.geturl())
if redirect_mobj:
user_id = redirect_mobj.group('user_id')
if not user_id:
user_id = mobj.group('user_id')
username = remove_end(self._og_search_title(webpage), ' on Twitter')
title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
title = description = status['full_text'].replace('\n', ' ')
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
title = re.sub(r'\s+(https?://[^ ]+)', '', title) title = re.sub(r'\s+(https?://[^ ]+)', '', title)
user = status.get('user') or {}
uploader = user.get('name')
if uploader:
title = '%s - %s' % (uploader, title)
uploader_id = user.get('screen_name')
tags = []
for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
hashtag_text = hashtag.get('text')
if not hashtag_text:
continue
tags.append(hashtag_text)
info = { info = {
'uploader_id': user_id, 'id': twid,
'uploader': username, 'title': title,
'webpage_url': url, 'description': description,
'description': '%s on Twitter: "%s"' % (username, description), 'uploader': uploader,
'title': username + ' - ' + title, 'timestamp': unified_timestamp(status.get('created_at')),
'uploader_id': uploader_id,
'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None,
'like_count': int_or_none(status.get('favorite_count')),
'repost_count': int_or_none(status.get('retweet_count')),
'comment_count': int_or_none(status.get('reply_count')),
'age_limit': 18 if status.get('possibly_sensitive') else 0,
'tags': tags,
} }
mobj = re.search(r'''(?x) media = try_get(status, lambda x: x['extended_entities']['media'][0])
<video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s* if media and media.get('type') != 'photo':
<source[^>]+video-src="(?P<url>[^"]+)" video_info = media.get('video_info') or {}
''', webpage)
if mobj: formats = []
more_info = mobj.group('more_info') for variant in video_info.get('variants', []):
height = int_or_none(self._search_regex( formats.extend(self._extract_variant_formats(variant, twid))
r'data-height="(\d+)"', more_info, 'height', fatal=False)) self._sort_formats(formats)
width = int_or_none(self._search_regex(
r'data-width="(\d+)"', more_info, 'width', fatal=False)) thumbnails = []
thumbnail = self._search_regex( media_url = media.get('media_url_https') or media.get('media_url')
r'poster="([^"]+)"', more_info, 'poster', fatal=False) if media_url:
info.update({ def add_thumbnail(name, size):
'id': twid, thumbnails.append({
'url': mobj.group('url'), 'id': name,
'height': height, 'url': update_url_query(media_url, {'name': name}),
'width': width, 'width': int_or_none(size.get('w') or size.get('width')),
'thumbnail': thumbnail, 'height': int_or_none(size.get('h') or size.get('height')),
}) })
return info for name, size in media.get('sizes', {}).items():
add_thumbnail(name, size)
add_thumbnail('orig', media.get('original_info') or {})
twitter_card_url = None info.update({
if 'class="PlayableMedia' in webpage: 'formats': formats,
twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid) 'thumbnails': thumbnails,
'duration': float_or_none(video_info.get('duration_millis'), 1000),
})
else: else:
twitter_card_iframe_url = self._search_regex( card = status.get('card')
r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1', if card:
webpage, 'Twitter card iframe URL', default=None, group='url') binding_values = card['binding_values']
if twitter_card_iframe_url:
twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url) def get_binding_value(k):
o = binding_values.get(k) or {}
return try_get(o, lambda x: x[x['type'].lower() + '_value'])
card_name = card['name'].split(':')[-1]
if card_name == 'amplify':
formats = self._extract_formats_from_vmap_url(
get_binding_value('amplify_url_vmap'),
get_binding_value('amplify_content_id') or twid)
self._sort_formats(formats)
thumbnails = []
for suffix in ('_small', '', '_large', '_x_large', '_original'):
image = get_binding_value('player_image' + suffix) or {}
image_url = image.get('url')
if not image_url or '/player-placeholder' in image_url:
continue
thumbnails.append({
'id': suffix[1:] if suffix else 'medium',
'url': image_url,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
})
if twitter_card_url:
info.update({ info.update({
'_type': 'url_transparent', 'formats': formats,
'ie_key': 'TwitterCard', 'thumbnails': thumbnails,
'url': twitter_card_url, 'duration': int_or_none(get_binding_value(
'content_duration_seconds')),
})
elif card_name == 'player':
info.update({
'_type': 'url',
'url': get_binding_value('player_url'),
})
elif card_name == 'periscope_broadcast':
info.update({
'_type': 'url',
'url': get_binding_value('url') or get_binding_value('player_url'),
'ie_key': PeriscopeIE.ie_key(),
})
elif card_name == 'broadcast':
info.update({
'_type': 'url',
'url': get_binding_value('broadcast_url'),
'ie_key': TwitterBroadcastIE.ie_key(),
})
else:
raise ExtractorError('Unsupported Twitter Card.')
else:
expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
if not expanded_url:
raise ExtractorError("There's no video in this tweet.")
info.update({
'_type': 'url',
'url': expanded_url,
}) })
return info return info
raise ExtractorError('There\'s no video in this tweet.')
class TwitterAmplifyIE(TwitterBaseIE): class TwitterAmplifyIE(TwitterBaseIE):
IE_NAME = 'twitter:amplify' IE_NAME = 'twitter:amplify'
@ -573,3 +567,27 @@ class TwitterAmplifyIE(TwitterBaseIE):
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
} }
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
IE_NAME = 'twitter:broadcast'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
def _real_extract(self, url):
broadcast_id = self._match_id(url)
broadcast = self._call_api(
'broadcasts/show.json', broadcast_id,
{'ids': broadcast_id})['broadcasts'][broadcast_id]
info = self._parse_broadcast_data(broadcast, broadcast_id)
media_key = broadcast['media_key']
source = self._call_api(
'live_video_stream/status/' + media_key, media_key)['source']
m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
if '/live_video_stream/geoblocked/' in m3u8_url:
self.raise_geo_restricted()
m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
m3u8_url).query).get('type', [None])[0]
state, width, height = self._extract_common_format_info(broadcast)
info['formats'] = self._extract_pscp_m3u8_formats(
m3u8_url, broadcast_id, m3u8_id, state, width, height)
return info

View File

@ -634,14 +634,15 @@ class VKWallPostIE(VKBaseIE):
if not a.url: if not a.url:
continue continue
title = unescapeHTML(a.title) title = unescapeHTML(a.title)
performer = unescapeHTML(a.performer)
entries.append({ entries.append({
'id': '%s_%s' % (a.owner_id, a.id), 'id': '%s_%s' % (a.owner_id, a.id),
'url': self._unmask_url(a.url, a.ads['vk_id']), 'url': self._unmask_url(a.url, a.ads['vk_id']),
'title': '%s - %s' % (a.performer, title) if a.performer else title, 'title': '%s - %s' % (performer, title) if performer else title,
'thumbnail': a.cover_url.split(',') if a.cover_url else None, 'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
'duration': a.duration, 'duration': int_or_none(a.duration),
'uploader': uploader, 'uploader': uploader,
'artist': a.performer, 'artist': performer,
'track': title, 'track': title,
'ext': 'mp4', 'ext': 'mp4',
'protocol': 'm3u8', 'protocol': 'm3u8',

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2019.10.29' __version__ = '2019.11.05'