1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-12-26 16:27:54 +01:00

Merge pull request #10 from ytdl-org/master

update
This commit is contained in:
tsia 2019-12-02 09:37:07 +01:00 committed by GitHub
commit 4895e2f02d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 1203 additions and 1281 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've verified that I'm running youtube-dl version **2019.11.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.11.05 [debug] youtube-dl version 2019.11.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've verified that I'm running youtube-dl version **2019.11.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've verified that I'm running youtube-dl version **2019.11.28**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've verified that I'm running youtube-dl version **2019.11.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.11.05 [debug] youtube-dl version 2019.11.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2019.11.05** - [ ] I've verified that I'm running youtube-dl version **2019.11.28**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@ -1,3 +1,82 @@
version 2019.11.28
Core
+ [utils] Add generic caesar cipher and rot47
* [utils] Handle rd-suffixed day parts in unified_strdate (#23199)
Extractors
* [vimeo] Improve extraction
* Fix review extraction
* Fix ondemand extraction
* Make password protected player case as an expected error (#22896)
* Simplify channel based extractors code
- [openload] Remove extractor (#11999)
- [verystream] Remove extractor
- [streamango] Remove extractor (#15406)
* [dailymotion] Improve extraction
* Extract http formats included in m3u8 manifest
* Fix user extraction (#3553, #21415)
+ Add suport for User Authentication (#11491)
* Fix password protected videos extraction (#23176)
* Respect age limit option and family filter cookie value (#18437)
* Handle video url playlist query param
* Report allowed countries for geo-restricted videos
* [corus] Improve extraction
+ Add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com
and disneylachaine.ca (#20861)
+ Add support for self hosted videos (#22075)
* Detect DRM protection (#14910, #9164)
* [vivo] Fix extraction (#22328, #22279)
+ [bitchute] Extract upload date (#22990, #23193)
* [soundcloud] Update client id (#23214)
version 2019.11.22
Core
+ [extractor/common] Clean jwplayer description HTML tags
+ [extractor/common] Add data, headers and query to all major extract formats
methods
Extractors
* [chaturbate] Fix extraction (#23010, #23012)
+ [ntvru] Add support for non relative file URLs (#23140)
* [vk] Fix wall audio thumbnails extraction (#23135)
* [ivi] Fix format extraction (#21991)
- [comcarcoff] Remove extractor
+ [drtv] Add support for new URL schema (#23059)
+ [nexx] Add support for Multi Player JS Setup (#23052)
+ [teamcoco] Add support for new videos (#23054)
* [soundcloud] Check if the soundtrack has downloads left (#23045)
* [facebook] Fix posts video data extraction (#22473)
- [addanime] Remove extractor
- [minhateca] Remove extractor
- [daisuki] Remove extractor
* [seeker] Fix extraction
- [revision3] Remove extractors
* [twitch] Fix video comments URL (#18593, #15828)
* [twitter] Improve extraction
+ Add support for generic embeds (#22168)
* Always extract http formats for native videos (#14934)
+ Add support for Twitter Broadcasts (#21369)
+ Extract more metadata
* Improve VMap format extraction
* Unify extraction code for both twitter statuses and cards
+ [twitch] Add support for Clip embed URLs
* [lnkgo] Fix extraction (#16834)
* [mixcloud] Improve extraction
* Improve metadata extraction (#11721)
* Fix playlist extraction (#22378)
* Fix user mixes extraction (#15197, #17865)
+ [kinja] Add support for Kinja embeds (#5756, #11282, #22237, #22384)
* [onionstudios] Fix extraction
+ [hotstar] Pass Referer header to format requests (#22836)
* [dplay] Minimize response size
+ [patreon] Extract uploader_id and filesize
* [patreon] Minimize response size
* [roosterteeth] Fix login request (#16094, #22689)
version 2019.11.05 version 2019.11.05
Extractors Extractors

View File

@ -26,7 +26,6 @@
- **AcademicEarth:Course** - **AcademicEarth:Course**
- **acast** - **acast**
- **acast:channel** - **acast:channel**
- **AddAnime**
- **ADN**: Anime Digital Network - **ADN**: Anime Digital Network
- **AdobeConnect** - **AdobeConnect**
- **AdobeTV** - **AdobeTV**
@ -175,7 +174,6 @@
- **CNN** - **CNN**
- **CNNArticle** - **CNNArticle**
- **CNNBlogs** - **CNNBlogs**
- **ComCarCoff**
- **ComedyCentral** - **ComedyCentral**
- **ComedyCentralFullEpisodes** - **ComedyCentralFullEpisodes**
- **ComedyCentralShortname** - **ComedyCentralShortname**
@ -203,8 +201,6 @@
- **dailymotion** - **dailymotion**
- **dailymotion:playlist** - **dailymotion:playlist**
- **dailymotion:user** - **dailymotion:user**
- **DaisukiMotto**
- **DaisukiMottoPlaylist**
- **daum.net** - **daum.net**
- **daum.net:clip** - **daum.net:clip**
- **daum.net:playlist** - **daum.net:playlist**
@ -404,6 +400,7 @@
- **Ketnet** - **Ketnet**
- **KhanAcademy** - **KhanAcademy**
- **KickStarter** - **KickStarter**
- **KinjaEmbed**
- **KinoPoisk** - **KinoPoisk**
- **KonserthusetPlay** - **KonserthusetPlay**
- **kontrtube**: KontrTube.ru - Труба зовёт - **kontrtube**: KontrTube.ru - Труба зовёт
@ -485,14 +482,12 @@
- **Mgoon** - **Mgoon**
- **MGTV**: 芒果TV - **MGTV**: 芒果TV
- **MiaoPai** - **MiaoPai**
- **Minhateca**
- **MinistryGrid** - **MinistryGrid**
- **Minoto** - **Minoto**
- **miomio.tv** - **miomio.tv**
- **MiTele**: mitele.es - **MiTele**: mitele.es
- **mixcloud** - **mixcloud**
- **mixcloud:playlist** - **mixcloud:playlist**
- **mixcloud:stream**
- **mixcloud:user** - **mixcloud:user**
- **Mixer:live** - **Mixer:live**
- **Mixer:vod** - **Mixer:vod**
@ -623,7 +618,6 @@
- **OnionStudios** - **OnionStudios**
- **Ooyala** - **Ooyala**
- **OoyalaExternal** - **OoyalaExternal**
- **Openload**
- **OraTV** - **OraTV**
- **orf:fm4**: radio FM4 - **orf:fm4**: radio FM4
- **orf:fm4:story**: fm4.orf.at stories - **orf:fm4:story**: fm4.orf.at stories
@ -723,8 +717,6 @@
- **Restudy** - **Restudy**
- **Reuters** - **Reuters**
- **ReverbNation** - **ReverbNation**
- **revision**
- **revision3:embed**
- **RICE** - **RICE**
- **RMCDecouverte** - **RMCDecouverte**
- **RockstarGames** - **RockstarGames**
@ -832,7 +824,6 @@
- **Steam** - **Steam**
- **Stitcher** - **Stitcher**
- **Streamable** - **Streamable**
- **Streamango**
- **streamcloud.eu** - **streamcloud.eu**
- **StreamCZ** - **StreamCZ**
- **StreetVoice** - **StreetVoice**
@ -958,6 +949,7 @@
- **twitch:vod** - **twitch:vod**
- **twitter** - **twitter**
- **twitter:amplify** - **twitter:amplify**
- **twitter:broadcast**
- **twitter:card** - **twitter:card**
- **udemy** - **udemy**
- **udemy:course** - **udemy:course**
@ -982,7 +974,6 @@
- **Vbox7** - **Vbox7**
- **VeeHD** - **VeeHD**
- **Veoh** - **Veoh**
- **verystream**
- **Vesti**: Вести.Ru - **Vesti**: Вести.Ru
- **Vevo** - **Vevo**
- **VevoPlaylist** - **VevoPlaylist**

View File

@ -19,6 +19,7 @@ from youtube_dl.utils import (
age_restricted, age_restricted,
args_to_str, args_to_str,
encode_base_n, encode_base_n,
caesar,
clean_html, clean_html,
date_from_str, date_from_str,
DateRange, DateRange,
@ -69,6 +70,7 @@ from youtube_dl.utils import (
remove_start, remove_start,
remove_end, remove_end,
remove_quotes, remove_quotes,
rot47,
shell_quote, shell_quote,
smuggle_url, smuggle_url,
str_to_int, str_to_int,
@ -340,6 +342,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715') self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901') self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
def test_unified_timestamps(self): def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@ -495,6 +499,7 @@ class TestUtil(unittest.TestCase):
def test_str_to_int(self): def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int('123.456'), 123456)
self.assertEqual(str_to_int(523), 523)
def test_url_basename(self): def test_url_basename(self):
self.assertEqual(url_basename('http://foo.de/'), '') self.assertEqual(url_basename('http://foo.de/'), '')
@ -1367,6 +1372,20 @@ Line 1
self.assertRaises(ValueError, encode_base_n, 0, 70) self.assertRaises(ValueError, encode_base_n, 0, 70)
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table) self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
def test_caesar(self):
self.assertEqual(caesar('ace', 'abcdef', 2), 'cea')
self.assertEqual(caesar('cea', 'abcdef', -2), 'ace')
self.assertEqual(caesar('ace', 'abcdef', -2), 'eac')
self.assertEqual(caesar('eac', 'abcdef', 2), 'ace')
self.assertEqual(caesar('ace', 'abcdef', 0), 'ace')
self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz')
self.assertEqual(caesar('abc', 'acegik', 2), 'ebg')
self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
def test_rot47(self):
self.assertEqual(rot47('youtube-dl'), r'J@FEF36\5=')
self.assertEqual(rot47('YOUTUBE-DL'), r'*~&%&qt\s{')
def test_urshift(self): def test_urshift(self):
self.assertEqual(urshift(3, 1), 1) self.assertEqual(urshift(3, 1), 1)
self.assertEqual(urshift(-3, 1), 2147483646) self.assertEqual(urshift(-3, 1), 2147483646)

View File

@ -4,29 +4,30 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
dict_get,
int_or_none, int_or_none,
parse_iso8601, try_get,
) )
class ABCOTVSIE(InfoExtractor): class ABCOTVSIE(InfoExtractor):
IE_NAME = 'abcotvs' IE_NAME = 'abcotvs'
IE_DESC = 'ABC Owned Television Stations' IE_DESC = 'ABC Owned Television Stations'
_VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)' _VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/', 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
'info_dict': { 'info_dict': {
'id': '472581', 'id': '472548',
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
'ext': 'mp4', 'ext': 'mp4',
'title': 'East Bay museum celebrates vintage synthesizers', 'title': 'East Bay museum celebrates synthesized music',
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3', 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421123075, 'timestamp': 1421118520,
'upload_date': '20150113', 'upload_date': '20150113',
'uploader': 'Jonathan Bloom',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -37,39 +38,63 @@ class ABCOTVSIE(InfoExtractor):
'url': 'http://abc7news.com/472581', 'url': 'http://abc7news.com/472581',
'only_matching': True, 'only_matching': True,
}, },
{
'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
'only_matching': True,
},
] ]
_SITE_MAP = {
'6abc': 'wpvi',
'abc11': 'wtvd',
'abc13': 'ktrk',
'abc30': 'kfsn',
'abc7': 'kabc',
'abc7chicago': 'wls',
'abc7news': 'kgo',
'abc7ny': 'wabc',
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) site, display_id, video_id = re.match(self._VALID_URL, url).groups()
video_id = mobj.group('id') display_id = display_id or video_id
display_id = mobj.group('display_id') or video_id station = self._SITE_MAP[site]
webpage = self._download_webpage(url, display_id) data = self._download_json(
'https://api.abcotvs.com/v2/content', display_id, query={
'id': video_id,
'key': 'otv.web.%s.story' % station,
'station': station,
})['data']
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
title = video.get('title') or video['linkText']
m3u8 = self._html_search_meta( formats = []
'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0] m3u8_url = video.get('m3u8')
if m3u8_url:
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') formats = self._extract_m3u8_formats(
video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False)
mp4_url = video.get('mp4')
if mp4_url:
formats.append({
'abr': 128,
'format_id': 'https',
'height': 360,
'url': mp4_url,
'width': 640,
})
self._sort_formats(formats) self._sort_formats(formats)
title = self._og_search_title(webpage).strip() image = video.get('image') or {}
description = self._og_search_description(webpage).strip()
thumbnail = self._og_search_thumbnail(webpage)
timestamp = parse_iso8601(self._search_regex(
r'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">',
webpage, 'upload date', fatal=False))
uploader = self._search_regex(
r'rel="author">([^<]+)</a>',
webpage, 'uploader', default=None)
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': description, 'description': dict_get(video, ('description', 'caption'), try_get(video, lambda x: x['meta']['description'])),
'thumbnail': thumbnail, 'thumbnail': dict_get(image, ('source', 'dynamicSource')),
'timestamp': timestamp, 'timestamp': int_or_none(video.get('date')),
'uploader': uploader, 'duration': int_or_none(video.get('length')),
'formats': formats, 'formats': formats,
} }

View File

@ -1,25 +1,119 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
parse_duration,
unified_strdate,
str_to_int,
int_or_none,
float_or_none, float_or_none,
int_or_none,
ISO639Utils, ISO639Utils,
determine_ext, OnDemandPagedList,
parse_duration,
str_or_none,
str_to_int,
unified_strdate,
) )
class AdobeTVBaseIE(InfoExtractor): class AdobeTVBaseIE(InfoExtractor):
_API_BASE_URL = 'http://tv.adobe.com/api/v4/' def _call_api(self, path, video_id, query, note=None):
return self._download_json(
'http://tv.adobe.com/api/v4/' + path,
video_id, note, query=query)['data']
def _parse_subtitles(self, video_data, url_key):
subtitles = {}
for translation in video_data.get('translations', []):
vtt_path = translation.get(url_key)
if not vtt_path:
continue
lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
subtitles.setdefault(lang, []).append({
'ext': 'vtt',
'url': vtt_path,
})
return subtitles
def _parse_video_data(self, video_data):
video_id = compat_str(video_data['id'])
title = video_data['title']
s3_extracted = False
formats = []
for source in video_data.get('videos', []):
source_url = source.get('url')
if not source_url:
continue
f = {
'format_id': source.get('quality_level'),
'fps': int_or_none(source.get('frame_rate')),
'height': int_or_none(source.get('height')),
'tbr': int_or_none(source.get('video_data_rate')),
'width': int_or_none(source.get('width')),
'url': source_url,
}
original_filename = source.get('original_filename')
if original_filename:
if not (f.get('height') and f.get('width')):
mobj = re.search(r'_(\d+)x(\d+)', original_filename)
if mobj:
f.update({
'height': int(mobj.group(2)),
'width': int(mobj.group(1)),
})
if original_filename.startswith('s3://') and not s3_extracted:
formats.append({
'format_id': 'original',
'preference': 1,
'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
})
s3_extracted = True
formats.append(f)
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
'thumbnail': video_data.get('thumbnail'),
'upload_date': unified_strdate(video_data.get('start_date')),
'duration': parse_duration(video_data.get('duration')),
'view_count': str_to_int(video_data.get('playcount')),
'formats': formats,
'subtitles': self._parse_subtitles(video_data, 'vtt'),
}
class AdobeTVEmbedIE(AdobeTVBaseIE):
IE_NAME = 'adobetv:embed'
_VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
_TEST = {
'url': 'https://tv.adobe.com/embed/22/4153',
'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
'info_dict': {
'id': '4153',
'ext': 'flv',
'title': 'Creating Graphics Optimized for BlackBerry',
'description': 'md5:eac6e8dced38bdaae51cd94447927459',
'thumbnail': r're:https?://.*\.jpg$',
'upload_date': '20091109',
'duration': 377,
'view_count': int,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._call_api(
'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
return self._parse_video_data(video_data)
class AdobeTVIE(AdobeTVBaseIE): class AdobeTVIE(AdobeTVBaseIE):
IE_NAME = 'adobetv'
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
_TEST = { _TEST = {
@ -42,45 +136,33 @@ class AdobeTVIE(AdobeTVBaseIE):
if not language: if not language:
language = 'en' language = 'en'
video_data = self._download_json( video_data = self._call_api(
self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname), 'episode/get', urlname, {
urlname)['data'][0] 'disclosure': 'standard',
'language': language,
formats = [{ 'show_urlname': show_urlname,
'url': source['url'], 'urlname': urlname,
'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None, })[0]
'width': int_or_none(source.get('width')), return self._parse_video_data(video_data)
'height': int_or_none(source.get('height')),
'tbr': int_or_none(source.get('video_data_rate')),
} for source in video_data['videos']]
self._sort_formats(formats)
return {
'id': compat_str(video_data['id']),
'title': video_data['title'],
'description': video_data.get('description'),
'thumbnail': video_data.get('thumbnail'),
'upload_date': unified_strdate(video_data.get('start_date')),
'duration': parse_duration(video_data.get('duration')),
'view_count': str_to_int(video_data.get('playcount')),
'formats': formats,
}
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
def _parse_page_data(self, page_data): _PAGE_SIZE = 25
return [self.url_result(self._get_element_url(element_data)) for element_data in page_data]
def _extract_playlist_entries(self, url, display_id): def _fetch_page(self, display_id, query, page):
page = self._download_json(url, display_id) page += 1
entries = self._parse_page_data(page['data']) query['page'] = page
for page_num in range(2, page['paging']['pages'] + 1): for element_data in self._call_api(
entries.extend(self._parse_page_data( self._RESOURCE, display_id, query, 'Download Page %d' % page):
self._download_json(url + '&page=%d' % page_num, display_id)['data'])) yield self._process_data(element_data)
return entries
def _extract_playlist_entries(self, display_id, query):
return OnDemandPagedList(functools.partial(
self._fetch_page, display_id, query), self._PAGE_SIZE)
class AdobeTVShowIE(AdobeTVPlaylistBaseIE): class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
IE_NAME = 'adobetv:show'
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
_TEST = { _TEST = {
@ -92,26 +174,31 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
}, },
'playlist_mincount': 136, 'playlist_mincount': 136,
} }
_RESOURCE = 'episode'
def _get_element_url(self, element_data): _process_data = AdobeTVBaseIE._parse_video_data
return element_data['urls'][0]
def _real_extract(self, url): def _real_extract(self, url):
language, show_urlname = re.match(self._VALID_URL, url).groups() language, show_urlname = re.match(self._VALID_URL, url).groups()
if not language: if not language:
language = 'en' language = 'en'
query = 'language=%s&show_urlname=%s' % (language, show_urlname) query = {
'disclosure': 'standard',
'language': language,
'show_urlname': show_urlname,
}
show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0] show_data = self._call_api(
'show/get', show_urlname, query)[0]
return self.playlist_result( return self.playlist_result(
self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname), self._extract_playlist_entries(show_urlname, query),
compat_str(show_data['id']), str_or_none(show_data.get('id')),
show_data['show_name'], show_data.get('show_name'),
show_data['show_description']) show_data.get('show_description'))
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
IE_NAME = 'adobetv:channel'
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
_TEST = { _TEST = {
@ -121,24 +208,30 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
}, },
'playlist_mincount': 96, 'playlist_mincount': 96,
} }
_RESOURCE = 'show'
def _get_element_url(self, element_data): def _process_data(self, show_data):
return element_data['url'] return self.url_result(
show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
def _real_extract(self, url): def _real_extract(self, url):
language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups() language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
if not language: if not language:
language = 'en' language = 'en'
query = 'language=%s&channel_urlname=%s' % (language, channel_urlname) query = {
'channel_urlname': channel_urlname,
'language': language,
}
if category_urlname: if category_urlname:
query += '&category_urlname=%s' % category_urlname query['category_urlname'] = category_urlname
return self.playlist_result( return self.playlist_result(
self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname), self._extract_playlist_entries(channel_urlname, query),
channel_urlname) channel_urlname)
class AdobeTVVideoIE(InfoExtractor): class AdobeTVVideoIE(AdobeTVBaseIE):
IE_NAME = 'adobetv:video'
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)' _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
_TEST = { _TEST = {
@ -160,38 +253,36 @@ class AdobeTVVideoIE(InfoExtractor):
video_data = self._parse_json(self._search_regex( video_data = self._parse_json(self._search_regex(
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id) r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
title = video_data['title']
formats = [{ formats = []
'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')), sources = video_data.get('sources') or []
'url': source['src'], for source in sources:
'width': int_or_none(source.get('width')), source_src = source.get('src')
'height': int_or_none(source.get('height')), if not source_src:
'tbr': int_or_none(source.get('bitrate')), continue
} for source in video_data['sources']] formats.append({
'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])),
'height': int_or_none(source.get('height') or None),
'tbr': int_or_none(source.get('bitrate') or None),
'width': int_or_none(source.get('width') or None),
'url': source_src,
})
self._sort_formats(formats) self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among # For both metadata and downloaded files the duration varies among
# formats. I just pick the max one # formats. I just pick the max one
duration = max(filter(None, [ duration = max(filter(None, [
float_or_none(source.get('duration'), scale=1000) float_or_none(source.get('duration'), scale=1000)
for source in video_data['sources']])) for source in sources]))
subtitles = {}
for translation in video_data.get('translations', []):
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
if lang_id not in subtitles:
subtitles[lang_id] = []
subtitles[lang_id].append({
'url': translation['vttPath'],
'ext': 'vtt',
})
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': video_data['title'], 'title': title,
'description': video_data.get('description'), 'description': video_data.get('description'),
'thumbnail': video_data['video'].get('poster'), 'thumbnail': video_data.get('video', {}).get('poster'),
'duration': duration, 'duration': duration,
'subtitles': subtitles, 'subtitles': self._parse_subtitles(video_data, 'vttPath'),
} }

View File

@ -7,6 +7,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
orderedSet, orderedSet,
unified_strdate,
urlencode_postdata, urlencode_postdata,
) )
@ -23,6 +24,7 @@ class BitChuteIE(InfoExtractor):
'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a', 'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Victoria X Rave', 'uploader': 'Victoria X Rave',
'upload_date': '20170813',
}, },
}, { }, {
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
@ -74,12 +76,17 @@ class BitChuteIE(InfoExtractor):
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'), r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
upload_date = unified_strdate(self._search_regex(
r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
webpage, 'upload date', fatal=False))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'uploader': uploader, 'uploader': uploader,
'upload_date': upload_date,
'formats': formats, 'formats': formats,
} }

View File

@ -32,7 +32,7 @@ class Channel9IE(InfoExtractor):
'upload_date': '20130828', 'upload_date': '20130828',
'session_code': 'KOS002', 'session_code': 'KOS002',
'session_room': 'Arena 1A', 'session_room': 'Arena 1A',
'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'], 'session_speakers': 'count:5',
}, },
}, { }, {
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
@ -64,15 +64,15 @@ class Channel9IE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
'info_dict': {
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
'title': 'Channel 9',
},
'playlist_mincount': 100,
}, { }, {
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
'info_dict': {
'id': 'Events/DEVintersection/DEVintersection-2016',
'title': 'DEVintersection 2016 Orlando Sessions',
},
'playlist_mincount': 14,
}, {
'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
@ -112,11 +112,11 @@ class Channel9IE(InfoExtractor):
episode_data), content_path) episode_data), content_path)
content_id = episode_data['contentId'] content_id = episode_data['contentId']
is_session = '/Sessions(' in episode_data['api'] is_session = '/Sessions(' in episode_data['api']
content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
if is_session: if is_session:
content_url += '?$expand=Speakers' content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
else: else:
content_url += '?$expand=Authors' content_url += 'Authors,Body&$expand=Authors'
content_data = self._download_json(content_url, content_id) content_data = self._download_json(content_url, content_id)
title = content_data['Title'] title = content_data['Title']
@ -210,7 +210,7 @@ class Channel9IE(InfoExtractor):
'id': content_id, 'id': content_id,
'title': title, 'title': title,
'description': clean_html(content_data.get('Description') or content_data.get('Body')), 'description': clean_html(content_data.get('Description') or content_data.get('Body')),
'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'), 'thumbnail': content_data.get('VideoPlayerPreviewImage'),
'duration': int_or_none(content_data.get('MediaLengthInSeconds')), 'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
'timestamp': parse_iso8601(content_data.get('PublishedDate')), 'timestamp': parse_iso8601(content_data.get('PublishedDate')),
'avg_rating': int_or_none(content_data.get('Rating')), 'avg_rating': int_or_none(content_data.get('Rating')),

View File

@ -3,7 +3,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import (
ExtractorError,
lowercase_escape,
url_or_none,
)
class ChaturbateIE(InfoExtractor): class ChaturbateIE(InfoExtractor):
@ -38,12 +42,31 @@ class ChaturbateIE(InfoExtractor):
'https://chaturbate.com/%s/' % video_id, video_id, 'https://chaturbate.com/%s/' % video_id, video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers())
m3u8_urls = [] found_m3u8_urls = []
data = self._parse_json(
self._search_regex(
r'initialRoomDossier\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
webpage, 'data', default='{}', group='value'),
video_id, transform_source=lowercase_escape, fatal=False)
if data:
m3u8_url = url_or_none(data.get('hls_source'))
if m3u8_url:
found_m3u8_urls.append(m3u8_url)
if not found_m3u8_urls:
for m in re.finditer(
r'(\\u002[27])(?P<url>http.+?\.m3u8.*?)\1', webpage):
found_m3u8_urls.append(lowercase_escape(m.group('url')))
if not found_m3u8_urls:
for m in re.finditer( for m in re.finditer(
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage): r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group( found_m3u8_urls.append(m.group('url'))
'url').replace('_fast', '')
m3u8_urls = []
for found_m3u8_url in found_m3u8_urls:
m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '')
for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url): for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
if m3u8_url not in m3u8_urls: if m3u8_url not in m3u8_urls:
m3u8_urls.append(m3u8_url) m3u8_urls.append(m3u8_url)
@ -63,7 +86,12 @@ class ChaturbateIE(InfoExtractor):
formats = [] formats = []
for m3u8_url in m3u8_urls: for m3u8_url in m3u8_urls:
m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow' for known_id in ('fast', 'slow'):
if '_%s' % known_id in m3u8_url:
m3u8_id = known_id
break
else:
m3u8_id = None
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', m3u8_url, video_id, ext='mp4',
# ffmpeg skips segments for fast m3u8 # ffmpeg skips segments for fast m3u8

View File

@ -1766,6 +1766,19 @@ class InfoExtractor(object):
# the same GROUP-ID # the same GROUP-ID
f['acodec'] = 'none' f['acodec'] = 'none'
formats.append(f) formats.append(f)
# for DailyMotion
progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
if progressive_uri:
http_f = f.copy()
del http_f['manifest_url']
http_f.update({
'format_id': f['format_id'].replace('hls-', 'http-'),
'protocol': 'http',
'url': progressive_uri,
})
formats.append(http_f)
last_stream_inf = {} last_stream_inf = {}
return formats return formats

View File

@ -4,7 +4,12 @@ from __future__ import unicode_literals
import re import re
from .theplatform import ThePlatformFeedIE from .theplatform import ThePlatformFeedIE
from ..utils import int_or_none from ..utils import (
dict_get,
ExtractorError,
float_or_none,
int_or_none,
)
class CorusIE(ThePlatformFeedIE): class CorusIE(ThePlatformFeedIE):
@ -12,24 +17,49 @@ class CorusIE(ThePlatformFeedIE):
https?:// https?://
(?:www\.)? (?:www\.)?
(?P<domain> (?P<domain>
(?:globaltv|etcanada)\.com| (?:
(?:hgtv|foodnetwork|slice|history|showcase|bigbrothercanada)\.ca globaltv|
etcanada|
seriesplus|
wnetwork|
ytv
)\.com|
(?:
hgtv|
foodnetwork|
slice|
history|
showcase|
bigbrothercanada|
abcspark|
disney(?:channel|lachaine)
)\.ca
)
/(?:[^/]+/)*
(?:
video\.html\?.*?\bv=|
videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)?
)
(?P<id>
[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}|
(?:[A-Z]{4})?\d{12,20}
) )
/(?:video/(?:[^/]+/)?|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
(?P<id>\d+)
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/', 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
'info_dict': { 'info_dict': {
'id': '870923331648', 'id': '870923331648',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Movie Night Popcorn with Bryan', 'title': 'Movie Night Popcorn with Bryan',
'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.', 'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
'uploader': 'SHWM-NEW',
'upload_date': '20170206', 'upload_date': '20170206',
'timestamp': 1486392197, 'timestamp': 1486392197,
}, },
'params': {
'format': 'bestvideo',
'skip_download': True,
},
'expected_warnings': ['Failed to parse JSON'],
}, { }, {
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753', 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
'only_matching': True, 'only_matching': True,
@ -48,58 +78,83 @@ class CorusIE(ThePlatformFeedIE):
}, { }, {
'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/', 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
'only_matching': True 'only_matching': True
}, {
'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/',
'only_matching': True
}, {
'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/',
'only_matching': True
}] }]
_GEO_BYPASS = False
_TP_FEEDS = { _SITE_MAP = {
'globaltv': { 'globaltv': 'series',
'feed_id': 'ChQqrem0lNUp', 'etcanada': 'series',
'account_id': 2269680845, 'foodnetwork': 'food',
}, 'bigbrothercanada': 'series',
'etcanada': { 'disneychannel': 'disneyen',
'feed_id': 'ChQqrem0lNUp', 'disneylachaine': 'disneyfr',
'account_id': 2269680845,
},
'hgtv': {
'feed_id': 'L0BMHXi2no43',
'account_id': 2414428465,
},
'foodnetwork': {
'feed_id': 'ukK8o58zbRmJ',
'account_id': 2414429569,
},
'slice': {
'feed_id': '5tUJLgV2YNJ5',
'account_id': 2414427935,
},
'history': {
'feed_id': 'tQFx_TyyEq4J',
'account_id': 2369613659,
},
'showcase': {
'feed_id': '9H6qyshBZU3E',
'account_id': 2414426607,
},
'bigbrothercanada': {
'feed_id': 'ChQqrem0lNUp',
'account_id': 2269680845,
},
} }
def _real_extract(self, url): def _real_extract(self, url):
domain, video_id = re.match(self._VALID_URL, url).groups() domain, video_id = re.match(self._VALID_URL, url).groups()
feed_info = self._TP_FEEDS[domain.split('.')[0]] site = domain.split('.')[0]
return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: { path = self._SITE_MAP.get(site, site)
'episode_number': int_or_none(e.get('pl1$episode')), if path != 'series':
'season_number': int_or_none(e.get('pl1$season')), path = 'migration/' + path
'series': e.get('pl1$show'), video = self._download_json(
}, { 'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path,
'HLS': { video_id, query={'byId': video_id},
'manifest': 'm3u', headers={'Accept': 'application/json'})[0]
}, title = video['title']
'DesktopHLS Default': {
'manifest': 'm3u', formats = []
}, for source in video.get('sources', []):
'MP4 MBR': { smil_url = source.get('file')
'manifest': 'm3u', if not smil_url:
}, continue
}, feed_info['account_id']) source_type = source.get('type')
note = 'Downloading%s smil file' % (' ' + source_type if source_type else '')
resp = self._download_webpage(
smil_url, video_id, note, fatal=False,
headers=self.geo_verification_headers())
if not resp:
continue
error = self._parse_json(resp, video_id, fatal=False)
if error:
if error.get('exception') == 'GeoLocationBlocked':
self.raise_geo_restricted(countries=['CA'])
raise ExtractorError(error['description'])
smil = self._parse_xml(resp, video_id, fatal=False)
if smil is None:
continue
namespace = self._parse_smil_namespace(smil)
formats.extend(self._parse_smil_formats(
smil, smil_url, video_id, namespace))
if not formats and video.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats)
subtitles = {}
for track in video.get('tracks', []):
track_url = track.get('file')
if not track_url:
continue
lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en'
subtitles.setdefault(lang, []).append({'url': track_url})
metadata = video.get('metadata') or {}
get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number'))
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')),
'description': video.get('description'),
'timestamp': int_or_none(video.get('availableDate'), 1000),
'subtitles': subtitles,
'duration': float_or_none(metadata.get('duration')),
'series': dict_get(video, ('show', 'pl1$show')),
'season_number': get_number('season'),
'episode_number': get_number('episode'),
}

View File

@ -1,50 +1,93 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import functools import functools
import hashlib
import itertools
import json import json
import random
import re import re
import string
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_struct_pack from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
determine_ext, age_restricted,
error_to_compat_str, clean_html,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
mimetype2ext,
OnDemandPagedList, OnDemandPagedList,
parse_iso8601,
sanitized_Request,
str_to_int,
try_get, try_get,
unescapeHTML, unescapeHTML,
update_url_query,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
class DailymotionBaseInfoExtractor(InfoExtractor): class DailymotionBaseInfoExtractor(InfoExtractor):
_FAMILY_FILTER = None
_HEADERS = {
'Content-Type': 'application/json',
'Origin': 'https://www.dailymotion.com',
}
_NETRC_MACHINE = 'dailymotion'
def _get_dailymotion_cookies(self):
return self._get_cookies('https://www.dailymotion.com/')
@staticmethod @staticmethod
def _build_request(url): def _get_cookie_value(cookies, name):
"""Build a request with the family filter disabled""" cookie = cookies.get('name')
request = sanitized_Request(url) if cookie:
request.add_header('Cookie', 'family_filter=off; ff=off') return cookie.value
return request
def _download_webpage_handle_no_ff(self, url, *args, **kwargs): def _set_dailymotion_cookie(self, name, value):
request = self._build_request(url) self._set_cookie('www.dailymotion.com', name, value)
return self._download_webpage_handle(request, *args, **kwargs)
def _download_webpage_no_ff(self, url, *args, **kwargs): def _real_initialize(self):
request = self._build_request(url) cookies = self._get_dailymotion_cookies()
return self._download_webpage(request, *args, **kwargs) ff = self._get_cookie_value(cookies, 'ff')
self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit'))
self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
if not self._HEADERS.get('Authorization'):
cookies = self._get_dailymotion_cookies()
token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
if not token:
data = {
'client_id': 'f1a362d288c1b98099c7',
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
}
username, password = self._get_login_info()
if username:
data.update({
'grant_type': 'password',
'password': password,
'username': username,
})
else:
data['grant_type'] = 'client_credentials'
try:
token = self._download_json(
'https://graphql.api.dailymotion.com/oauth/token',
None, 'Downloading Access Token',
data=urlencode_postdata(data))['access_token']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
raise ExtractorError(self._parse_json(
e.cause.read().decode(), xid)['error_description'], expected=True)
raise
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
self._HEADERS['Authorization'] = 'Bearer ' + token
resp = self._download_json(
'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
'query': '''{
%s(xid: "%s"%s) {
%s
}
}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields),
}).encode(), headers=self._HEADERS)
obj = resp['data'][object_type]
if not obj:
raise ExtractorError(resp['errors'][0]['message'], expected=True)
return obj
class DailymotionIE(DailymotionBaseInfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor):
@ -54,18 +97,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
(?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)| (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
(?:www\.)?lequipe\.fr/video (?:www\.)?lequipe\.fr/video
) )
/(?P<id>[^/?_]+) /(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
''' '''
IE_NAME = 'dailymotion' IE_NAME = 'dailymotion'
_FORMATS = [
('stream_h264_ld_url', 'ld'),
('stream_h264_url', 'standard'),
('stream_h264_hq_url', 'hq'),
('stream_h264_hd_url', 'hd'),
('stream_h264_hd1080_url', 'hd180'),
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news', 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
'md5': '074b95bdee76b9e3654137aee9c79dfe', 'md5': '074b95bdee76b9e3654137aee9c79dfe',
@ -74,7 +108,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Office Christmas Party Review Jason Bateman, Olivia Munn, T.J. Miller', 'title': 'Office Christmas Party Review Jason Bateman, Olivia Munn, T.J. Miller',
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller', 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
'duration': 187, 'duration': 187,
'timestamp': 1493651285, 'timestamp': 1493651285,
'upload_date': '20170501', 'upload_date': '20170501',
@ -146,7 +179,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}, { }, {
'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2', 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw',
'only_matching': True,
}] }]
_GEO_BYPASS = False
_COMMON_MEDIA_FIELDS = '''description
geoblockedCountries {
allowed
}
xid'''
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
@ -162,264 +204,140 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
return urls return urls
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, playlist_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage_no_ff( if playlist_id:
'https://www.dailymotion.com/video/%s' % video_id, video_id) if not self._downloader.params.get('noplaylist'):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
return self.url_result(
'http://www.dailymotion.com/playlist/' + playlist_id,
'DailymotionPlaylist', playlist_id)
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
age_limit = self._rta_search(webpage)
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'description', webpage, 'description')
view_count_str = self._search_regex(
(r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
webpage, 'view count', default=None)
if view_count_str:
view_count_str = re.sub(r'\s', '', view_count_str)
view_count = str_to_int(view_count_str)
comment_count = int_or_none(self._search_regex(
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
webpage, 'comment count', default=None))
player_v5 = self._search_regex(
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/ytdl-org/youtube-dl/issues/7826
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
r'buildPlayer\(({.+?})\);',
r'var\s+config\s*=\s*({.+?});',
# New layout regex (see https://github.com/ytdl-org/youtube-dl/issues/13580)
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
webpage, 'player v5', default=None)
if player_v5:
player = self._parse_json(player_v5, video_id, fatal=False) or {}
metadata = try_get(player, lambda x: x['metadata'], dict)
if not metadata:
metadata_url = url_or_none(try_get(
player, lambda x: x['context']['metadata_template_url1']))
if metadata_url:
metadata_url = metadata_url.replace(':videoId', video_id)
else:
metadata_url = update_url_query(
'https://www.dailymotion.com/player/metadata/video/%s'
% video_id, {
'embedder': url,
'integration': 'inline',
'GK_PV5_NEON': '1',
})
metadata = self._download_json(
metadata_url, video_id, 'Downloading metadata JSON')
if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
password = self._downloader.params.get('videopassword') password = self._downloader.params.get('videopassword')
if password: media = self._call_api(
r = int(metadata['id'][1:], 36) 'media', video_id, '''... on Video {
us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=') %s
t = ''.join(random.choice(string.ascii_letters) for i in range(10)) stats {
n = us64e(compat_struct_pack('I', r)) likes {
i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest()) total
}
views {
total
}
}
}
... on Live {
%s
audienceCount
isOnAir
}''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata',
'password: "%s"' % self._downloader.params.get('videopassword') if password else None)
xid = media['xid']
metadata = self._download_json( metadata = self._download_json(
'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id) 'https://www.dailymotion.com/player/metadata/video/' + xid,
xid, 'Downloading metadata JSON',
query={'app': 'com.dailymotion.neon'})
self._check_error(metadata) error = metadata.get('error')
formats = []
for quality, media_list in metadata['qualities'].items():
for media in media_list:
media_url = media.get('url')
if not media_url:
continue
type_ = media.get('type')
if type_ == 'application/vnd.lumberjack.manifest':
continue
ext = mimetype2ext(type_) or determine_ext(media_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
media_url, video_id, 'mp4', preference=-1,
m3u8_id='hls', fatal=False)
for f in m3u8_formats:
f['url'] = f['url'].split('#')[0]
formats.append(f)
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
else:
f = {
'url': media_url,
'format_id': 'http-%s' % quality,
'ext': ext,
}
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
if m:
f.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
})
formats.append(f)
self._sort_formats(formats)
title = metadata['title']
duration = int_or_none(metadata.get('duration'))
timestamp = int_or_none(metadata.get('created_time'))
thumbnail = metadata.get('poster_url')
uploader = metadata.get('owner', {}).get('screenname')
uploader_id = metadata.get('owner', {}).get('id')
subtitles = {}
subtitles_data = metadata.get('subtitles', {}).get('data', {})
if subtitles_data and isinstance(subtitles_data, dict):
for subtitle_lang, subtitle in subtitles_data.items():
subtitles[subtitle_lang] = [{
'ext': determine_ext(subtitle_url),
'url': subtitle_url,
} for subtitle_url in subtitle.get('urls', [])]
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'age_limit': age_limit,
'view_count': view_count,
'comment_count': comment_count,
'formats': formats,
'subtitles': subtitles,
}
# vevo embed
vevo_id = self._search_regex(
r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
webpage, 'vevo embed', default=None)
if vevo_id:
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
# fallback old player
embed_page = self._download_webpage_no_ff(
'https://www.dailymotion.com/embed/video/%s' % video_id,
video_id, 'Downloading embed page')
timestamp = parse_iso8601(self._html_search_meta(
'video:release_date', webpage, 'upload date'))
info = self._parse_json(
self._search_regex(
r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE),
video_id)
self._check_error(info)
formats = []
for (key, format_id) in self._FORMATS:
video_url = info.get(key)
if video_url is not None:
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
if m_size is not None:
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
else:
width, height = None, None
formats.append({
'url': video_url,
'ext': 'mp4',
'format_id': format_id,
'width': width,
'height': height,
})
self._sort_formats(formats)
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
title = self._og_search_title(webpage, default=None)
if title is None:
title = self._html_search_regex(
r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
'title')
return {
'id': video_id,
'formats': formats,
'uploader': info['owner.screenname'],
'timestamp': timestamp,
'title': title,
'description': description,
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url'],
'age_limit': age_limit,
'view_count': view_count,
'duration': info['duration']
}
def _check_error(self, info):
error = info.get('error')
if error: if error:
title = error.get('title') or error['message'] title = error.get('title') or error['raw_message']
# See https://developer.dailymotion.com/api#access-error # See https://developer.dailymotion.com/api#access-error
if error.get('code') == 'DM007': if error.get('code') == 'DM007':
self.raise_geo_restricted(msg=title) allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list)
self.raise_geo_restricted(msg=title, countries=allowed_countries)
raise ExtractorError( raise ExtractorError(
'%s said: %s' % (self.IE_NAME, title), expected=True) '%s said: %s' % (self.IE_NAME, title), expected=True)
def _get_subtitles(self, video_id, webpage): title = metadata['title']
try: is_live = media.get('isOnAir')
sub_list = self._download_webpage( formats = []
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, for quality, media_list in metadata['qualities'].items():
video_id, note=False) for m in media_list:
except ExtractorError as err: media_url = m.get('url')
self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err)) media_type = m.get('type')
return {} if not media_url or media_type == 'application/vnd.lumberjack.manifest':
info = json.loads(sub_list) continue
if (info['total'] > 0): if media_type == 'application/x-mpegURL':
sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list']) formats.extend(self._extract_m3u8_formats(
return sub_lang_list media_url, video_id, 'mp4',
self._downloader.report_warning('video doesn\'t have subtitles') 'm3u8' if is_live else 'm3u8_native',
return {} m3u8_id='hls', fatal=False))
else:
f = {
'url': media_url,
'format_id': 'http-' + quality,
}
m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url)
if m:
width, height, fps = map(int_or_none, m.groups())
f.update({
'fps': fps,
'height': height,
'width': width,
})
formats.append(f)
for f in formats:
f['url'] = f['url'].split('#')[0]
if not f.get('fps') and f['format_id'].endswith('@60'):
f['fps'] = 60
self._sort_formats(formats)
subtitles = {}
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
for subtitle_lang, subtitle in subtitles_data.items():
subtitles[subtitle_lang] = [{
'url': subtitle_url,
} for subtitle_url in subtitle.get('urls', [])]
thumbnails = []
for height, poster_url in metadata.get('posters', {}).items():
thumbnails.append({
'height': int_or_none(height),
'id': height,
'url': poster_url,
})
owner = metadata.get('owner') or {}
stats = media.get('stats') or {}
get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total']))
return {
'id': video_id,
'title': self._live_title(title) if is_live else title,
'description': clean_html(media.get('description')),
'thumbnails': thumbnails,
'duration': int_or_none(metadata.get('duration')) or None,
'timestamp': int_or_none(metadata.get('created_time')),
'uploader': owner.get('screenname'),
'uploader_id': owner.get('id') or metadata.get('screenname'),
'age_limit': 18 if metadata.get('explicit') else 0,
'tags': metadata.get('tags'),
'view_count': get_count('view') or int_or_none(media.get('audienceCount')),
'like_count': get_count('like'),
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
}
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
_TESTS = [{
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'title': 'SPORT',
'id': 'xv4bw',
},
'playlist_mincount': 20,
}]
_PAGE_SIZE = 100 _PAGE_SIZE = 100
def _fetch_page(self, playlist_id, authorizaion, page): def _fetch_page(self, playlist_id, page):
page += 1 page += 1
videos = self._download_json( videos = self._call_api(
'https://graphql.api.dailymotion.com', self._OBJECT_TYPE, playlist_id,
playlist_id, 'Downloading page %d' % page, '''videos(allowExplicit: %s, first: %d, page: %d) {
data=json.dumps({
'query': '''{
collection(xid: "%s") {
videos(first: %d, page: %d) {
pageInfo {
hasNextPage
nextPage
}
edges { edges {
node { node {
xid xid
url url
} }
} }
} }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page),
} 'Downloading page %d' % page)['videos']
}''' % (playlist_id, self._PAGE_SIZE, page)
}).encode(), headers={
'Authorization': authorizaion,
'Origin': 'https://www.dailymotion.com',
})['data']['collection']['videos']
for edge in videos['edges']: for edge in videos['edges']:
node = edge['node'] node = edge['node']
yield self.url_result( yield self.url_result(
@ -427,86 +345,49 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
api = self._parse_json(self._search_regex(
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
webpage, 'player config'), playlist_id)['context']['api']
auth = self._download_json(
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
playlist_id, data=urlencode_postdata({
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
'grant_type': 'client_credentials',
}))
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
entries = OnDemandPagedList(functools.partial( entries = OnDemandPagedList(functools.partial(
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE) self._fetch_page, playlist_id), self._PAGE_SIZE)
return self.playlist_result( return self.playlist_result(
entries, playlist_id, entries, playlist_id)
self._og_search_title(webpage))
class DailymotionUserIE(DailymotionBaseInfoExtractor): class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
_TESTS = [{
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'id': 'xv4bw',
},
'playlist_mincount': 20,
}]
_OBJECT_TYPE = 'collection'
class DailymotionUserIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:user' IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)' _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{ _TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv', 'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': { 'info_dict': {
'id': 'nqtv', 'id': 'nqtv',
'title': 'Rémi Gaillard',
}, },
'playlist_mincount': 100, 'playlist_mincount': 152,
}, { }, {
'url': 'http://www.dailymotion.com/user/UnderProject', 'url': 'http://www.dailymotion.com/user/UnderProject',
'info_dict': { 'info_dict': {
'id': 'UnderProject', 'id': 'UnderProject',
'title': 'UnderProject',
}, },
'playlist_mincount': 1800, 'playlist_mincount': 1000,
'expected_warnings': [
'Stopped at duplicated page',
],
'skip': 'Takes too long time', 'skip': 'Takes too long time',
}, {
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
},
'playlist_mincount': 148,
'params': {
'age_limit': 0,
},
}] }]
_OBJECT_TYPE = 'channel'
def _extract_entries(self, id):
    """Yield url_result entries for every video on the user's paginated pages.

    Walks page 1, 2, ... of the user's video listing until either the
    "more pages" indicator disappears or the server starts redirecting
    back to an already-visited page (which would otherwise loop forever).
    """
    seen_ids = set()
    visited_urls = set()
    for pagenum in itertools.count(1):
        page_url = self._PAGE_TEMPLATE % (id, pagenum)
        webpage, urlh = self._download_webpage_handle_no_ff(
            page_url, id, 'Downloading page %s' % pagenum)
        # The final URL after redirects; a repeat means pagination wrapped.
        final_url = urlh.geturl()
        if final_url in visited_urls:
            self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
                page_url, final_url), id)
            break
        visited_urls.add(final_url)
        for xid in re.findall(r'data-xid="(.+?)"', webpage):
            if xid in seen_ids:
                continue
            yield self.url_result(
                'http://www.dailymotion.com/video/%s' % xid,
                DailymotionIE.ie_key(), xid)
            seen_ids.add(xid)
        if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
            break
def _real_extract(self, url):
    """Build a playlist of all videos belonging to a Dailymotion user."""
    user = re.match(self._VALID_URL, url).group('user')
    webpage = self._download_webpage(
        'https://www.dailymotion.com/user/%s' % user, user)
    # The human-readable channel name shown in the page navigation.
    display_name = unescapeHTML(self._html_search_regex(
        r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
        webpage, 'user'))
    return {
        '_type': 'playlist',
        'id': user,
        'title': display_name,
        'entries': self._extract_entries(user),
    }

View File

@ -21,6 +21,7 @@ from .acast import (
from .adn import ADNIE from .adn import ADNIE
from .adobeconnect import AdobeConnectIE from .adobeconnect import AdobeConnectIE
from .adobetv import ( from .adobetv import (
AdobeTVEmbedIE,
AdobeTVIE, AdobeTVIE,
AdobeTVShowIE, AdobeTVShowIE,
AdobeTVChannelIE, AdobeTVChannelIE,
@ -800,10 +801,6 @@ from .ooyala import (
OoyalaIE, OoyalaIE,
OoyalaExternalIE, OoyalaExternalIE,
) )
from .openload import (
OpenloadIE,
VerystreamIE,
)
from .ora import OraTVIE from .ora import OraTVIE
from .orf import ( from .orf import (
ORFTVthekIE, ORFTVthekIE,
@ -1064,7 +1061,6 @@ from .srmediathek import SRMediathekIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE from .steam import SteamIE
from .streamable import StreamableIE from .streamable import StreamableIE
from .streamango import StreamangoIE
from .streamcloud import StreamcloudIE from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE from .streetvoice import StreetVoiceIE
@ -1176,6 +1172,7 @@ from .turbo import TurboIE
from .tv2 import ( from .tv2 import (
TV2IE, TV2IE,
TV2ArticleIE, TV2ArticleIE,
KatsomoIE,
) )
from .tv2dk import TV2DKIE from .tv2dk import TV2DKIE
from .tv2hu import TV2HuIE from .tv2hu import TV2HuIE

View File

@ -88,10 +88,6 @@ from .piksel import PikselIE
from .videa import VideaIE from .videa import VideaIE
from .twentymin import TwentyMinutenIE from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE from .ustream import UstreamIE
from .openload import (
OpenloadIE,
VerystreamIE,
)
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .rutube import RutubeIE from .rutube import RutubeIE
from .limelight import LimelightBaseIE from .limelight import LimelightBaseIE
@ -3048,18 +3044,6 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key()) twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
# Look for Openload embeds
openload_urls = OpenloadIE._extract_urls(webpage)
if openload_urls:
return self.playlist_from_matches(
openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
# Look for Verystream embeds
verystream_urls = VerystreamIE._extract_urls(webpage)
if verystream_urls:
return self.playlist_from_matches(
verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key())
# Look for VideoPress embeds # Look for VideoPress embeds
videopress_urls = VideoPressIE._extract_urls(webpage) videopress_urls = VideoPressIE._extract_urls(webpage)
if videopress_urls: if videopress_urls:

View File

@ -1,8 +1,9 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json import json
import re
import sys
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -91,31 +92,42 @@ class IviIE(InfoExtractor):
'contentid': video_id 'contentid': video_id
} }
] ]
}).encode() })
bundled = hasattr(sys, 'frozen')
for site in (353, 183):
content_data = (data % site).encode()
if site == 353:
if bundled:
continue
try: try:
from Crypto.Cipher import Blowfish from Cryptodome.Cipher import Blowfish
from Crypto.Hash import CMAC from Cryptodome.Hash import CMAC
pycryptodomex_found = True
except ImportError:
pycryptodomex_found = False
continue
timestamp = self._download_json( timestamp = (self._download_json(
self._LIGHT_URL, video_id, self._LIGHT_URL, video_id,
'Downloading timestamp JSON', data=json.dumps({ 'Downloading timestamp JSON', data=json.dumps({
'method': 'da.timestamp.get', 'method': 'da.timestamp.get',
'params': [] 'params': []
}).encode())['result'] }).encode(), fatal=False) or {}).get('result')
if not timestamp:
continue
data = data % 353
query = { query = {
'ts': timestamp, 'ts': timestamp,
'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + data, Blowfish).hexdigest(), 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, Blowfish).hexdigest(),
} }
except ImportError: else:
data = data % 183
query = {} query = {}
video_json = self._download_json( video_json = self._download_json(
self._LIGHT_URL, video_id, self._LIGHT_URL, video_id,
'Downloading video JSON', data=data, query=query) 'Downloading video JSON', data=content_data, query=query)
error = video_json.get('error') error = video_json.get('error')
if error: if error:
@ -126,13 +138,21 @@ class IviIE(InfoExtractor):
self.raise_geo_restricted(message, self._GEO_COUNTRIES) self.raise_geo_restricted(message, self._GEO_COUNTRIES)
elif origin == 'NoRedisValidData': elif origin == 'NoRedisValidData':
extractor_msg = 'Video %s does not exist' extractor_msg = 'Video %s does not exist'
elif message: elif site == 353:
if 'недоступен для просмотра на площадке s183' in message: continue
elif bundled:
raise ExtractorError( raise ExtractorError(
'pycryptodome not found. Please install it.', 'This feature does not work from bundled exe. Run youtube-dl from sources.',
expected=True) expected=True)
elif not pycryptodomex_found:
raise ExtractorError(
'pycryptodomex not found. Please install it.',
expected=True)
elif message:
extractor_msg += ': ' + message extractor_msg += ': ' + message
raise ExtractorError(extractor_msg % video_id, expected=True) raise ExtractorError(extractor_msg % video_id, expected=True)
else:
break
result = video_json['result'] result = video_json['result']
title = result['title'] title = result['title']

View File

@ -14,20 +14,27 @@ from ..utils import (
class MSNIE(InfoExtractor): class MSNIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)' _VALID_URL = r'https?://(?:(?:www|preview)\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE', 'url': 'https://www.msn.com/en-in/money/video/7-ways-to-get-rid-of-chest-congestion/vi-BBPxU6d',
'md5': '8442f66c116cbab1ff7098f986983458', 'md5': '087548191d273c5c55d05028f8d2cbcd',
'info_dict': { 'info_dict': {
'id': 'BBqQYNE', 'id': 'BBPxU6d',
'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message', 'display_id': '7-ways-to-get-rid-of-chest-congestion',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message', 'title': 'Seven ways to get rid of chest congestion',
'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25', 'description': '7 Ways to Get Rid of Chest Congestion',
'duration': 104, 'duration': 88,
'uploader': 'CBS Entertainment', 'uploader': 'Health',
'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v', 'uploader_id': 'BBPrMqa',
}, },
}, {
# Article, multiple Dailymotion Embeds
'url': 'https://www.msn.com/en-in/money/sports/hottest-football-wags-greatest-footballers-turned-managers-and-more/ar-BBpc7Nl',
'info_dict': {
'id': 'BBpc7Nl',
},
'playlist_mincount': 4,
}, { }, {
'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf', 'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
'only_matching': True, 'only_matching': True,
@ -43,44 +50,58 @@ class MSNIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}, { }, {
# Vidible(AOL) Embed # Vidible(AOL) Embed
'url': 'https://www.msn.com/en-us/video/animals/yellowstone-park-staffers-catch-deer-engaged-in-behavior-they-cant-explain/vi-AAGfdg1', 'url': 'https://www.msn.com/en-us/money/other/jupiter-is-about-to-come-so-close-you-can-see-its-moons-with-binoculars/vi-AACqsHR',
'only_matching': True, 'only_matching': True,
}, { }, {
# Dailymotion Embed # Dailymotion Embed
'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L', 'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L',
'only_matching': True, 'only_matching': True,
}, {
# YouTube Embed
'url': 'https://www.msn.com/en-in/money/news/meet-vikram-%E2%80%94-chandrayaan-2s-lander/vi-AAGUr0v',
'only_matching': True,
}, {
# NBCSports Embed
'url': 'https://www.msn.com/en-us/money/football_nfl/week-13-preview-redskins-vs-panthers/vi-BBXsCDb',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id, page_id = re.match(self._VALID_URL, url).groups()
video_id, display_id = mobj.group('id', 'display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video = self._parse_json( entries = []
self._search_regex( for _, metadata in re.findall(r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1', webpage):
r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1', video = self._parse_json(unescapeHTML(metadata), display_id)
webpage, 'video data', default='{}', group='data'),
display_id, transform_source=unescapeHTML)
if not video:
error = unescapeHTML(self._search_regex(
r'data-error=(["\'])(?P<error>.+?)\1',
webpage, 'error', group='error'))
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
player_name = video.get('playerName')
if player_name:
provider_id = video.get('providerId') provider_id = video.get('providerId')
if provider_id: player_name = video.get('playerName')
if player_name and provider_id:
entry = None
if player_name == 'AOL': if player_name == 'AOL':
return self.url_result( if provider_id.startswith('http'):
provider_id = self._search_regex(
r'https?://delivery\.vidible\.tv/video/redirect/([0-9a-f]{24})',
provider_id, 'vidible id')
entry = self.url_result(
'aol-video:' + provider_id, 'Aol', provider_id) 'aol-video:' + provider_id, 'Aol', provider_id)
elif player_name == 'Dailymotion': elif player_name == 'Dailymotion':
return self.url_result( entry = self.url_result(
'https://www.dailymotion.com/video/' + provider_id, 'https://www.dailymotion.com/video/' + provider_id,
'Dailymotion', provider_id) 'Dailymotion', provider_id)
elif player_name == 'YouTube':
entry = self.url_result(
provider_id, 'Youtube', provider_id)
elif player_name == 'NBCSports':
entry = self.url_result(
'http://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/' + provider_id,
'NBCSportsVPlayer', provider_id)
if entry:
entries.append(entry)
continue
video_id = video['uuid']
title = video['title'] title = video['title']
formats = [] formats = []
@ -88,23 +109,30 @@ class MSNIE(InfoExtractor):
format_url = file_.get('url') format_url = file_.get('url')
if not format_url: if not format_url:
continue continue
if 'm3u8' in format_url: if 'format=m3u8-aapl' in format_url:
# m3u8_native should not be used here until # m3u8_native should not be used here until
# https://github.com/ytdl-org/youtube-dl/issues/9913 is fixed # https://github.com/ytdl-org/youtube-dl/issues/9913 is fixed
m3u8_formats = self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, display_id, 'mp4', format_url, display_id, 'mp4',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False))
formats.extend(m3u8_formats) elif 'format=mpd-time-csf' in format_url:
elif determine_ext(format_url) == 'ism': formats.extend(self._extract_mpd_formats(
format_url, display_id, 'dash', fatal=False))
elif '.ism' in format_url:
if format_url.endswith('.ism'):
format_url += '/manifest'
formats.extend(self._extract_ism_formats( formats.extend(self._extract_ism_formats(
format_url + '/Manifest', display_id, 'mss', fatal=False)) format_url, display_id, 'mss', fatal=False))
else: else:
format_id = file_.get('formatCode')
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'ext': 'mp4', 'ext': 'mp4',
'format_id': 'http', 'format_id': format_id,
'width': int_or_none(file_.get('width')), 'width': int_or_none(file_.get('width')),
'height': int_or_none(file_.get('height')), 'height': int_or_none(file_.get('height')),
'vbr': int_or_none(self._search_regex(r'_(\d+)\.mp4', format_url, 'vbr', default=None)),
'preference': 1 if format_id == '1001' else None,
}) })
self._sort_formats(formats) self._sort_formats(formats)
@ -120,7 +148,7 @@ class MSNIE(InfoExtractor):
'url': format_url, 'url': format_url,
}) })
return { entries.append({
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
@ -132,4 +160,12 @@ class MSNIE(InfoExtractor):
'creator': video.get('creator'), 'creator': video.get('creator'),
'subtitles': subtitles, 'subtitles': subtitles,
'formats': formats, 'formats': formats,
} })
if not entries:
error = unescapeHTML(self._search_regex(
r'data-error=(["\'])(?P<error>.+?)\1',
webpage, 'error', group='error'))
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
return self.playlist_result(entries, page_id)

View File

@ -5,13 +5,12 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from ..utils import unescapeHTML
class NintendoIE(InfoExtractor): class NintendoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?nintendo\.com/(?:games/detail|nintendo-direct)/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj', 'url': 'https://www.nintendo.com/games/detail/duck-hunt-wii-u/',
'info_dict': { 'info_dict': {
'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW', 'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW',
'ext': 'flv', 'ext': 'flv',
@ -28,7 +27,19 @@ class NintendoIE(InfoExtractor):
'id': 'tokyo-mirage-sessions-fe-wii-u', 'id': 'tokyo-mirage-sessions-fe-wii-u',
'title': 'Tokyo Mirage Sessions ♯FE', 'title': 'Tokyo Mirage Sessions ♯FE',
}, },
'playlist_count': 3, 'playlist_count': 4,
}, {
'url': 'https://www.nintendo.com/nintendo-direct/09-04-2019/',
'info_dict': {
'id': 'J2bXdmaTE6fe3dWJTPcc7m23FNbc_A1V',
'ext': 'mp4',
'title': 'Switch_ROS_ND0904-H264.mov',
'duration': 2324.758,
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -39,8 +50,11 @@ class NintendoIE(InfoExtractor):
entries = [ entries = [
OoyalaIE._build_url_result(m.group('code')) OoyalaIE._build_url_result(m.group('code'))
for m in re.finditer( for m in re.finditer(
r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P<code>(?:(?!\2).)+)\2', r'data-(?:video-id|directVideoId)=(["\'])(?P<code>(?:(?!\1).)+)\1', webpage)]
webpage)]
title = self._html_search_regex(
r'(?s)<(?:span|div)[^>]+class="(?:title|wrapper)"[^>]*>.*?<h1>(.+?)</h1>',
webpage, 'title', fatal=False)
return self.playlist_result( return self.playlist_result(
entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False))) entries, page_id, title)

View File

@ -23,8 +23,8 @@ class NRLTVIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
q_data = self._parse_json(self._search_regex( q_data = self._parse_json(self._html_search_regex(
r"(?s)q-data='({.+?})'", webpage, 'player data'), display_id) r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id)
ooyala_id = q_data['videoId'] ooyala_id = q_data['videoId']
return self.url_result( return self.url_result(
'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title')) 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))

View File

@ -1,12 +1,12 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_b64decode, compat_b64decode,
compat_str, compat_str,
compat_urllib_parse_urlencode,
) )
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
@ -21,9 +21,9 @@ from ..utils import (
class OoyalaBaseIE(InfoExtractor): class OoyalaBaseIE(InfoExtractor):
_PLAYER_BASE = 'http://player.ooyala.com/' _PLAYER_BASE = 'http://player.ooyala.com/'
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s'
def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None): def _extract(self, content_tree_url, video_id, domain=None, supportedformats=None, embed_token=None):
content_tree = self._download_json(content_tree_url, video_id)['content_tree'] content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]] metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code'] embed_code = metadata['embed_code']
@ -31,19 +31,22 @@ class OoyalaBaseIE(InfoExtractor):
title = metadata['title'] title = metadata['title']
auth_data = self._download_json( auth_data = self._download_json(
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code),
+ compat_urllib_parse_urlencode({ video_id, headers=self.geo_verification_headers(), query={
'domain': domain, 'domain': domain or 'player.ooyala.com',
'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth', 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
'embedToken': embed_token, 'embedToken': embed_token,
}), video_id, headers=self.geo_verification_headers()) })['authorization_data'][embed_code]
cur_auth_data = auth_data['authorization_data'][embed_code]
urls = [] urls = []
formats = [] formats = []
if cur_auth_data['authorized']: streams = auth_data.get('streams') or [{
for stream in cur_auth_data['streams']: 'delivery_type': 'hls',
'url': {
'data': base64.b64encode(('http://player.ooyala.com/hls/player/all/%s.m3u8' % embed_code).encode()).decode(),
}
}]
for stream in streams:
url_data = try_get(stream, lambda x: x['url']['data'], compat_str) url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
if not url_data: if not url_data:
continue continue
@ -81,9 +84,9 @@ class OoyalaBaseIE(InfoExtractor):
'vbr': int_or_none(stream.get('video_bitrate')), 'vbr': int_or_none(stream.get('video_bitrate')),
'fps': float_or_none(stream.get('framerate')), 'fps': float_or_none(stream.get('framerate')),
}) })
else: if not formats and not auth_data.get('authorized'):
raise ExtractorError('%s said: %s' % ( raise ExtractorError('%s said: %s' % (
self.IE_NAME, cur_auth_data['message']), expected=True) self.IE_NAME, auth_data['message']), expected=True)
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {} subtitles = {}

View File

@ -3,21 +3,17 @@ from __future__ import unicode_literals
import json import json
import os import os
import re
import subprocess import subprocess
import tempfile import tempfile
from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urlparse, compat_urlparse,
compat_kwargs, compat_kwargs,
) )
from ..utils import ( from ..utils import (
check_executable, check_executable,
determine_ext,
encodeArgument, encodeArgument,
ExtractorError, ExtractorError,
get_element_by_id,
get_exe_version, get_exe_version,
is_outdated_version, is_outdated_version,
std_headers, std_headers,
@ -240,262 +236,3 @@ class PhantomJSwrapper(object):
self._load_cookies() self._load_cookies()
return (html, encodeArgument(out)) return (html, encodeArgument(out))
class OpenloadIE(InfoExtractor):
_DOMAINS = r'''
(?:
openload\.(?:co|io|link|pw)|
oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)|
oladblock\.(?:services|xyz|me)|openloed\.co
)
'''
_VALID_URL = r'''(?x)
https?://
(?P<host>
(?:www\.)?
%s
)/
(?:f|embed)/
(?P<id>[a-zA-Z0-9-_]+)
''' % _DOMAINS
_EMBED_WORD = 'embed'
_STREAM_WORD = 'f'
_REDIR_WORD = 'stream'
_URL_IDS = ('streamurl', 'streamuri', 'streamurj')
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',
'md5': 'bf1c059b004ebc7a256f89408e65c36e',
'info_dict': {
'id': 'kUEfGclsU9o',
'ext': 'mp4',
'title': 'skyrim_no-audio_1080.mp4',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
'url': 'https://openload.co/embed/rjC09fkPLYs',
'info_dict': {
'id': 'rjC09fkPLYs',
'ext': 'mp4',
'title': 'movie.mp4',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': {
'en': [{
'ext': 'vtt',
}],
},
},
'params': {
'skip_download': True, # test subtitles only
},
}, {
'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
'only_matching': True,
}, {
'url': 'https://openload.io/f/ZAn6oz-VZGE/',
'only_matching': True,
}, {
'url': 'https://openload.co/f/_-ztPaZtMhM/',
'only_matching': True,
}, {
# unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
# for title and ext
'url': 'https://openload.co/embed/Sxz5sADo82g/',
'only_matching': True,
}, {
# unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
# via https://openload.co/f/e-Ixz9ZR5L0/
'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
'only_matching': True,
}, {
'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
'only_matching': True,
}, {
'url': 'http://www.openload.link/f/KnG-kKZdcfY',
'only_matching': True,
}, {
'url': 'https://oload.stream/f/KnG-kKZdcfY',
'only_matching': True,
}, {
'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
'only_matching': True,
}, {
'url': 'https://oload.win/f/kUEfGclsU9o',
'only_matching': True,
}, {
'url': 'https://oload.download/f/kUEfGclsU9o',
'only_matching': True,
}, {
'url': 'https://oload.cloud/f/4ZDnBXRWiB8',
'only_matching': True,
}, {
# Its title has not got its extension but url has it
'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
'only_matching': True,
}, {
'url': 'https://oload.cc/embed/5NEAbI2BDSk',
'only_matching': True,
}, {
'url': 'https://oload.icu/f/-_i4y_F_Hs8',
'only_matching': True,
}, {
'url': 'https://oload.fun/f/gb6G1H4sHXY',
'only_matching': True,
}, {
'url': 'https://oload.club/f/Nr1L-aZ2dbQ',
'only_matching': True,
}, {
'url': 'https://oload.info/f/5NEAbI2BDSk',
'only_matching': True,
}, {
'url': 'https://openload.pw/f/WyKgK8s94N0',
'only_matching': True,
}, {
'url': 'https://oload.pw/f/WyKgK8s94N0',
'only_matching': True,
}, {
'url': 'https://oload.live/f/-Z58UZ-GR4M',
'only_matching': True,
}, {
'url': 'https://oload.space/f/IY4eZSst3u8/',
'only_matching': True,
}, {
'url': 'https://oload.services/embed/bs1NWj1dCag/',
'only_matching': True,
}, {
'url': 'https://oload.online/f/W8o2UfN1vNY/',
'only_matching': True,
}, {
'url': 'https://oload.monster/f/W8o2UfN1vNY/',
'only_matching': True,
}, {
'url': 'https://oload.press/embed/drTBl1aOTvk/',
'only_matching': True,
}, {
'url': 'https://oload.website/embed/drTBl1aOTvk/',
'only_matching': True,
}, {
'url': 'https://oload.life/embed/oOzZjNPw9Dc/',
'only_matching': True,
}, {
'url': 'https://oload.biz/f/bEk3Gp8ARr4/',
'only_matching': True,
}, {
'url': 'https://oload.best/embed/kkz9JgVZeWc/',
'only_matching': True,
}, {
'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
'only_matching': True,
}, {
'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/',
'only_matching': True,
}, {
'url': 'https://oladblock.me/f/b8NWEgkqNLI/',
'only_matching': True,
}, {
'url': 'https://openloed.co/f/b8NWEgkqNLI/',
'only_matching': True,
}, {
'url': 'https://oload.vip/f/kUEfGclsU9o',
'only_matching': True,
}]
@classmethod
def _extract_urls(cls, webpage):
    """Return all embedded player iframe src URLs found in *webpage*.

    The pattern is assembled from the per-class domain alternation and
    embed path word, so subclasses reuse this method unchanged.
    """
    # (?x) verbose mode; scheme is optional in the captured URL.
    iframe_src_re = r'(?x)<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' % (
        cls._DOMAINS, cls._EMBED_WORD)
    return re.findall(iframe_src_re, webpage)
def _extract_decrypted_page(self, page_url, webpage, video_id):
    """Render the page with PhantomJS so JS-generated content is present.

    Openload obfuscates the stream id with JavaScript; executing the page
    in PhantomJS (>= 2.0) yields the decrypted DOM. Subclasses may
    override this to skip the step (see VerystreamIE).
    """
    js_renderer = PhantomJSwrapper(self, required_version='2.0')
    rendered_page, _ = js_renderer.get(page_url, html=webpage, video_id=video_id)
    return rendered_page
def _real_extract(self, url):
    """Resolve an Openload-style page URL to a direct video URL + metadata.

    Tries the embed page first, then the stream page; the winning page is
    run through _extract_decrypted_page and the decoded stream id is read
    from the DOM to build the final redirect URL.
    """
    mobj = re.match(self._VALID_URL, url)
    host = mobj.group('host')
    video_id = mobj.group('id')

    # %%s leaves a %s placeholder for the path word (embed/stream).
    url_pattern = 'https://%s/%%s/%s/' % (host, video_id)

    # Only the final attempt (the stream page) is fatal; earlier download
    # failures or "not found" pages just fall through to the next path.
    for path in (self._EMBED_WORD, self._STREAM_WORD):
        page_url = url_pattern % path
        last = path == self._STREAM_WORD
        webpage = self._download_webpage(
            page_url, video_id, 'Downloading %s webpage' % path,
            fatal=last)
        if not webpage:
            continue
        if 'File not found' in webpage or 'deleted by the owner' in webpage:
            if not last:
                continue
            raise ExtractorError('File not found', expected=True, video_id=video_id)
        break

    # Decrypt (PhantomJS for Openload; a no-op for Verystream).
    webpage = self._extract_decrypted_page(page_url, webpage, video_id)
    # The decoded stream id lives in one of several known element ids.
    for element_id in self._URL_IDS:
        decoded_id = get_element_by_id(element_id, webpage)
        if decoded_id:
            break
    if not decoded_id:
        # Fallback: scrape the id with progressively looser patterns.
        decoded_id = self._search_regex(
            (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
             r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
             r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
             r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
             r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
            'stream URL')

    # mime=true asks the server to answer with a proper Content-Type.
    video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id)

    # Title fallbacks: og:title, then the title <span>, then (fatally)
    # the description meta tag.
    title = self._og_search_title(webpage, default=None) or self._search_regex(
        r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
        'title', default=None) or self._html_search_meta(
        'description', webpage, 'title', fatal=True)

    # Reuse any HTML5 <video> element for thumbnail/subtitles.
    entries = self._parse_html5_media_entries(page_url, webpage, video_id)
    entry = entries[0] if entries else {}
    subtitles = entry.get('subtitles')

    return {
        'id': video_id,
        'title': title,
        'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
        'url': video_url,
        # Some titles carry the real extension; otherwise derive it from
        # the original URL, defaulting to mp4.
        'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
        'subtitles': subtitles,
    }
class VerystreamIE(OpenloadIE):
    """Extractor for verystream.com / woof.tube.

    Reuses the whole Openload extraction flow; only the URL path words,
    the redirect endpoint and the (absent) decryption step differ.
    """
    IE_NAME = 'verystream'

    # Hosts served by this extractor.
    _DOMAINS = r'(?:verystream\.com|woof\.tube)'
    _VALID_URL = r'''(?x)
https?://
(?P<host>
(?:www\.)?
%s
)/
(?:stream|e)/
(?P<id>[a-zA-Z0-9-_]+)
''' % _DOMAINS
    # Path words and redirect endpoint used by OpenloadIE._real_extract.
    _EMBED_WORD = 'e'
    _STREAM_WORD = 'stream'
    _REDIR_WORD = 'gettoken'
    # Element id holding the decoded stream id.
    _URL_IDS = ('videolink', )
    _TESTS = [{
        'url': 'https://verystream.com/stream/c1GWQ9ngBBx/',
        'md5': 'd3e8c5628ccb9970b65fd65269886795',
        'info_dict': {
            'id': 'c1GWQ9ngBBx',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://verystream.com/e/c1GWQ9ngBBx/',
        'only_matching': True,
    }]

    def _extract_decrypted_page(self, page_url, webpage, video_id):
        # No PhantomJS pass needed: Verystream pages are not obfuscated.
        return webpage  # for Verystream, the webpage is already decrypted

View File

@ -1,13 +1,18 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_b64decode from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote_plus,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
js_to_json,
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
parse_filesize, parse_filesize,
rot47,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -112,16 +117,22 @@ class VivoIE(SharedBaseIE):
webpage, 'filesize', fatal=False)) webpage, 'filesize', fatal=False))
def _extract_video_url(self, webpage, video_id, url): def _extract_video_url(self, webpage, video_id, url):
def decode_url(encoded_url): def decode_url_old(encoded_url):
return compat_b64decode(encoded_url).decode('utf-8') return compat_b64decode(encoded_url).decode('utf-8')
stream_url = url_or_none(decode_url(self._search_regex( stream_url = self._search_regex(
r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'stream url', default=None, group='url'))) 'stream url', default=None, group='url')
if stream_url:
stream_url = url_or_none(decode_url_old(stream_url))
if stream_url: if stream_url:
return stream_url return stream_url
return self._parse_json(
def decode_url(encoded_url):
return rot47(compat_urllib_parse_unquote_plus(encoded_url))
return decode_url(self._parse_json(
self._search_regex( self._search_regex(
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
webpage, 'stream', group='url'), 'stream'),
video_id, transform_source=decode_url)[0] video_id, transform_source=js_to_json)['source'])

View File

@ -245,7 +245,7 @@ class SoundcloudIE(InfoExtractor):
_API_BASE = 'https://api.soundcloud.com/' _API_BASE = 'https://api.soundcloud.com/'
_API_V2_BASE = 'https://api-v2.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/'
_BASE_URL = 'https://soundcloud.com/' _BASE_URL = 'https://soundcloud.com/'
_CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' _CLIENT_ID = 'UW9ajvMgVdMMW3cdeBi8lPfN6dvOVGji'
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
_ARTWORK_MAP = { _ARTWORK_MAP = {

View File

@ -1,128 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
js_to_json,
)
class StreamangoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
'md5': 'e992787515a182f55e38fc97588d802a',
'info_dict': {
'id': 'clapasobsptpkdfe',
'ext': 'mp4',
'title': '20170315_150006.mp4',
}
}, {
# no og:title
'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
'info_dict': {
'id': 'foqebrpftarclpob',
'ext': 'mp4',
'title': 'foqebrpftarclpob',
},
'params': {
'skip_download': True,
},
'skip': 'gone',
}, {
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
'only_matching': True,
}, {
'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4',
'only_matching': True,
}, {
'url': 'https://streamcherry.com/f/clapasobsptpkdfe/',
'only_matching': True,
}]
def _real_extract(self, url):
def decrypt_src(encoded, val):
ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
decoded = ''
sm = [None] * 4
i = 0
str_len = len(encoded)
while i < str_len:
for j in range(4):
sm[j % 4] = ALPHABET.index(encoded[i])
i += 1
char_code = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val
decoded += compat_chr(char_code)
if sm[2] != 0x40:
char_code = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)
decoded += compat_chr(char_code)
if sm[3] != 0x40:
char_code = ((sm[2] & 0x3) << 0x6) | sm[3]
decoded += compat_chr(char_code)
return decoded
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage, default=video_id)
formats = []
for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
if mobj is None:
continue
format_ = format_.replace(mobj.group(0), '')
video = self._parse_json(
format_, video_id, transform_source=js_to_json,
fatal=False) or {}
mobj = re.search(
r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
mobj.group(1))
if mobj is None:
continue
src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
if not src:
continue
ext = determine_ext(src, default_ext=None)
if video.get('type') == 'application/dash+xml' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': src,
'ext': ext or 'mp4',
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
'tbr': int_or_none(video.get('bitrate')),
})
if not formats:
error = self._search_regex(
r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
'error', default=None)
if not error and '>Sorry' in webpage:
error = 'Video %s is not available' % video_id
if error:
raise ExtractorError(error, expected=True)
self._sort_formats(formats)
return {
'id': video_id,
'url': url,
'title': title,
'formats': formats,
}

View File

@ -1,35 +1,33 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from .ooyala import OoyalaIE
class TeachingChannelIE(InfoExtractor): class TeachingChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)' _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos?/(?P<id>[^/?&#]+)'
_TEST = { _TEST = {
'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
'md5': '3d6361864d7cac20b57c8784da17166f',
'info_dict': { 'info_dict': {
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', 'id': '3swwlzkT',
'ext': 'mp4', 'ext': 'mp4',
'title': 'A History of Teaming', 'title': 'A History of Teaming',
'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
'duration': 422.255, 'duration': 422,
'upload_date': '20170316',
'timestamp': 1489691297,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['Ooyala'], 'add_ie': ['JWPlatform'],
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id = self._match_id(url)
title = mobj.group('title') webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, title) mid = self._search_regex(
ooyala_code = self._search_regex( r'(?:data-mid=["\']|id=["\']jw-video-player-)([a-zA-Z0-9]{8})',
r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code') webpage, 'media id')
return OoyalaIE._build_url_result(ooyala_code) return self.url_result('jwplatform:' + mid, 'JWPlatform', mid)

View File

@ -4,13 +4,16 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError,
int_or_none, int_or_none,
float_or_none, float_or_none,
js_to_json, js_to_json,
parse_iso8601, parse_iso8601,
remove_end, remove_end,
strip_or_none,
try_get, try_get,
) )
@ -21,7 +24,7 @@ class TV2IE(InfoExtractor):
'url': 'http://www.tv2.no/v/916509/', 'url': 'http://www.tv2.no/v/916509/',
'info_dict': { 'info_dict': {
'id': '916509', 'id': '916509',
'ext': 'mp4', 'ext': 'flv',
'title': 'Se Frode Gryttens hyllest av Steven Gerrard', 'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.', 'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
'timestamp': 1431715610, 'timestamp': 1431715610,
@ -30,21 +33,32 @@ class TV2IE(InfoExtractor):
'view_count': int, 'view_count': int,
'categories': list, 'categories': list,
}, },
'params': {
# m3u8 download
'skip_download': True,
},
} }
_API_DOMAIN = 'sumo.tv2.no'
_PROTOCOLS = ('HDS', 'HLS', 'DASH')
_GEO_COUNTRIES = ['NO']
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
formats = [] formats = []
format_urls = [] format_urls = []
for protocol in ('HDS', 'HLS'): for protocol in self._PROTOCOLS:
try:
data = self._download_json( data = self._download_json(
'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol), api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol,
video_id, 'Downloading play JSON')['playback'] video_id, 'Downloading play JSON')['playback']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), video_id)['error']
error_code = error.get('code')
if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif error_code == 'SESSION_NOT_AUTHENTICATED':
self.raise_login_required()
raise ExtractorError(error['description'])
raise
items = try_get(data, lambda x: x['items']['item']) items = try_get(data, lambda x: x['items']['item'])
if not items: if not items:
continue continue
@ -65,9 +79,13 @@ class TV2IE(InfoExtractor):
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=format_id, fatal=False)) video_url, video_id, f4m_id=format_id, fatal=False))
elif ext == 'm3u8': elif ext == 'm3u8':
if not data.get('drmProtected'):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native', video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)) m3u8_id=format_id, fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, format_id, fatal=False))
elif ext == 'ism' or video_url.endswith('.ism/Manifest'): elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
pass pass
else: else:
@ -77,34 +95,30 @@ class TV2IE(InfoExtractor):
'tbr': int_or_none(item.get('bitrate')), 'tbr': int_or_none(item.get('bitrate')),
'filesize': int_or_none(item.get('fileSize')), 'filesize': int_or_none(item.get('fileSize')),
}) })
if not formats and data.get('drmProtected'):
raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats) self._sort_formats(formats)
asset = self._download_json( asset = self._download_json(
'http://sumo.tv2.no/api/web/asset/%s.json' % video_id, api_base + '.json', video_id,
video_id, 'Downloading metadata JSON')['asset'] 'Downloading metadata JSON')['asset']
title = asset['title'] title = asset['title']
description = asset.get('description')
timestamp = parse_iso8601(asset.get('createTime'))
duration = float_or_none(asset.get('accurateDuration') or asset.get('duration'))
view_count = int_or_none(asset.get('views'))
categories = asset.get('keywords', '').split(',')
thumbnails = [{ thumbnails = [{
'id': thumbnail.get('@type'), 'id': thumbnail.get('@type'),
'url': thumbnail.get('url'), 'url': thumbnail.get('url'),
} for _, thumbnail in asset.get('imageVersions', {}).items()] } for _, thumbnail in (asset.get('imageVersions') or {}).items()]
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'title': title, 'title': title,
'description': description, 'description': strip_or_none(asset.get('description')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'timestamp': timestamp, 'timestamp': parse_iso8601(asset.get('createTime')),
'duration': duration, 'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
'view_count': view_count, 'view_count': int_or_none(asset.get('views')),
'categories': categories, 'categories': asset.get('keywords', '').split(','),
'formats': formats, 'formats': formats,
} }
@ -116,7 +130,7 @@ class TV2ArticleIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '6930542', 'id': '6930542',
'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret', 'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret',
'description': 'md5:339573779d3eea3542ffe12006190954', 'description': 'De fire siktede nekter fortsatt for å ha stjålet pingvinbabyene, men innrømmer å ha åpnet luken til de små kyllingene.',
}, },
'playlist_count': 2, 'playlist_count': 2,
}, { }, {
@ -134,7 +148,7 @@ class TV2ArticleIE(InfoExtractor):
if not assets: if not assets:
# New embed pattern # New embed pattern
for v in re.findall(r'TV2ContentboxVideo\(({.+?})\)', webpage): for v in re.findall(r'(?s)TV2ContentboxVideo\(({.+?})\)', webpage):
video = self._parse_json( video = self._parse_json(
v, playlist_id, transform_source=js_to_json, fatal=False) v, playlist_id, transform_source=js_to_json, fatal=False)
if not video: if not video:
@ -151,3 +165,28 @@ class TV2ArticleIE(InfoExtractor):
description = remove_end(self._og_search_description(webpage), ' - TV2.no') description = remove_end(self._og_search_description(webpage), ' - TV2.no')
return self.playlist_result(entries, playlist_id, title, description) return self.playlist_result(entries, playlist_id, title, description)
class KatsomoIE(TV2IE):
_VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)'
_TEST = {
'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
'info_dict': {
'id': '1181321',
'ext': 'mp4',
'title': 'MTV Uutiset Live',
'description': 'Päätöksen teki Pelicansin hallitus.',
'timestamp': 1575116484,
'upload_date': '20191130',
'duration': 37.12,
'view_count': int,
'categories': list,
},
'params': {
# m3u8 download
'skip_download': True,
},
}
_API_DOMAIN = 'api.katsomo.fi'
_PROTOCOLS = ('HLS', 'MPD')
_GEO_COUNTRIES = ['FI']

View File

@ -15,18 +15,20 @@ from ..compat import (
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
clean_html,
determine_ext, determine_ext,
dict_get,
ExtractorError, ExtractorError,
js_to_json, js_to_json,
int_or_none, int_or_none,
merge_dicts, merge_dicts,
NO_DEFAULT,
OnDemandPagedList, OnDemandPagedList,
parse_filesize, parse_filesize,
RegexNotFoundError, RegexNotFoundError,
sanitized_Request, sanitized_Request,
smuggle_url, smuggle_url,
std_headers, std_headers,
str_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
unsmuggle_url, unsmuggle_url,
@ -210,7 +212,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
video_uploader_url = owner.get('url') video_uploader_url = owner.get('url')
return { return {
'id': video_id, 'id': str_or_none(video_data.get('id')) or video_id,
'title': self._live_title(video_title) if is_live else video_title, 'title': self._live_title(video_title) if is_live else video_title,
'uploader': owner.get('name'), 'uploader': owner.get('name'),
'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
@ -258,11 +260,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
(?: (?:
(?: (?:
www| www|
(?P<player>player) player
) )
\. \.
)? )?
vimeo(?P<pro>pro)?\.com/ vimeo(?:pro)?\.com/
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?:.*?/)? (?:.*?/)?
(?: (?:
@ -284,7 +286,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '56015672', 'id': '56015672',
'ext': 'mp4', 'ext': 'mp4',
'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
'description': 'md5:509a9ad5c9bf97c60faee9203aca4479', 'description': 'md5:2d3305bad981a06ff79f027f19865021',
'timestamp': 1355990239, 'timestamp': 1355990239,
'upload_date': '20121220', 'upload_date': '20121220',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
@ -293,6 +295,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 10, 'duration': 10,
'license': 'by-sa', 'license': 'by-sa',
}, },
'params': {
'format': 'best[protocol=https]',
},
}, },
{ {
'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
@ -305,8 +310,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'openstreetmapus', 'uploader_id': 'openstreetmapus',
'uploader': 'OpenStreetMap US', 'uploader': 'OpenStreetMap US',
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
'description': 'md5:fd69a7b8d8c34a4e1d2ec2e4afd6ec30', 'description': 'md5:2c362968038d4499f4d79f88458590c1',
'duration': 1595, 'duration': 1595,
'upload_date': '20130610',
'timestamp': 1370893156,
},
'params': {
'format': 'best[protocol=https]',
}, },
}, },
{ {
@ -323,6 +333,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 3610, 'duration': 3610,
'description': None, 'description': None,
}, },
'params': {
'format': 'best[protocol=https]',
},
'expected_warnings': ['Unable to download JSON metadata'],
}, },
{ {
'url': 'http://vimeo.com/68375962', 'url': 'http://vimeo.com/68375962',
@ -341,6 +355,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
}, },
'params': { 'params': {
'format': 'best[protocol=https]',
'videopassword': 'youtube-dl', 'videopassword': 'youtube-dl',
}, },
}, },
@ -441,10 +456,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': '10Ft Films', 'uploader': '10Ft Films',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
'uploader_id': 'tenfootfilms', 'uploader_id': 'tenfootfilms',
'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384',
'upload_date': '20130830',
'timestamp': 1377853339,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, },
{ {
'url': 'http://player.vimeo.com/video/68375962', 'url': 'http://player.vimeo.com/video/68375962',
@ -459,6 +478,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 10, 'duration': 10,
}, },
'params': { 'params': {
'format': 'best[protocol=https]',
'videopassword': 'youtube-dl', 'videopassword': 'youtube-dl',
}, },
}, },
@ -523,7 +543,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
def _verify_player_video_password(self, url, video_id, headers): def _verify_player_video_password(self, url, video_id, headers):
password = self._downloader.params.get('videopassword') password = self._downloader.params.get('videopassword')
if password is None: if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option') raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
data = urlencode_postdata({ data = urlencode_postdata({
'password': base64.b64encode(password.encode()), 'password': base64.b64encode(password.encode()),
}) })
@ -552,28 +572,26 @@ class VimeoIE(VimeoBaseInfoExtractor):
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
# Extract ID from URL # Extract ID from URL
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
orig_url = url orig_url = url
if mobj.group('pro'): is_pro = 'vimeopro.com/' in url
is_player = '://player.vimeo.com/video/' in url
if is_pro:
# some videos require portfolio_id to be present in player url # some videos require portfolio_id to be present in player url
# https://github.com/ytdl-org/youtube-dl/issues/20070 # https://github.com/ytdl-org/youtube-dl/issues/20070
url = self._extract_url(url, self._download_webpage(url, video_id)) url = self._extract_url(url, self._download_webpage(url, video_id))
elif mobj.group('player'): if not url:
url = 'https://vimeo.com/' + video_id
elif is_player:
url = 'https://player.vimeo.com/video/' + video_id url = 'https://player.vimeo.com/video/' + video_id
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
request = sanitized_Request(url, headers=headers)
try: try:
webpage, urlh = self._download_webpage_handle(request, video_id) # Retrieve video webpage to extract further information
webpage, urlh = self._download_webpage_handle(
url, video_id, headers=headers)
redirect_url = compat_str(urlh.geturl()) redirect_url = compat_str(urlh.geturl())
# Some URLs redirect to ondemand can't be extracted with
# this extractor right away thus should be passed through
# ondemand extractor (e.g. https://vimeo.com/73445910)
if VimeoOndemandIE.suitable(redirect_url):
return self.url_result(redirect_url, VimeoOndemandIE.ie_key())
except ExtractorError as ee: except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read() errmsg = ee.cause.read()
@ -600,6 +618,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
cc_license = None cc_license = None
timestamp = None timestamp = None
video_description = None
# Extract the config JSON # Extract the config JSON
try: try:
@ -611,17 +630,17 @@ class VimeoIE(VimeoBaseInfoExtractor):
# Sometimes new react-based page is served instead of old one that require # Sometimes new react-based page is served instead of old one that require
# different config URL extraction approach (see # different config URL extraction approach (see
# https://github.com/ytdl-org/youtube-dl/pull/7209) # https://github.com/ytdl-org/youtube-dl/pull/7209)
vimeo_clip_page_config = self._search_regex( page_config = self._parse_json(self._search_regex(
r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage, r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
'vimeo clip page config') webpage, 'page config'), video_id)
page_config = self._parse_json(vimeo_clip_page_config, video_id)
config_url = page_config['player']['config_url'] config_url = page_config['player']['config_url']
cc_license = page_config.get('cc_license') cc_license = page_config.get('cc_license')
timestamp = try_get( timestamp = try_get(
page_config, lambda x: x['clip']['uploaded_on'], page_config, lambda x: x['clip']['uploaded_on'],
compat_str) compat_str)
config_json = self._download_webpage(config_url, video_id) video_description = clean_html(dict_get(
config = json.loads(config_json) page_config, ('description', 'description_html_escaped')))
config = self._download_json(config_url, video_id)
except RegexNotFoundError: except RegexNotFoundError:
# For pro videos or player.vimeo.com urls # For pro videos or player.vimeo.com urls
# We try to find out to which variable is assigned the config dic # We try to find out to which variable is assigned the config dic
@ -675,14 +694,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
{'force_feature_id': True}), 'Vimeo') {'force_feature_id': True}), 'Vimeo')
# Extract video description # Extract video description
if not video_description:
video_description = self._html_search_regex( video_description = self._html_search_regex(
r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>', r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
webpage, 'description', default=None) webpage, 'description', default=None)
if not video_description: if not video_description:
video_description = self._html_search_meta( video_description = self._html_search_meta(
'description', webpage, default=None) 'description', webpage, default=None)
if not video_description and mobj.group('pro'): if not video_description and is_pro:
orig_webpage = self._download_webpage( orig_webpage = self._download_webpage(
orig_url, video_id, orig_url, video_id,
note='Downloading webpage for description', note='Downloading webpage for description',
@ -690,7 +709,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
if orig_webpage: if orig_webpage:
video_description = self._html_search_meta( video_description = self._html_search_meta(
'description', orig_webpage, default=None) 'description', orig_webpage, default=None)
if not video_description and not mobj.group('player'): if not video_description and not is_player:
self._downloader.report_warning('Cannot find video description') self._downloader.report_warning('Cannot find video description')
# Extract upload date # Extract upload date
@ -747,9 +766,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
return info_dict return info_dict
class VimeoOndemandIE(VimeoBaseInfoExtractor): class VimeoOndemandIE(VimeoIE):
IE_NAME = 'vimeo:ondemand' IE_NAME = 'vimeo:ondemand'
_VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
# ondemand video not available via https://vimeo.com/id # ondemand video not available via https://vimeo.com/id
'url': 'https://vimeo.com/ondemand/20704', 'url': 'https://vimeo.com/ondemand/20704',
@ -761,24 +780,32 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor):
'uploader': 'גם סרטים', 'uploader': 'גם סרטים',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
'uploader_id': 'gumfilms', 'uploader_id': 'gumfilms',
'description': 'md5:4c027c965e439de4baab621e48b60791',
'upload_date': '20140906',
'timestamp': 1410032453,
}, },
'params': { 'params': {
'format': 'best[protocol=https]', 'format': 'best[protocol=https]',
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, { }, {
# requires Referer to be passed along with og:video:url # requires Referer to be passed along with og:video:url
'url': 'https://vimeo.com/ondemand/36938/126682985', 'url': 'https://vimeo.com/ondemand/36938/126682985',
'info_dict': { 'info_dict': {
'id': '126682985', 'id': '126584684',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Rävlock, rätt läte på rätt plats', 'title': 'Rävlock, rätt läte på rätt plats',
'uploader': 'Lindroth & Norin', 'uploader': 'Lindroth & Norin',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user14430847', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin',
'uploader_id': 'user14430847', 'uploader_id': 'lindrothnorin',
'description': 'md5:c3c46a90529612c8279fb6af803fc0df',
'upload_date': '20150502',
'timestamp': 1430586422,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, { }, {
'url': 'https://vimeo.com/ondemand/nazmaalik', 'url': 'https://vimeo.com/ondemand/nazmaalik',
'only_matching': True, 'only_matching': True,
@ -790,16 +817,6 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return self.url_result(
# Some videos require Referer to be passed along with og:video:url
# similarly to generic vimeo embeds (e.g.
# https://vimeo.com/ondemand/36938/126682985).
VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url),
VimeoIE.ie_key())
class VimeoChannelIE(VimeoBaseInfoExtractor): class VimeoChannelIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:channel' IE_NAME = 'vimeo:channel'
@ -815,6 +832,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
}, },
'playlist_mincount': 25, 'playlist_mincount': 25,
}] }]
_BASE_URL_TEMPL = 'https://vimeo.com/channels/%s'
def _page_url(self, base_url, pagenum): def _page_url(self, base_url, pagenum):
return '%s/videos/page:%d/' % (base_url, pagenum) return '%s/videos/page:%d/' % (base_url, pagenum)
@ -886,14 +904,13 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
return self.playlist_result(title_and_entries, list_id, list_title) return self.playlist_result(title_and_entries, list_id, list_title)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) channel_id = self._match_id(url)
channel_id = mobj.group('id') return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)
return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
class VimeoUserIE(VimeoChannelIE): class VimeoUserIE(VimeoChannelIE):
IE_NAME = 'vimeo:user' IE_NAME = 'vimeo:user'
_VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)' _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos|[#?]|$)'
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>' _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
_TESTS = [{ _TESTS = [{
'url': 'https://vimeo.com/nkistudio/videos', 'url': 'https://vimeo.com/nkistudio/videos',
@ -903,11 +920,7 @@ class VimeoUserIE(VimeoChannelIE):
}, },
'playlist_mincount': 66, 'playlist_mincount': 66,
}] }]
_BASE_URL_TEMPL = 'https://vimeo.com/%s'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
return self._extract_videos(name, 'https://vimeo.com/%s' % name)
class VimeoAlbumIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE):
@ -969,25 +982,18 @@ class VimeoAlbumIE(VimeoChannelIE):
r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False)) r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False))
class VimeoGroupsIE(VimeoAlbumIE): class VimeoGroupsIE(VimeoChannelIE):
IE_NAME = 'vimeo:group' IE_NAME = 'vimeo:group'
_VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)' _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://vimeo.com/groups/rolexawards', 'url': 'https://vimeo.com/groups/kattykay',
'info_dict': { 'info_dict': {
'id': 'rolexawards', 'id': 'kattykay',
'title': 'Rolex Awards for Enterprise', 'title': 'Katty Kay',
}, },
'playlist_mincount': 73, 'playlist_mincount': 27,
}] }]
_BASE_URL_TEMPL = 'https://vimeo.com/groups/%s'
def _extract_list_title(self, webpage):
return self._og_search_title(webpage, fatal=False)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
class VimeoReviewIE(VimeoBaseInfoExtractor): class VimeoReviewIE(VimeoBaseInfoExtractor):
@ -1003,7 +1009,9 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'title': "DICK HARDWICK 'Comedian'", 'title': "DICK HARDWICK 'Comedian'",
'uploader': 'Richard Hardwick', 'uploader': 'Richard Hardwick',
'uploader_id': 'user21297594', 'uploader_id': 'user21297594',
} 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks",
},
'expected_warnings': ['Unable to download JSON metadata'],
}, { }, {
'note': 'video player needs Referer', 'note': 'video player needs Referer',
'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053', 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
@ -1016,7 +1024,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'duration': 2773, 'duration': 2773,
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader_id': 'user22258446', 'uploader_id': 'user22258446',
} },
'skip': 'video gone',
}, { }, {
'note': 'Password protected', 'note': 'Password protected',
'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde', 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
@ -1036,32 +1045,20 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
webpage = self._download_webpage(webpage_url, video_id)
config_url = self._html_search_regex(
r'data-config-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'config URL', default=None, group='url')
if not config_url:
data = self._parse_json(self._search_regex(
r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
default=NO_DEFAULT if video_password_verified else '{}'), video_id)
config = data.get('vimeo_esi', {}).get('config', {})
config_url = config.get('configUrl') or try_get(config, lambda x: x['clipData']['configUrl'])
if config_url is None:
self._verify_video_password(webpage_url, video_id, webpage)
config_url = self._get_config_url(
webpage_url, video_id, video_password_verified=True)
return config_url
def _real_extract(self, url): def _real_extract(self, url):
page_url, video_id = re.match(self._VALID_URL, url).groups() page_url, video_id = re.match(self._VALID_URL, url).groups()
config_url = self._get_config_url(url, video_id) clip_data = self._download_json(
page_url.replace('/review/', '/review/data/'),
video_id)['clipData']
config_url = clip_data['configUrl']
config = self._download_json(config_url, video_id) config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id) info_dict = self._parse_config(config, video_id)
source_format = self._extract_original_format(page_url, video_id) source_format = self._extract_original_format(
page_url + '/action', video_id)
if source_format: if source_format:
info_dict['formats'].append(source_format) info_dict['formats'].append(source_format)
self._vimeo_sort_formats(info_dict['formats']) self._vimeo_sort_formats(info_dict['formats'])
info_dict['description'] = clean_html(clip_data.get('description'))
return info_dict return info_dict

View File

@ -216,8 +216,7 @@ class VKIE(VKBaseIE):
'id': 'k3lz2cmXyRuJQSjGHUv', 'id': 'k3lz2cmXyRuJQSjGHUv',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:d52606645c20b0ddbb21655adaa4f56f', 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
# TODO: fix test by fixing dailymotion description extraction 'description': 'md5:424b8e88cc873217f520e582ba28bb36',
'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
'uploader': 'AniLibria.Tv', 'uploader': 'AniLibria.Tv',
'upload_date': '20160914', 'upload_date': '20160914',
'uploader_id': 'x1p5vl5', 'uploader_id': 'x1p5vl5',

View File

@ -69,7 +69,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided # If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False _LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}' _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
def _set_language(self): def _set_language(self):
self._set_cookie( self._set_cookie(
@ -372,7 +372,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
_VALID_URL = r"""(?x)^ _VALID_URL = r"""(?x)^
( (
(?:https?://|//) # http(s):// or protocol-independent URL (?:https?://|//) # http(s):// or protocol-independent URL
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
(?:www\.)?deturl\.com/www\.youtube\.com/| (?:www\.)?deturl\.com/www\.youtube\.com/|
(?:www\.)?pwnyoutube\.com/| (?:www\.)?pwnyoutube\.com/|
(?:www\.)?hooktube\.com/| (?:www\.)?hooktube\.com/|
@ -1224,6 +1224,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
'only_matching': True,
},
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -2465,7 +2469,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
(?:\w+\.)? (?:\w+\.)?
(?: (?:
(?: (?:
youtube\.com| youtube(?:kids)?\.com|
invidio\.us invidio\.us
) )
/ /
@ -2477,7 +2481,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist= youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
) )
( (
(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,} (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots # Top tracks, they can also include dots
|(?:MC)[\w\.]* |(?:MC)[\w\.]*
) )
@ -2647,6 +2651,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
}, { }, {
'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU', 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
'only_matching': True,
}] }]
def _real_initialize(self): def _real_initialize(self):
@ -2817,7 +2824,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com channels' IE_DESC = 'YouTube.com channels'
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
_TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos' _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
_VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?' _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
IE_NAME = 'youtube:channel' IE_NAME = 'youtube:channel'
@ -2845,6 +2852,9 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
}, { }, {
'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
'only_matching': True,
}] }]
@classmethod @classmethod

View File

@ -1718,13 +1718,16 @@ DATE_FORMATS = (
'%B %d %Y', '%B %d %Y',
'%B %dst %Y', '%B %dst %Y',
'%B %dnd %Y', '%B %dnd %Y',
'%B %drd %Y',
'%B %dth %Y', '%B %dth %Y',
'%b %d %Y', '%b %d %Y',
'%b %dst %Y', '%b %dst %Y',
'%b %dnd %Y', '%b %dnd %Y',
'%b %drd %Y',
'%b %dth %Y', '%b %dth %Y',
'%b %dst %Y %I:%M', '%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M', '%b %dnd %Y %I:%M',
'%b %drd %Y %I:%M',
'%b %dth %Y %I:%M', '%b %dth %Y %I:%M',
'%Y %m %d', '%Y %m %d',
'%Y-%m-%d', '%Y-%m-%d',
@ -3516,8 +3519,8 @@ def str_or_none(v, default=None):
def str_to_int(int_str): def str_to_int(int_str):
""" A more relaxed version of int_or_none """ """ A more relaxed version of int_or_none """
if int_str is None: if not isinstance(int_str, compat_str):
return None return int_str
int_str = re.sub(r'[,\.\+]', '', int_str) int_str = re.sub(r'[,\.\+]', '', int_str)
return int(int_str) return int(int_str)
@ -5380,6 +5383,19 @@ def decode_packed_codes(code):
obfucasted_code) obfucasted_code)
def caesar(s, alphabet, shift):
if shift == 0:
return s
l = len(alphabet)
return ''.join(
alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
for c in s)
def rot47(s):
return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib): def parse_m3u8_attributes(attrib):
info = {} info = {}
for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib): for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2019.11.05' __version__ = '2019.11.28'