mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2025-01-09 06:37:54 +01:00
Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
f612348c49
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30.1**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2019.01.30.1
|
[debug] youtube-dl version 2019.03.01
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@ -9,7 +9,6 @@ python:
|
|||||||
- "3.6"
|
- "3.6"
|
||||||
- "pypy"
|
- "pypy"
|
||||||
- "pypy3"
|
- "pypy3"
|
||||||
sudo: false
|
|
||||||
env:
|
env:
|
||||||
- YTDL_TEST_SET=core
|
- YTDL_TEST_SET=core
|
||||||
- YTDL_TEST_SET=download
|
- YTDL_TEST_SET=download
|
||||||
|
69
ChangeLog
69
ChangeLog
@ -1,3 +1,72 @@
|
|||||||
|
version 2019.03.01
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [downloader/external] Add support for rate limit and retries for wget
|
||||||
|
* [downloader/external] Fix infinite retries for curl (#19303)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [npo] Fix extraction (#20084)
|
||||||
|
* [francetv:site] Extend video id regex (#20029, #20071)
|
||||||
|
+ [periscope] Extract width and height (#20015)
|
||||||
|
* [servus] Fix extraction (#19297)
|
||||||
|
* [bbccouk] Make subtitles non fatal (#19651)
|
||||||
|
* [metacafe] Fix family filter bypass (#19287)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.02.18
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [tvp:website] Fix and improve extraction
|
||||||
|
+ [tvp] Detect unavailable videos
|
||||||
|
* [tvp] Fix description extraction and make thumbnail optional
|
||||||
|
+ [linuxacademy] Add support for linuxacademy.com (#12207)
|
||||||
|
* [bilibili] Update keys (#19233)
|
||||||
|
* [udemy] Extend URL regular expressions (#14330, #15883)
|
||||||
|
* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126)
|
||||||
|
* [noovo] Fix extraction (#19230)
|
||||||
|
* [rai] Relax URL regular expression (#19232)
|
||||||
|
+ [vshare] Pass Referer to download request (#19205, #19221)
|
||||||
|
+ [openload] Add support for oload.live (#19222)
|
||||||
|
* [imgur] Use video id as title fallback (#18590)
|
||||||
|
+ [twitch] Add new source format detection approach (#19193)
|
||||||
|
* [tvplayhome] Fix video id extraction (#19190)
|
||||||
|
* [tvplayhome] Fix episode metadata extraction (#19190)
|
||||||
|
* [rutube:embed] Fix extraction (#19163)
|
||||||
|
+ [rutube:embed] Add support private videos (#19163)
|
||||||
|
+ [soundcloud] Extract more metadata
|
||||||
|
+ [trunews] Add support for trunews.com (#19153)
|
||||||
|
+ [linkedin:learning] Extract chapter_number and chapter_id (#19162)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.02.08
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Improve JSON-LD regular expression (#18058)
|
||||||
|
* [YoutubeDL] Fallback to ie_key of matching extractor while making
|
||||||
|
download archive id when no explicit ie_key is provided (#19022)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [malltv] Add support for mall.tv (#18058, #17856)
|
||||||
|
+ [spankbang:playlist] Add support for playlists (#19145)
|
||||||
|
* [spankbang] Extend URL regular expression
|
||||||
|
* [trutv] Fix extraction (#17336)
|
||||||
|
* [toutv] Fix authentication (#16398, #18700)
|
||||||
|
* [pornhub] Fix tags and categories extraction (#13720, #19135)
|
||||||
|
* [pornhd] Fix formats extraction
|
||||||
|
+ [pornhd] Extract like count (#19123, #19125)
|
||||||
|
* [radiocanada] Switch to the new media requests (#19115)
|
||||||
|
+ [teachable] Add support for courses.workitdaily.com (#18871)
|
||||||
|
- [vporn] Remove extractor (#16276)
|
||||||
|
+ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
|
||||||
|
+ [drtuber] Extract duration (#19078)
|
||||||
|
* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
|
||||||
|
* [soundcloud] Update client id
|
||||||
|
* [drtv] Improve preference (#19079)
|
||||||
|
+ [openload] Add support for openload.pw and oload.pw (#18930)
|
||||||
|
+ [openload] Add support for oload.info (#19073)
|
||||||
|
* [crackle] Authorize media detail request (#16931)
|
||||||
|
|
||||||
|
|
||||||
version 2019.01.30.1
|
version 2019.01.30.1
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
@ -458,6 +458,7 @@
|
|||||||
- **LineTV**
|
- **LineTV**
|
||||||
- **linkedin:learning**
|
- **linkedin:learning**
|
||||||
- **linkedin:learning:course**
|
- **linkedin:learning:course**
|
||||||
|
- **LinuxAcademy**
|
||||||
- **LiTV**
|
- **LiTV**
|
||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
- **LiveLeakEmbed**
|
- **LiveLeakEmbed**
|
||||||
@ -476,6 +477,7 @@
|
|||||||
- **mailru:music**: Музыка@Mail.Ru
|
- **mailru:music**: Музыка@Mail.Ru
|
||||||
- **mailru:music:search**: Музыка@Mail.Ru
|
- **mailru:music:search**: Музыка@Mail.Ru
|
||||||
- **MakerTV**
|
- **MakerTV**
|
||||||
|
- **MallTV**
|
||||||
- **mangomolo:live**
|
- **mangomolo:live**
|
||||||
- **mangomolo:video**
|
- **mangomolo:video**
|
||||||
- **ManyVids**
|
- **ManyVids**
|
||||||
@ -827,6 +829,7 @@
|
|||||||
- **southpark.nl**
|
- **southpark.nl**
|
||||||
- **southparkstudios.dk**
|
- **southparkstudios.dk**
|
||||||
- **SpankBang**
|
- **SpankBang**
|
||||||
|
- **SpankBangPlaylist**
|
||||||
- **Spankwire**
|
- **Spankwire**
|
||||||
- **Spiegel**
|
- **Spiegel**
|
||||||
- **Spiegel:Article**: Articles on spiegel.de
|
- **Spiegel:Article**: Articles on spiegel.de
|
||||||
@ -913,6 +916,7 @@
|
|||||||
- **ToypicsUser**: Toypics user profile
|
- **ToypicsUser**: Toypics user profile
|
||||||
- **TrailerAddict** (Currently broken)
|
- **TrailerAddict** (Currently broken)
|
||||||
- **Trilulilu**
|
- **Trilulilu**
|
||||||
|
- **TruNews**
|
||||||
- **TruTV**
|
- **TruTV**
|
||||||
- **Tube8**
|
- **Tube8**
|
||||||
- **TubiTv**
|
- **TubiTv**
|
||||||
@ -1057,7 +1061,6 @@
|
|||||||
- **Voot**
|
- **Voot**
|
||||||
- **VoxMedia**
|
- **VoxMedia**
|
||||||
- **VoxMediaVolume**
|
- **VoxMediaVolume**
|
||||||
- **Vporn**
|
|
||||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
- **Vrak**
|
- **Vrak**
|
||||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||||
|
@ -61,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
<meta content='Foo' property=og:foobar>
|
<meta content='Foo' property=og:foobar>
|
||||||
<meta name="og:test1" content='foo > < bar'/>
|
<meta name="og:test1" content='foo > < bar'/>
|
||||||
<meta name="og:test2" content="foo >//< bar"/>
|
<meta name="og:test2" content="foo >//< bar"/>
|
||||||
|
<meta property=og-test3 content='Ill-formatted opengraph'/>
|
||||||
'''
|
'''
|
||||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||||
@ -69,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||||
|
self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
|
||||||
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||||
|
@ -29,6 +29,16 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
|||||||
tf.close()
|
tf.close()
|
||||||
os.remove(tf.name)
|
os.remove(tf.name)
|
||||||
|
|
||||||
|
def test_strip_httponly_prefix(self):
|
||||||
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
|
||||||
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
|
def assert_cookie_has_value(key):
|
||||||
|
self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
|
||||||
|
|
||||||
|
assert_cookie_has_value('HTTPONLY_COOKIE')
|
||||||
|
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
6
test/testdata/cookies/httponly_cookies.txt
vendored
Normal file
6
test/testdata/cookies/httponly_cookies.txt
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# Netscape HTTP Cookie File
|
||||||
|
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||||
|
# This is a generated file! Do not edit.
|
||||||
|
|
||||||
|
#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE
|
||||||
|
www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE
|
@ -82,6 +82,7 @@ from .utils import (
|
|||||||
sanitize_url,
|
sanitize_url,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
std_headers,
|
std_headers,
|
||||||
|
str_or_none,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
url_basename,
|
url_basename,
|
||||||
@ -2060,15 +2061,24 @@ class YoutubeDL(object):
|
|||||||
self.report_warning('Unable to remove downloaded original file')
|
self.report_warning('Unable to remove downloaded original file')
|
||||||
|
|
||||||
def _make_archive_id(self, info_dict):
|
def _make_archive_id(self, info_dict):
|
||||||
|
video_id = info_dict.get('id')
|
||||||
|
if not video_id:
|
||||||
|
return
|
||||||
# Future-proof against any change in case
|
# Future-proof against any change in case
|
||||||
# and backwards compatibility with prior versions
|
# and backwards compatibility with prior versions
|
||||||
extractor = info_dict.get('extractor_key')
|
extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
|
||||||
if extractor is None:
|
if extractor is None:
|
||||||
if 'id' in info_dict:
|
url = str_or_none(info_dict.get('url'))
|
||||||
extractor = info_dict.get('ie_key') # key in a playlist
|
if not url:
|
||||||
if extractor is None:
|
return
|
||||||
return None # Incomplete video information
|
# Try to find matching extractor for the URL and take its ie_key
|
||||||
return extractor.lower() + ' ' + info_dict['id']
|
for ie in self._ies:
|
||||||
|
if ie.suitable(url):
|
||||||
|
extractor = ie.ie_key()
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
return extractor.lower() + ' ' + video_id
|
||||||
|
|
||||||
def in_download_archive(self, info_dict):
|
def in_download_archive(self, info_dict):
|
||||||
fn = self.params.get('download_archive')
|
fn = self.params.get('download_archive')
|
||||||
@ -2076,7 +2086,7 @@ class YoutubeDL(object):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
vid_id = self._make_archive_id(info_dict)
|
vid_id = self._make_archive_id(info_dict)
|
||||||
if vid_id is None:
|
if not vid_id:
|
||||||
return False # Incomplete video information
|
return False # Incomplete video information
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -121,7 +121,11 @@ class CurlFD(ExternalFD):
|
|||||||
cmd += self._valueless_option('--silent', 'noprogress')
|
cmd += self._valueless_option('--silent', 'noprogress')
|
||||||
cmd += self._valueless_option('--verbose', 'verbose')
|
cmd += self._valueless_option('--verbose', 'verbose')
|
||||||
cmd += self._option('--limit-rate', 'ratelimit')
|
cmd += self._option('--limit-rate', 'ratelimit')
|
||||||
cmd += self._option('--retry', 'retries')
|
retry = self._option('--retry', 'retries')
|
||||||
|
if len(retry) == 2:
|
||||||
|
if retry[1] in ('inf', 'infinite'):
|
||||||
|
retry[1] = '2147483647'
|
||||||
|
cmd += retry
|
||||||
cmd += self._option('--max-filesize', 'max_filesize')
|
cmd += self._option('--max-filesize', 'max_filesize')
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--proxy', 'proxy')
|
cmd += self._option('--proxy', 'proxy')
|
||||||
@ -160,6 +164,12 @@ class WgetFD(ExternalFD):
|
|||||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._option('--limit-rate', 'ratelimit')
|
||||||
|
retry = self._option('--tries', 'retries')
|
||||||
|
if len(retry) == 2:
|
||||||
|
if retry[1] in ('inf', 'infinite'):
|
||||||
|
retry[1] = '0'
|
||||||
|
cmd += retry
|
||||||
cmd += self._option('--bind-address', 'source_address')
|
cmd += self._option('--bind-address', 'source_address')
|
||||||
cmd += self._option('--proxy', 'proxy')
|
cmd += self._option('--proxy', 'proxy')
|
||||||
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import re
|
||||||
|
import xml
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -17,6 +18,7 @@ from ..utils import (
|
|||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
@ -310,7 +312,13 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
def _get_subtitles(self, media, programme_id):
|
def _get_subtitles(self, media, programme_id):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for connection in self._extract_connections(media):
|
for connection in self._extract_connections(media):
|
||||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
cc_url = url_or_none(connection.get('href'))
|
||||||
|
if not cc_url:
|
||||||
|
continue
|
||||||
|
captions = self._download_xml(
|
||||||
|
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||||
|
if not isinstance(captions, xml.etree.ElementTree.Element):
|
||||||
|
continue
|
||||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||||
subtitles[lang] = [
|
subtitles[lang] = [
|
||||||
{
|
{
|
||||||
|
@ -93,8 +93,8 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_APP_KEY = '84956560bc028eb7'
|
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||||
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
|
_BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
|
||||||
|
|
||||||
def _report_error(self, result):
|
def _report_error(self, result):
|
||||||
if 'message' in result:
|
if 'message' in result:
|
||||||
|
@ -1058,7 +1058,7 @@ class InfoExtractor(object):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||||
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
|
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
|
||||||
% {'prop': re.escape(prop)})
|
% {'prop': re.escape(prop)})
|
||||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
return [
|
return [
|
||||||
|
@ -56,22 +56,11 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
self._download_webpage(
|
|
||||||
'https://www.crunchyroll.com/?a=formhandler',
|
|
||||||
None, 'Logging in', 'Wrong login info',
|
|
||||||
data=urlencode_postdata({
|
|
||||||
'formname': 'RpcApiUser_Login',
|
|
||||||
'next_url': 'https://www.crunchyroll.com/acct/membership',
|
|
||||||
'name': username,
|
|
||||||
'password': password,
|
|
||||||
}))
|
|
||||||
|
|
||||||
'''
|
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
self._LOGIN_URL, None, 'Downloading login page')
|
self._LOGIN_URL, None, 'Downloading login page')
|
||||||
|
|
||||||
def is_logged(webpage):
|
def is_logged(webpage):
|
||||||
return '<title>Redirecting' in webpage
|
return 'href="/logout"' in webpage
|
||||||
|
|
||||||
# Already logged in
|
# Already logged in
|
||||||
if is_logged(login_page):
|
if is_logged(login_page):
|
||||||
@ -110,7 +99,6 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
|
||||||
raise ExtractorError('Unable to log in')
|
raise ExtractorError('Unable to log in')
|
||||||
'''
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
@ -4,7 +4,9 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
|
parse_duration,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -65,6 +67,9 @@ class DrTuberIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
duration = int_or_none(video_data.get('duration')) or parse_duration(
|
||||||
|
video_data.get('duration_format'))
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
|
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
|
||||||
r'<title>([^<]+)\s*@\s+DrTuber',
|
r'<title>([^<]+)\s*@\s+DrTuber',
|
||||||
@ -103,4 +108,5 @@ class DrTuberIE(InfoExtractor):
|
|||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'age_limit': self._rta_search(webpage),
|
'age_limit': self._rta_search(webpage),
|
||||||
|
'duration': duration,
|
||||||
}
|
}
|
||||||
|
@ -171,10 +171,13 @@ class DRTVIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
target = link.get('Target')
|
target = link.get('Target')
|
||||||
format_id = target or ''
|
format_id = target or ''
|
||||||
preference = None
|
if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
|
||||||
if asset_target in ('SpokenSubtitles', 'SignLanguage'):
|
|
||||||
preference = -1
|
preference = -1
|
||||||
format_id += '-%s' % asset_target
|
format_id += '-%s' % asset_target
|
||||||
|
elif asset_target == 'Default':
|
||||||
|
preference = 1
|
||||||
|
else:
|
||||||
|
preference = None
|
||||||
if target == 'HDS':
|
if target == 'HDS':
|
||||||
f4m_formats = self._extract_f4m_formats(
|
f4m_formats = self._extract_f4m_formats(
|
||||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||||
|
@ -29,7 +29,8 @@ class ESPNIE(OnceIE):
|
|||||||
(?:
|
(?:
|
||||||
.*?\?.*?\bid=|
|
.*?\?.*?\bid=|
|
||||||
/_/id/
|
/_/id/
|
||||||
)
|
)|
|
||||||
|
[^/]+/video/
|
||||||
)
|
)
|
||||||
)|
|
)|
|
||||||
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
|
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
|
||||||
@ -94,6 +95,9 @@ class ESPNIE(OnceIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
|
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -593,6 +593,7 @@ from .linkedin import (
|
|||||||
LinkedInLearningIE,
|
LinkedInLearningIE,
|
||||||
LinkedInLearningCourseIE,
|
LinkedInLearningCourseIE,
|
||||||
)
|
)
|
||||||
|
from .linuxacademy import LinuxAcademyIE
|
||||||
from .litv import LiTVIE
|
from .litv import LiTVIE
|
||||||
from .liveleak import (
|
from .liveleak import (
|
||||||
LiveLeakIE,
|
LiveLeakIE,
|
||||||
@ -619,6 +620,7 @@ from .mailru import (
|
|||||||
MailRuMusicSearchIE,
|
MailRuMusicSearchIE,
|
||||||
)
|
)
|
||||||
from .makertv import MakerTVIE
|
from .makertv import MakerTVIE
|
||||||
|
from .malltv import MallTVIE
|
||||||
from .mangomolo import (
|
from .mangomolo import (
|
||||||
MangomoloVideoIE,
|
MangomoloVideoIE,
|
||||||
MangomoloLiveIE,
|
MangomoloLiveIE,
|
||||||
@ -1058,7 +1060,10 @@ from .southpark import (
|
|||||||
SouthParkEsIE,
|
SouthParkEsIE,
|
||||||
SouthParkNlIE
|
SouthParkNlIE
|
||||||
)
|
)
|
||||||
from .spankbang import SpankBangIE
|
from .spankbang import (
|
||||||
|
SpankBangIE,
|
||||||
|
SpankBangPlaylistIE,
|
||||||
|
)
|
||||||
from .spankwire import SpankwireIE
|
from .spankwire import SpankwireIE
|
||||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
from .spiegeltv import SpiegeltvIE
|
||||||
@ -1167,6 +1172,7 @@ from .toutv import TouTvIE
|
|||||||
from .toypics import ToypicsUserIE, ToypicsIE
|
from .toypics import ToypicsUserIE, ToypicsIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
|
from .trunews import TruNewsIE
|
||||||
from .trutv import TruTVIE
|
from .trutv import TruTVIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
from .tubitv import TubiTvIE
|
from .tubitv import TubiTvIE
|
||||||
@ -1212,7 +1218,7 @@ from .tvnow import (
|
|||||||
from .tvp import (
|
from .tvp import (
|
||||||
TVPEmbedIE,
|
TVPEmbedIE,
|
||||||
TVPIE,
|
TVPIE,
|
||||||
TVPSeriesIE,
|
TVPWebsiteIE,
|
||||||
)
|
)
|
||||||
from .tvplay import (
|
from .tvplay import (
|
||||||
TVPlayIE,
|
TVPlayIE,
|
||||||
@ -1362,7 +1368,6 @@ from .voxmedia import (
|
|||||||
VoxMediaVolumeIE,
|
VoxMediaVolumeIE,
|
||||||
VoxMediaIE,
|
VoxMediaIE,
|
||||||
)
|
)
|
||||||
from .vporn import VpornIE
|
|
||||||
from .vrt import VRTIE
|
from .vrt import VRTIE
|
||||||
from .vrak import VrakIE
|
from .vrak import VrakIE
|
||||||
from .vrv import (
|
from .vrv import (
|
||||||
|
@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||||||
|
|
||||||
catalogue = None
|
catalogue = None
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
webpage, 'video id', default=None, group='id')
|
webpage, 'video id', default=None, group='id')
|
||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
|
@ -27,6 +27,10 @@ class ImgurIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# no title
|
||||||
|
'url': 'https://i.imgur.com/jxBXAMC.gifv',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -87,7 +91,7 @@ class ImgurIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage, default=video_id),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
get_element_by_class,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
strip_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -21,7 +23,9 @@ class LibsynIE(InfoExtractor):
|
|||||||
'id': '6385796',
|
'id': '6385796',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': "Champion Minded - Developing a Growth Mindset",
|
'title': "Champion Minded - Developing a Growth Mindset",
|
||||||
'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
|
# description fetched using another request:
|
||||||
|
# http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796
|
||||||
|
# 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
|
||||||
'upload_date': '20180320',
|
'upload_date': '20180320',
|
||||||
'thumbnail': 're:^https?://.*',
|
'thumbnail': 're:^https?://.*',
|
||||||
},
|
},
|
||||||
@ -38,22 +42,36 @@ class LibsynIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
url, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
video_id = m.group('id')
|
|
||||||
url = m.group('mainurl')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
podcast_title = self._search_regex(
|
data = self._parse_json(self._search_regex(
|
||||||
r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
|
r'var\s+playlistItem\s*=\s*({.+?});',
|
||||||
if podcast_title:
|
webpage, 'JSON data block'), video_id)
|
||||||
podcast_title = podcast_title.strip()
|
|
||||||
episode_title = self._search_regex(
|
episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage)
|
||||||
r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
|
if not episode_title:
|
||||||
if episode_title:
|
self._search_regex(
|
||||||
episode_title = episode_title.strip()
|
[r'data-title="([^"]+)"', r'<title>(.+?)</title>'],
|
||||||
|
webpage, 'episode title')
|
||||||
|
episode_title = episode_title.strip()
|
||||||
|
|
||||||
|
podcast_title = strip_or_none(clean_html(self._search_regex(
|
||||||
|
r'<h3>([^<]+)</h3>', webpage, 'podcast title',
|
||||||
|
default=None) or get_element_by_class('podcast-title', webpage)))
|
||||||
|
|
||||||
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')):
|
||||||
|
f_url = data.get(k)
|
||||||
|
if not f_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': f_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
|
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
|
||||||
'description', default=None)
|
'description', default=None)
|
||||||
@ -61,27 +79,15 @@ class LibsynIE(InfoExtractor):
|
|||||||
# Strip non-breaking and normal spaces
|
# Strip non-breaking and normal spaces
|
||||||
description = description.replace('\u00A0', ' ').strip()
|
description = description.replace('\u00A0', ' ').strip()
|
||||||
release_date = unified_strdate(self._search_regex(
|
release_date = unified_strdate(self._search_regex(
|
||||||
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
r'<div class="release_date">Released: ([^<]+)<',
|
||||||
|
webpage, 'release date', default=None) or data.get('release_date'))
|
||||||
data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
|
|
||||||
data = json.loads(data_json)
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': data['media_url'],
|
|
||||||
'format_id': 'main',
|
|
||||||
}, {
|
|
||||||
'url': data['media_url_libsyn'],
|
|
||||||
'format_id': 'libsyn',
|
|
||||||
}]
|
|
||||||
thumbnail = data.get('thumbnail_url')
|
|
||||||
duration = parse_duration(data.get('duration'))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': data.get('thumbnail_url'),
|
||||||
'upload_date': release_date,
|
'upload_date': release_date,
|
||||||
'duration': duration,
|
'duration': parse_duration(data.get('duration')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -34,12 +34,15 @@ class LinkedInLearningBaseIE(InfoExtractor):
|
|||||||
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
|
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
|
||||||
}, query=query)['elements'][0]
|
}, query=query)['elements'][0]
|
||||||
|
|
||||||
def _get_video_id(self, urn, course_slug, video_slug):
|
def _get_urn_id(self, video_data):
|
||||||
|
urn = video_data.get('urn')
|
||||||
if urn:
|
if urn:
|
||||||
mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
|
mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group(1)
|
return mobj.group(1)
|
||||||
return '%s/%s' % (course_slug, video_slug)
|
|
||||||
|
def _get_video_id(self, video_data, course_slug, video_slug):
|
||||||
|
return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
email, password = self._get_login_info()
|
email, password = self._get_login_info()
|
||||||
@ -123,7 +126,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
|
|||||||
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
|
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug),
|
'id': self._get_video_id(video_data, course_slug, video_slug),
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': video_data.get('defaultThumbnail'),
|
'thumbnail': video_data.get('defaultThumbnail'),
|
||||||
@ -154,18 +157,21 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
|
|||||||
course_data = self._call_api(course_slug, 'chapters,description,title')
|
course_data = self._call_api(course_slug, 'chapters,description,title')
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for chapter in course_data.get('chapters', []):
|
for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1):
|
||||||
chapter_title = chapter.get('title')
|
chapter_title = chapter.get('title')
|
||||||
|
chapter_id = self._get_urn_id(chapter)
|
||||||
for video in chapter.get('videos', []):
|
for video in chapter.get('videos', []):
|
||||||
video_slug = video.get('slug')
|
video_slug = video.get('slug')
|
||||||
if not video_slug:
|
if not video_slug:
|
||||||
continue
|
continue
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'id': self._get_video_id(video.get('urn'), course_slug, video_slug),
|
'id': self._get_video_id(video, course_slug, video_slug),
|
||||||
'title': video.get('title'),
|
'title': video.get('title'),
|
||||||
'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
|
'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
|
||||||
'chapter': chapter_title,
|
'chapter': chapter_title,
|
||||||
|
'chapter_number': chapter_number,
|
||||||
|
'chapter_id': chapter_id,
|
||||||
'ie_key': LinkedInLearningIE.ie_key(),
|
'ie_key': LinkedInLearningIE.ie_key(),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
174
youtube_dl/extractor/linuxacademy.py
Normal file
174
youtube_dl/extractor/linuxacademy.py
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_b64decode,
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
orderedSet,
|
||||||
|
unescapeHTML,
|
||||||
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LinuxAcademyIE(InfoExtractor):
    """Extractor for linuxacademy.com lesson pages and course (module) pages.

    Which path is taken depends on the named groups matched by _VALID_URL:
    a ``course_id`` match yields a playlist of lesson URLs, otherwise a
    single lesson is extracted from the page's JWPlayer data.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?linuxacademy\.com/cp/
                        (?:
                            courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
                            modules/view/id/(?P<course_id>\d+)
                        )
                    '''
    _TESTS = [{
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
        'info_dict': {
            'id': '1498-2',
            'ext': 'mp4',
            'title': "Introduction to the Practitioner's Brief",
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires Linux Academy account credentials',
    }, {
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
        'only_matching': True,
    }, {
        'url': 'https://linuxacademy.com/cp/modules/view/id/154',
        'info_dict': {
            'id': '154',
            'title': 'AWS Certified Cloud Practitioner',
            'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
        },
        'playlist_count': 41,
        'skip': 'Requires Linux Academy account credentials',
    }]

    _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
    _ORIGIN_URL = 'https://linuxacademy.com'
    _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
    _NETRC_MACHINE = 'linuxacademy'

    def _real_initialize(self):
        """Log in (when credentials are configured) before any extraction."""
        self._login()

    def _login(self):
        """Authenticate against login.linuxacademy.com.

        Does nothing when no username is configured. Otherwise it walks
        through four requests: the authorize page, the username/password
        login, the login callback and finally the token validation page.

        Raises:
            ExtractorError: marked as expected, carrying the service's own
                message, when the login request is answered with HTTP 401.
                Any other download failure is re-raised unchanged.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        def random_string():
            # 32 random characters, used for the 'state' and 'nonce' values.
            # NOTE(review): the alphabet below has no uppercase 'W'; it is
            # kept exactly as found.
            return ''.join([
                random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~')
                for _ in range(32)])

        webpage, urlh = self._download_webpage_handle(
            self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
                'client_id': self._CLIENT_ID,
                'response_type': 'token id_token',
                'redirect_uri': self._ORIGIN_URL,
                'scope': 'openid email user_impersonation profile',
                'audience': self._ORIGIN_URL,
                'state': random_string(),
                'nonce': random_string(),
            })

        # The authorize page embeds base64-encoded JSON inside an atob(...)
        # call; its 'extraParams' member seeds the login payload.
        login_data = self._parse_json(
            self._search_regex(
                r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
                'login info', group='value'), None,
            transform_source=lambda x: compat_b64decode(x).decode('utf-8')
        )['extraParams']

        login_data.update({
            'client_id': self._CLIENT_ID,
            'redirect_uri': self._ORIGIN_URL,
            'tenant': 'lacausers',
            'connection': 'Username-Password-Authentication',
            'username': username,
            'password': password,
            'sso': 'true',
        })

        # Final URL of the authorize request, sent as Referer afterwards.
        login_state_url = compat_str(urlh.geturl())

        try:
            login_page = self._download_webpage(
                'https://login.linuxacademy.com/usernamepassword/login', None,
                'Downloading login page', data=json.dumps(login_data).encode(),
                headers={
                    'Content-Type': 'application/json',
                    'Origin': 'https://login.linuxacademy.com',
                    'Referer': login_state_url,
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                # read() yields bytes. Decode explicitly (as is done for the
                # base64 payload above) so that parsing the error body does
                # not rely on bytes support in json.loads (Python 3.6+).
                error = self._parse_json(
                    e.cause.read().decode('utf-8'), None)
                message = error.get('description') or error['code']
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, message), expected=True)
            raise

        # The login response is an HTML form; replay its hidden inputs.
        callback_page, urlh = self._download_webpage_handle(
            'https://login.linuxacademy.com/login/callback', None,
            'Downloading callback page',
            data=urlencode_postdata(self._hidden_inputs(login_page)),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Origin': 'https://login.linuxacademy.com',
                'Referer': login_state_url,
            })

        # The access token is taken from the URL the callback ended up at.
        access_token = self._search_regex(
            r'access_token=([^=&]+)', compat_str(urlh.geturl()),
            'access token')

        self._download_webpage(
            'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
            % access_token, None, 'Downloading token validation page')

    def _real_extract(self, url):
        """Return a playlist for a course URL or an info dict for a lesson.

        The item id is the course id for course pages and
        '<chapter_id>-<lesson_id>' for lesson pages.
        """
        mobj = re.match(self._VALID_URL, url)
        chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
        item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)

        webpage = self._download_webpage(url, item_id)

        # course path
        if course_id:
            # Every distinct lesson link on the page becomes one entry that
            # is handled by this same extractor.
            entries = [
                self.url_result(
                    urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
                for lesson_url in orderedSet(re.findall(
                    r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
                    webpage))]
            title = unescapeHTML(self._html_search_regex(
                (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
                 r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
                webpage, 'title', default=None, group='value'))
            description = unescapeHTML(self._html_search_regex(
                r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
                webpage, 'description', default=None, group='value'))
            return self.playlist_result(entries, course_id, title, description)

        # single video path
        info = self._extract_jwplayer_data(
            webpage, item_id, require_title=False, m3u8_id='hls',)
        title = self._search_regex(
            (r'>Lecture\s*:\s*(?P<value>[^<]+)',
             r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
            'title', group='value')
        # The page-derived id and title override whatever JWPlayer supplied.
        info.update({
            'id': item_id,
            'title': title,
        })
        return info
|
53
youtube_dl/extractor/malltv.py
Normal file
53
youtube_dl/extractor/malltv.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import merge_dicts
|
||||||
|
|
||||||
|
|
||||||
|
class MallTVIE(InfoExtractor):
    """Extractor for mall.tv video pages."""

    _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'md5': '1c4a37f080e1f3023103a7b43458e518',
        'info_dict': {
            'id': 't0zzt0',
            'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
            'ext': 'mp4',
            'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
            'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
            'duration': 216,
            'timestamp': 1538870400,
            'upload_date': '20181007',
            'view_count': int,
        }
    }, {
        'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's <source> tag, JSON-LD and OpenGraph data."""
        display_id = self._match_id(url)

        page = self._download_webpage(
            url, display_id, headers=self.geo_verification_headers())

        # Matches a <source> tag up to ".../<id>/index"; group 1 is the whole
        # matched prefix, the named group is the video id.
        source_pattern = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
        video_id = self._search_regex(
            source_pattern, page, 'video id', group='id')

        # Append ".m3u8" to each matched source prefix before handing the
        # page to the HTML5 media parser.
        patched_page = re.sub(source_pattern, r'\1.m3u8', page)
        media_entries = self._parse_html5_media_entries(
            url, patched_page, video_id,
            m3u8_id='hls', m3u8_entry_protocol='m3u8_native')
        media = media_entries[0]

        json_ld_info = self._search_json_ld(page, video_id, default={})

        page_meta = {
            'id': video_id,
            'display_id': display_id,
            'title': self._og_search_title(page, default=None) or display_id,
            'description': self._og_search_description(page, default=None),
            'thumbnail': self._og_search_thumbnail(page, default=None),
        }
        return merge_dicts(media, json_ld_info, page_meta)
|
@ -1,12 +1,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@ -144,7 +145,7 @@ class MetacafeIE(InfoExtractor):
|
|||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
# Disable family filter
|
# Disable family filter
|
||||||
'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
|
'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False}))
|
||||||
}
|
}
|
||||||
|
|
||||||
# AnyClip videos require the flashversion cookie so that we get the link
|
# AnyClip videos require the flashversion cookie so that we get the link
|
||||||
|
@ -57,7 +57,8 @@ class NoovoIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
bc_url = BrightcoveNewIE._extract_url(self, webpage)
|
brightcove_id = self._search_regex(
|
||||||
|
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||||
|
|
||||||
data = self._parse_json(
|
data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
@ -89,7 +90,10 @@ class NoovoIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': BrightcoveNewIE.ie_key(),
|
'ie_key': BrightcoveNewIE.ie_key(),
|
||||||
'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
|
'url': smuggle_url(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||||
|
{'geo_countries': ['CA']}),
|
||||||
|
'id': brightcove_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'series': series,
|
'series': series,
|
||||||
|
@ -12,11 +12,16 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
qualities,
|
qualities,
|
||||||
|
str_or_none,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -176,9 +181,118 @@ class NPOIE(NPOBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
return self._get_info(video_id)
|
try:
|
||||||
|
return self._get_info(url, video_id)
|
||||||
|
except ExtractorError:
|
||||||
|
return self._get_old_info(video_id)
|
||||||
|
|
||||||
def _get_info(self, video_id):
|
def _get_info(self, url, video_id):
    """Extract formats and metadata through the npostart.nl player endpoints.

    Fetches an API token, posts it to the player endpoint to obtain a
    player token, then queries the streams endpoint once per profile.
    When the player response carries an embed URL whose page contains a
    ``video = {...};`` object, metadata from that object is merged with
    the basic info dict; otherwise the basic info dict is returned.
    """
    api_token = self._download_json(
        'https://www.npostart.nl/api/token', video_id,
        'Downloading token', headers={
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
        })['token']

    player_form = urlencode_postdata({
        'autoplay': 0,
        'share': 1,
        'pageUrl': url,
        'hasAdConsent': 0,
        '_token': api_token,
    })
    player = self._download_json(
        'https://www.npostart.nl/player/%s' % video_id, video_id,
        'Downloading player JSON', data=player_form)

    player_token = player['token']

    seen_urls = set()
    formats = []
    for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
        streams = self._download_json(
            'https://start-player.npo.nl/video/%s/streams' % video_id,
            video_id, 'Downloading %s profile JSON' % profile, fatal=False,
            query={
                'profile': profile,
                'quality': 'npo',
                'tokenId': player_token,
                'streamType': 'broadcast',
            })
        if not streams:
            continue
        stream = streams.get('stream')
        if not isinstance(stream, dict):
            continue
        src = url_or_none(stream.get('src'))
        if not src or src in seen_urls:
            continue
        # The URL is recorded before the protection check, so a protected
        # URL is also never revisited under a later profile.
        seen_urls.add(src)
        if stream.get('protection') is not None:
            continue
        mime_type = stream.get('type')
        ext = determine_ext(src)
        if mime_type == 'application/dash+xml' or ext == 'mpd':
            dash_formats = self._extract_mpd_formats(
                src, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
        elif mime_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
            hls_formats = self._extract_m3u8_formats(
                src, video_id, ext='mp4',
                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)
        elif '.ism/Manifest' in src:
            mss_formats = self._extract_ism_formats(
                src, video_id, ism_id='mss', fatal=False)
            formats.extend(mss_formats)
        else:
            formats.append({
                'url': src,
            })

    self._sort_formats(formats)

    # Minimal result; the video id doubles as the title.
    info = {
        'id': video_id,
        'title': video_id,
        'formats': formats,
    }

    embed_url = url_or_none(player.get('embedUrl'))
    if not embed_url:
        return info

    embed_page = self._download_webpage(
        embed_url, video_id, 'Downloading embed page', fatal=False)
    if not embed_page:
        return info

    video = self._parse_json(
        self._search_regex(
            r'\bvideo\s*=\s*({.+?})\s*;', embed_page, 'video',
            default='{}'), video_id)
    if not video:
        return info

    episode_title = video.get('episodeTitle')

    subtitles = {}
    subtitle_items = video.get('subtitles')
    if isinstance(subtitle_items, list):
        for item in subtitle_items:
            sub_url = url_or_none(item.get('src'))
            if not sub_url:
                continue
            # Entries without a language are filed under 'nl'.
            language = str_or_none(item.get('language')) or 'nl'
            subtitles.setdefault(language, []).append({
                'url': sub_url,
            })

    embed_info = {
        'title': episode_title,
        'description': video.get('description'),
        'thumbnail': url_or_none(
            video.get('still_image_url') or video.get('orig_image_url')),
        'duration': int_or_none(video.get('duration')),
        'timestamp': unified_timestamp(video.get('broadcastDate')),
        'creator': video.get('channel'),
        'series': video.get('title'),
        'episode': episode_title,
        'episode_number': int_or_none(video.get('episodeNumber')),
        'subtitles': subtitles,
    }
    return merge_dicts(embed_info, info)
|
||||||
|
|
||||||
|
def _get_old_info(self, video_id):
|
||||||
metadata = self._download_json(
|
metadata = self._download_json(
|
||||||
'http://e.omroep.nl/metadata/%s' % video_id,
|
'http://e.omroep.nl/metadata/%s' % video_id,
|
||||||
video_id,
|
video_id,
|
||||||
@ -280,7 +394,7 @@ class NPOIE(NPOBaseIE):
|
|||||||
# JSON
|
# JSON
|
||||||
else:
|
else:
|
||||||
video_url = stream_info.get('url')
|
video_url = stream_info.get('url')
|
||||||
if not video_url or video_url in urls:
|
if not video_url or 'vodnotavailable.' in video_url or video_url in urls:
|
||||||
continue
|
continue
|
||||||
urls.add(video_url)
|
urls.add(video_url)
|
||||||
if determine_ext(video_url) == 'm3u8':
|
if determine_ext(video_url) == 'm3u8':
|
||||||
|
@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor):
|
|||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
openload\.(?:co|io|link|pw)|
|
openload\.(?:co|io|link|pw)|
|
||||||
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw)
|
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live)
|
||||||
)
|
)
|
||||||
)/
|
)/
|
||||||
(?:f|embed)/
|
(?:f|embed)/
|
||||||
@ -346,6 +346,9 @@ class OpenloadIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://oload.pw/f/WyKgK8s94N0',
|
'url': 'https://oload.pw/f/WyKgK8s94N0',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.live/f/-Z58UZ-GR4M',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||||
|
@ -5,6 +5,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
@ -75,6 +76,14 @@ class PeriscopeIE(PeriscopeBaseIE):
|
|||||||
'url': broadcast[image],
|
'url': broadcast[image],
|
||||||
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
|
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
|
||||||
|
|
||||||
|
width = int_or_none(broadcast.get('width'))
|
||||||
|
height = int_or_none(broadcast.get('height'))
|
||||||
|
|
||||||
|
def add_width_and_height(f):
|
||||||
|
for key, val in (('width', width), ('height', height)):
|
||||||
|
if not f.get(key):
|
||||||
|
f[key] = val
|
||||||
|
|
||||||
video_urls = set()
|
video_urls = set()
|
||||||
formats = []
|
formats = []
|
||||||
for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
|
for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
|
||||||
@ -83,16 +92,21 @@ class PeriscopeIE(PeriscopeBaseIE):
|
|||||||
continue
|
continue
|
||||||
video_urls.add(video_url)
|
video_urls.add(video_url)
|
||||||
if format_id != 'rtmp':
|
if format_id != 'rtmp':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
video_url, token, 'mp4',
|
video_url, token, 'mp4',
|
||||||
entry_protocol='m3u8_native'
|
entry_protocol='m3u8_native'
|
||||||
if state in ('ended', 'timed_out') else 'm3u8',
|
if state in ('ended', 'timed_out') else 'm3u8',
|
||||||
m3u8_id=format_id, fatal=False))
|
m3u8_id=format_id, fatal=False)
|
||||||
|
if len(m3u8_formats) == 1:
|
||||||
|
add_width_and_height(m3u8_formats[0])
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
continue
|
continue
|
||||||
formats.append({
|
rtmp_format = {
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
|
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
|
||||||
})
|
}
|
||||||
|
add_width_and_height(rtmp_format)
|
||||||
|
formats.append(rtmp_format)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -4,9 +4,11 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -14,7 +16,7 @@ class PornHdIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
|
_VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
||||||
'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5',
|
'md5': '87f1540746c1d32ec7a2305c12b96b25',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9864',
|
'id': '9864',
|
||||||
'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
||||||
@ -23,6 +25,7 @@ class PornHdIE(InfoExtractor):
|
|||||||
'description': 'md5:3748420395e03e31ac96857a8f125b2b',
|
'description': 'md5:3748420395e03e31ac96857a8f125b2b',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
@ -37,6 +40,7 @@ class PornHdIE(InfoExtractor):
|
|||||||
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'skip': 'Not available anymore',
|
'skip': 'Not available anymore',
|
||||||
@ -65,12 +69,14 @@ class PornHdIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, video_url in sources.items():
|
for format_id, video_url in sources.items():
|
||||||
|
video_url = urljoin(url, video_url)
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
height = int_or_none(self._search_regex(
|
height = int_or_none(self._search_regex(
|
||||||
r'^(\d+)[pP]', format_id, 'height', default=None))
|
r'^(\d+)[pP]', format_id, 'height', default=None))
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
'ext': determine_ext(video_url, 'mp4'),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
@ -85,6 +91,11 @@ class PornHdIE(InfoExtractor):
|
|||||||
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
||||||
'thumbnail', fatal=False, group='url')
|
'thumbnail', fatal=False, group='url')
|
||||||
|
|
||||||
|
like_count = int_or_none(self._search_regex(
|
||||||
|
(r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes',
|
||||||
|
r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
|
||||||
|
webpage, 'like count', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
@ -92,6 +103,7 @@ class PornHdIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
'like_count': like_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,6 @@ from .openload import PhantomJSwrapper
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
orderedSet,
|
orderedSet,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
@ -303,14 +302,12 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
page_params = self._parse_json(self._search_regex(
|
def extract_list(meta_key):
|
||||||
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
|
div = self._search_regex(
|
||||||
webpage, 'page parameters', group='data', default='{}'),
|
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
% meta_key, webpage, meta_key, default=None)
|
||||||
tags = categories = None
|
if div:
|
||||||
if page_params:
|
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
|
||||||
tags = page_params.get('tags', '').split(',')
|
|
||||||
categories = page_params.get('categories', '').split(',')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -325,8 +322,8 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'tags': tags,
|
'tags': extract_list('tags'),
|
||||||
'categories': categories,
|
'categories': extract_list('categories'),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,16 +4,12 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
xpath_text,
|
|
||||||
find_xpath_attr,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
xpath_element,
|
|
||||||
ExtractorError,
|
|
||||||
determine_protocol,
|
|
||||||
unsmuggle_url,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -61,107 +57,67 @@ class RadioCanadaIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
_GEO_COUNTRIES = ['CA']
|
||||||
|
_access_token = None
|
||||||
|
_claims = None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _call_api(self, path, video_id=None, app_code=None, query=None):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
if not query:
|
||||||
app_code, video_id = re.match(self._VALID_URL, url).groups()
|
query = {}
|
||||||
|
query.update({
|
||||||
metadata = self._download_xml(
|
'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
|
||||||
'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
|
'output': 'json',
|
||||||
video_id, note='Downloading metadata XML', query={
|
})
|
||||||
|
if video_id:
|
||||||
|
query.update({
|
||||||
'appCode': app_code,
|
'appCode': app_code,
|
||||||
'idMedia': video_id,
|
'idMedia': video_id,
|
||||||
})
|
})
|
||||||
|
if self._access_token:
|
||||||
|
query['access_token'] = self._access_token
|
||||||
|
try:
|
||||||
|
return self._download_json(
|
||||||
|
'https://services.radio-canada.ca/media/' + path, video_id, query=query)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
|
||||||
|
data = self._parse_json(e.cause.read().decode(), None)
|
||||||
|
error = data.get('error_description') or data['errorMessage']['text']
|
||||||
|
raise ExtractorError(error, expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _extract_info(self, app_code, video_id):
|
||||||
|
metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
|
||||||
|
|
||||||
def get_meta(name):
|
def get_meta(name):
|
||||||
el = find_xpath_attr(metadata, './/Meta', 'name', name)
|
for meta in metas:
|
||||||
return el.text if el is not None else None
|
if meta.get('name') == name:
|
||||||
|
text = meta.get('text')
|
||||||
|
if text:
|
||||||
|
return text
|
||||||
|
|
||||||
# protectionType does not necessarily mean the video is DRM protected (see
|
# protectionType does not necessarily mean the video is DRM protected (see
|
||||||
# https://github.com/rg3/youtube-dl/pull/18609).
|
# https://github.com/rg3/youtube-dl/pull/18609).
|
||||||
if get_meta('protectionType'):
|
if get_meta('protectionType'):
|
||||||
self.report_warning('This video is probably DRM protected.')
|
self.report_warning('This video is probably DRM protected.')
|
||||||
|
|
||||||
device_types = ['ipad']
|
query = {
|
||||||
if not smuggled_data:
|
'connectionType': 'hd',
|
||||||
device_types.append('flash')
|
'deviceType': 'ipad',
|
||||||
device_types.append('android')
|
'multibitrate': 'true',
|
||||||
|
}
|
||||||
formats = []
|
if self._claims:
|
||||||
error = None
|
query['claims'] = self._claims
|
||||||
# TODO: extract f4m formats
|
v_data = self._call_api('validation/v2/', video_id, app_code, query)
|
||||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
v_url = v_data.get('url')
|
||||||
for device_type in device_types:
|
if not v_url:
|
||||||
validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx'
|
error = v_data['message']
|
||||||
query = {
|
if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
|
||||||
'appCode': app_code,
|
raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
|
||||||
'idMedia': video_id,
|
if error == 'Le contenu sélectionné est disponible seulement en premium':
|
||||||
'connectionType': 'broadband',
|
self.raise_login_required(error)
|
||||||
'multibitrate': 'true',
|
|
||||||
'deviceType': device_type,
|
|
||||||
}
|
|
||||||
if smuggled_data:
|
|
||||||
validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
|
|
||||||
query.update(smuggled_data)
|
|
||||||
else:
|
|
||||||
query.update({
|
|
||||||
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
|
|
||||||
'paysJ391wsHjbOJwvCs26toz': 'CA',
|
|
||||||
'bypasslock': 'NZt5K62gRqfc',
|
|
||||||
})
|
|
||||||
v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)
|
|
||||||
v_url = xpath_text(v_data, 'url')
|
|
||||||
if not v_url:
|
|
||||||
continue
|
|
||||||
if v_url == 'null':
|
|
||||||
error = xpath_text(v_data, 'message')
|
|
||||||
continue
|
|
||||||
ext = determine_ext(v_url)
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
v_url, video_id, f4m_id='hds', fatal=False))
|
|
||||||
else:
|
|
||||||
ext = determine_ext(v_url)
|
|
||||||
bitrates = xpath_element(v_data, 'bitrates')
|
|
||||||
for url_e in bitrates.findall('url'):
|
|
||||||
tbr = int_or_none(url_e.get('bitrate'))
|
|
||||||
if not tbr:
|
|
||||||
continue
|
|
||||||
f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
|
|
||||||
protocol = determine_protocol({'url': f_url})
|
|
||||||
f = {
|
|
||||||
'format_id': '%s-%d' % (protocol, tbr),
|
|
||||||
'url': f_url,
|
|
||||||
'ext': 'flv' if protocol == 'rtmp' else ext,
|
|
||||||
'protocol': protocol,
|
|
||||||
'width': int_or_none(url_e.get('width')),
|
|
||||||
'height': int_or_none(url_e.get('height')),
|
|
||||||
'tbr': tbr,
|
|
||||||
}
|
|
||||||
mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
|
|
||||||
if mobj:
|
|
||||||
f.update({
|
|
||||||
'url': mobj.group('url') + mobj.group('auth'),
|
|
||||||
'play_path': mobj.group('playpath'),
|
|
||||||
})
|
|
||||||
formats.append(f)
|
|
||||||
if protocol == 'rtsp':
|
|
||||||
base_url = self._search_regex(
|
|
||||||
r'rtsp://([^?]+)', f_url, 'base url', default=None)
|
|
||||||
if base_url:
|
|
||||||
base_url = 'http://' + base_url
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
base_url + '/playlist.m3u8', video_id, 'mp4',
|
|
||||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
base_url + '/manifest.f4m', video_id,
|
|
||||||
f4m_id='hds', fatal=False))
|
|
||||||
if not formats and error:
|
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||||
|
formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@ -186,11 +142,14 @@ class RadioCanadaIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self._extract_info(*re.match(self._VALID_URL, url).groups())
|
||||||
|
|
||||||
|
|
||||||
class RadioCanadaAudioVideoIE(InfoExtractor):
|
class RadioCanadaAudioVideoIE(InfoExtractor):
|
||||||
'radiocanada:audiovideo'
|
'radiocanada:audiovideo'
|
||||||
_VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
|
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '7527184',
|
'id': '7527184',
|
||||||
@ -203,7 +162,10 @@ class RadioCanadaAudioVideoIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
|
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
|
||||||
|
@ -288,7 +288,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class RaiIE(RaiBaseIE):
|
class RaiIE(RaiBaseIE):
|
||||||
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# var uniquename = "ContentItem-..."
|
# var uniquename = "ContentItem-..."
|
||||||
# data-id="ContentItem-..."
|
# data-id="ContentItem-..."
|
||||||
@ -375,6 +375,9 @@ class RaiIE(RaiBaseIE):
|
|||||||
# Direct MMS URL
|
# Direct MMS URL
|
||||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_from_content_id(self, content_id, url):
|
def _extract_from_content_id(self, content_id, url):
|
||||||
|
@ -21,7 +21,17 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class RutubeBaseIE(InfoExtractor):
|
class RutubeBaseIE(InfoExtractor):
|
||||||
def _extract_video(self, video, video_id=None, require_title=True):
|
def _download_api_info(self, video_id, query=None):
|
||||||
|
if not query:
|
||||||
|
query = {}
|
||||||
|
query['format'] = 'json'
|
||||||
|
return self._download_json(
|
||||||
|
'http://rutube.ru/api/video/%s/' % video_id,
|
||||||
|
video_id, 'Downloading video JSON',
|
||||||
|
'Unable to download video JSON', query=query)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_info(video, video_id=None, require_title=True):
|
||||||
title = video['title'] if require_title else video.get('title')
|
title = video['title'] if require_title else video.get('title')
|
||||||
|
|
||||||
age_limit = video.get('is_adult')
|
age_limit = video.get('is_adult')
|
||||||
@ -32,7 +42,7 @@ class RutubeBaseIE(InfoExtractor):
|
|||||||
category = try_get(video, lambda x: x['category']['name'])
|
category = try_get(video, lambda x: x['category']['name'])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video.get('id') or video_id,
|
'id': video.get('id') or video_id if video_id else video['id'],
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': video.get('description'),
|
'description': video.get('description'),
|
||||||
'thumbnail': video.get('thumbnail_url'),
|
'thumbnail': video.get('thumbnail_url'),
|
||||||
@ -47,6 +57,42 @@ class RutubeBaseIE(InfoExtractor):
|
|||||||
'is_live': bool_or_none(video.get('is_livestream')),
|
'is_live': bool_or_none(video.get('is_livestream')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _download_and_extract_info(self, video_id, query=None):
|
||||||
|
return self._extract_info(
|
||||||
|
self._download_api_info(video_id, query=query), video_id)
|
||||||
|
|
||||||
|
def _download_api_options(self, video_id, query=None):
|
||||||
|
if not query:
|
||||||
|
query = {}
|
||||||
|
query['format'] = 'json'
|
||||||
|
return self._download_json(
|
||||||
|
'http://rutube.ru/api/play/options/%s/' % video_id,
|
||||||
|
video_id, 'Downloading options JSON',
|
||||||
|
'Unable to download options JSON',
|
||||||
|
headers=self.geo_verification_headers(), query=query)
|
||||||
|
|
||||||
|
def _extract_formats(self, options, video_id):
|
||||||
|
formats = []
|
||||||
|
for format_id, format_url in options['video_balancer'].items():
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||||
|
elif ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
format_url, video_id, f4m_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _download_and_extract_formats(self, video_id, query=None):
|
||||||
|
return self._extract_formats(
|
||||||
|
self._download_api_options(video_id, query=query), video_id)
|
||||||
|
|
||||||
|
|
||||||
class RutubeIE(RutubeBaseIE):
|
class RutubeIE(RutubeBaseIE):
|
||||||
IE_NAME = 'rutube'
|
IE_NAME = 'rutube'
|
||||||
@ -55,13 +101,13 @@ class RutubeIE(RutubeBaseIE):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||||
'md5': '79938ade01294ef7e27574890d0d3769',
|
'md5': '1d24f180fac7a02f3900712e5a5764d6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Раненный кенгуру забежал в аптеку',
|
'title': 'Раненный кенгуру забежал в аптеку',
|
||||||
'description': 'http://www.ntdtv.ru ',
|
'description': 'http://www.ntdtv.ru ',
|
||||||
'duration': 80,
|
'duration': 81,
|
||||||
'uploader': 'NTDRussian',
|
'uploader': 'NTDRussian',
|
||||||
'uploader_id': '29790',
|
'uploader_id': '29790',
|
||||||
'timestamp': 1381943602,
|
'timestamp': 1381943602,
|
||||||
@ -94,39 +140,12 @@ class RutubeIE(RutubeBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
info = self._download_and_extract_info(video_id)
|
||||||
video = self._download_json(
|
info['formats'] = self._download_and_extract_formats(video_id)
|
||||||
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
|
||||||
video_id, 'Downloading video JSON')
|
|
||||||
|
|
||||||
info = self._extract_video(video, video_id)
|
|
||||||
|
|
||||||
options = self._download_json(
|
|
||||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
|
||||||
video_id, 'Downloading options JSON',
|
|
||||||
headers=self.geo_verification_headers())
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format_id, format_url in options['video_balancer'].items():
|
|
||||||
ext = determine_ext(format_url)
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
|
||||||
'format_id': format_id,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info['formats'] = formats
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
class RutubeEmbedIE(InfoExtractor):
|
class RutubeEmbedIE(RutubeBaseIE):
|
||||||
IE_NAME = 'rutube:embed'
|
IE_NAME = 'rutube:embed'
|
||||||
IE_DESC = 'Rutube embedded videos'
|
IE_DESC = 'Rutube embedded videos'
|
||||||
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
|
||||||
@ -135,7 +154,7 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'a10e53b86e8f349080f718582ce4c661',
|
'id': 'a10e53b86e8f349080f718582ce4c661',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'timestamp': 1387830582,
|
'timestamp': 1387830582,
|
||||||
'upload_date': '20131223',
|
'upload_date': '20131223',
|
||||||
'uploader_id': '297833',
|
'uploader_id': '297833',
|
||||||
@ -149,16 +168,26 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://rutube.ru/play/embed/8083783',
|
'url': 'http://rutube.ru/play/embed/8083783',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# private video
|
||||||
|
'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
embed_id = self._match_id(url)
|
embed_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, embed_id)
|
# Query may contain private videos token and should be passed to API
|
||||||
|
# requests (see #19163)
|
||||||
canonical_url = self._html_search_regex(
|
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
|
options = self._download_api_options(embed_id, query)
|
||||||
'Canonical URL')
|
video_id = options['effective_video']
|
||||||
return self.url_result(canonical_url, RutubeIE.ie_key())
|
formats = self._extract_formats(options, video_id)
|
||||||
|
info = self._download_and_extract_info(video_id, query)
|
||||||
|
info.update({
|
||||||
|
'extractor_key': 'Rutube',
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
class RutubePlaylistBaseIE(RutubeBaseIE):
|
class RutubePlaylistBaseIE(RutubeBaseIE):
|
||||||
@ -181,7 +210,7 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
|
|||||||
video_url = url_or_none(result.get('video_url'))
|
video_url = url_or_none(result.get('video_url'))
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
entry = self._extract_video(result, require_title=False)
|
entry = self._extract_info(result, require_title=False)
|
||||||
entry.update({
|
entry.update({
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@ -1,31 +1,44 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class ServusIE(InfoExtractor):
|
class ServusIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)'
|
_VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
|
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
|
||||||
'md5': '046dee641cda1c4cabe13baef3be2c1c',
|
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'AA-1T6VBU5PW1W12',
|
'id': 'AA-1T6VBU5PW1W12',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Die Grünen aus Volkssicht',
|
'title': 'Die Grünen aus Sicht des Volkes',
|
||||||
'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba',
|
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
|
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url).upper()
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._search_regex(
|
||||||
|
(r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||||
|
r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
|
||||||
|
webpage, 'title', default=None,
|
||||||
|
group='title') or self._og_search_title(webpage)
|
||||||
|
title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
@ -61,7 +61,8 @@ class SixPlayIE(InfoExtractor):
|
|||||||
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for asset in clip_data['assets']:
|
assets = clip_data.get('assets') or []
|
||||||
|
for asset in assets:
|
||||||
asset_url = asset.get('full_physical_path')
|
asset_url = asset.get('full_physical_path')
|
||||||
protocol = asset.get('protocol')
|
protocol = asset.get('protocol')
|
||||||
if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
|
if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
|
||||||
|
@ -16,8 +16,10 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unified_strdate,
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -34,7 +36,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||||
(?!stations/track)
|
(?!stations/track)
|
||||||
(?P<uploader>[\w\d-]+)/
|
(?P<uploader>[\w\d-]+)/
|
||||||
(?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
|
(?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
|
||||||
(?P<title>[\w\d-]+)/?
|
(?P<title>[\w\d-]+)/?
|
||||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||||
@ -50,12 +52,17 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '62986583',
|
'id': '62986583',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'upload_date': '20121011',
|
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||||
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
|
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
|
||||||
'uploader': 'E.T. ExTerrestrial Music',
|
'uploader': 'E.T. ExTerrestrial Music',
|
||||||
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
'timestamp': 1349920598,
|
||||||
|
'upload_date': '20121011',
|
||||||
'duration': 143,
|
'duration': 143,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# not streamable song
|
# not streamable song
|
||||||
@ -67,9 +74,14 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'title': 'Goldrushed',
|
'title': 'Goldrushed',
|
||||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||||
'uploader': 'The Royal Concept',
|
'uploader': 'The Royal Concept',
|
||||||
|
'timestamp': 1337635207,
|
||||||
'upload_date': '20120521',
|
'upload_date': '20120521',
|
||||||
'duration': 227,
|
'duration': 30,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp
|
# rtmp
|
||||||
@ -84,11 +96,16 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'id': '123998367',
|
'id': '123998367',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||||
'uploader': 'jaimeMF',
|
|
||||||
'description': 'test chars: \"\'/\\ä↭',
|
'description': 'test chars: \"\'/\\ä↭',
|
||||||
|
'uploader': 'jaimeMF',
|
||||||
|
'timestamp': 1386604920,
|
||||||
'upload_date': '20131209',
|
'upload_date': '20131209',
|
||||||
'duration': 9,
|
'duration': 9,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# private link (alt format)
|
# private link (alt format)
|
||||||
@ -99,11 +116,16 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'id': '123998367',
|
'id': '123998367',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||||
'uploader': 'jaimeMF',
|
|
||||||
'description': 'test chars: \"\'/\\ä↭',
|
'description': 'test chars: \"\'/\\ä↭',
|
||||||
|
'uploader': 'jaimeMF',
|
||||||
|
'timestamp': 1386604920,
|
||||||
'upload_date': '20131209',
|
'upload_date': '20131209',
|
||||||
'duration': 9,
|
'duration': 9,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# downloadable song
|
# downloadable song
|
||||||
@ -116,9 +138,14 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'title': 'Bus Brakes',
|
'title': 'Bus Brakes',
|
||||||
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
|
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
|
||||||
'uploader': 'oddsamples',
|
'uploader': 'oddsamples',
|
||||||
|
'timestamp': 1389232924,
|
||||||
'upload_date': '20140109',
|
'upload_date': '20140109',
|
||||||
'duration': 17,
|
'duration': 17,
|
||||||
'license': 'cc-by-sa',
|
'license': 'cc-by-sa',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# private link, downloadable format
|
# private link, downloadable format
|
||||||
@ -131,9 +158,14 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||||
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
||||||
'uploader': 'Ori Uplift Music',
|
'uploader': 'Ori Uplift Music',
|
||||||
|
'timestamp': 1504206263,
|
||||||
'upload_date': '20170831',
|
'upload_date': '20170831',
|
||||||
'duration': 7449,
|
'duration': 7449,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# no album art, use avatar pic for thumbnail
|
# no album art, use avatar pic for thumbnail
|
||||||
@ -146,10 +178,15 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'title': 'Sideways (Prod. Mad Real)',
|
'title': 'Sideways (Prod. Mad Real)',
|
||||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
'uploader': 'garyvee',
|
'uploader': 'garyvee',
|
||||||
|
'timestamp': 1488152409,
|
||||||
'upload_date': '20170226',
|
'upload_date': '20170226',
|
||||||
'duration': 207,
|
'duration': 207,
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -157,7 +194,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
_CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb'
|
_CLIENT_ID = 'NmW1FlPaiL94ueEu7oziOWjYEzZzQDcK'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
@ -175,22 +212,33 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
|
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
|
||||||
track_id = compat_str(info['id'])
|
track_id = compat_str(info['id'])
|
||||||
|
title = info['title']
|
||||||
name = full_title or track_id
|
name = full_title or track_id
|
||||||
if quiet:
|
if quiet:
|
||||||
self.report_extraction(name)
|
self.report_extraction(name)
|
||||||
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
|
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
|
||||||
if isinstance(thumbnail, compat_str):
|
if isinstance(thumbnail, compat_str):
|
||||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||||
|
username = try_get(info, lambda x: x['user']['username'], compat_str)
|
||||||
|
|
||||||
|
def extract_count(key):
|
||||||
|
return int_or_none(info.get('%s_count' % key))
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
'uploader': info.get('user', {}).get('username'),
|
'uploader': username,
|
||||||
'upload_date': unified_strdate(info.get('created_at')),
|
'timestamp': unified_timestamp(info.get('created_at')),
|
||||||
'title': info['title'],
|
'title': title,
|
||||||
'description': info.get('description'),
|
'description': info.get('description'),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': int_or_none(info.get('duration'), 1000),
|
'duration': int_or_none(info.get('duration'), 1000),
|
||||||
'webpage_url': info.get('permalink_url'),
|
'webpage_url': info.get('permalink_url'),
|
||||||
'license': info.get('license'),
|
'license': info.get('license'),
|
||||||
|
'view_count': extract_count('playback'),
|
||||||
|
'like_count': extract_count('favoritings'),
|
||||||
|
'comment_count': extract_count('comment'),
|
||||||
|
'repost_count': extract_count('reposts'),
|
||||||
|
'genre': info.get('genre'),
|
||||||
}
|
}
|
||||||
formats = []
|
formats = []
|
||||||
query = {'client_id': self._CLIENT_ID}
|
query = {'client_id': self._CLIENT_ID}
|
||||||
@ -368,7 +416,6 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
|
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
|
||||||
_API_BASE = 'https://api.soundcloud.com'
|
|
||||||
_API_V2_BASE = 'https://api-v2.soundcloud.com'
|
_API_V2_BASE = 'https://api-v2.soundcloud.com'
|
||||||
|
|
||||||
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
||||||
@ -389,21 +436,30 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
|
|||||||
next_href, playlist_id, 'Downloading track page %s' % (i + 1))
|
next_href, playlist_id, 'Downloading track page %s' % (i + 1))
|
||||||
|
|
||||||
collection = response['collection']
|
collection = response['collection']
|
||||||
if not collection:
|
|
||||||
break
|
|
||||||
|
|
||||||
def resolve_permalink_url(candidates):
|
if not isinstance(collection, list):
|
||||||
|
collection = []
|
||||||
|
|
||||||
|
# Empty collection may be returned, in this case we proceed
|
||||||
|
# straight to next_href
|
||||||
|
|
||||||
|
def resolve_entry(candidates):
|
||||||
for cand in candidates:
|
for cand in candidates:
|
||||||
if isinstance(cand, dict):
|
if not isinstance(cand, dict):
|
||||||
permalink_url = cand.get('permalink_url')
|
continue
|
||||||
entry_id = self._extract_id(cand)
|
permalink_url = url_or_none(cand.get('permalink_url'))
|
||||||
if permalink_url and permalink_url.startswith('http'):
|
if not permalink_url:
|
||||||
return permalink_url, entry_id
|
continue
|
||||||
|
return self.url_result(
|
||||||
|
permalink_url,
|
||||||
|
ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
|
||||||
|
video_id=self._extract_id(cand),
|
||||||
|
video_title=cand.get('title'))
|
||||||
|
|
||||||
for e in collection:
|
for e in collection:
|
||||||
permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
|
entry = resolve_entry((e, e.get('track'), e.get('playlist')))
|
||||||
if permalink_url:
|
if entry:
|
||||||
entries.append(self.url_result(permalink_url, video_id=entry_id))
|
entries.append(entry)
|
||||||
|
|
||||||
next_href = response.get('next_href')
|
next_href = response.get('next_href')
|
||||||
if not next_href:
|
if not next_href:
|
||||||
@ -429,46 +485,53 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
|
|||||||
(?:(?:www|m)\.)?soundcloud\.com/
|
(?:(?:www|m)\.)?soundcloud\.com/
|
||||||
(?P<user>[^/]+)
|
(?P<user>[^/]+)
|
||||||
(?:/
|
(?:/
|
||||||
(?P<rsrc>tracks|sets|reposts|likes|spotlight)
|
(?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
|
||||||
)?
|
)?
|
||||||
/?(?:[?#].*)?$
|
/?(?:[?#].*)?$
|
||||||
'''
|
'''
|
||||||
IE_NAME = 'soundcloud:user'
|
IE_NAME = 'soundcloud:user'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler',
|
'url': 'https://soundcloud.com/soft-cell-official',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '207965082',
|
||||||
'title': 'The Akashic Chronicler (All)',
|
'title': 'Soft Cell (All)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 74,
|
'playlist_mincount': 28,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
|
'url': 'https://soundcloud.com/soft-cell-official/tracks',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '207965082',
|
||||||
'title': 'The Akashic Chronicler (Tracks)',
|
'title': 'Soft Cell (Tracks)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 37,
|
'playlist_mincount': 27,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
|
'url': 'https://soundcloud.com/soft-cell-official/albums',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '207965082',
|
||||||
'title': 'The Akashic Chronicler (Playlists)',
|
'title': 'Soft Cell (Albums)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
|
}, {
|
||||||
|
'url': 'https://soundcloud.com/jcv246/sets',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12982173',
|
||||||
|
'title': 'Jordi / cv (Playlists)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
|
'url': 'https://soundcloud.com/jcv246/reposts',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '12982173',
|
||||||
'title': 'The Akashic Chronicler (Reposts)',
|
'title': 'Jordi / cv (Reposts)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 6,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
|
'url': 'https://soundcloud.com/clalberg/likes',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '11817582',
|
||||||
'title': 'The Akashic Chronicler (Likes)',
|
'title': 'clalberg (Likes)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 321,
|
'playlist_mincount': 5,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -479,10 +542,11 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
_BASE_URL_MAP = {
|
_BASE_URL_MAP = {
|
||||||
'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE,
|
'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
|
'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
}
|
}
|
||||||
@ -490,6 +554,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
|
|||||||
_TITLE_MAP = {
|
_TITLE_MAP = {
|
||||||
'all': 'All',
|
'all': 'All',
|
||||||
'tracks': 'Tracks',
|
'tracks': 'Tracks',
|
||||||
|
'albums': 'Albums',
|
||||||
'sets': 'Playlists',
|
'sets': 'Playlists',
|
||||||
'reposts': 'Reposts',
|
'reposts': 'Reposts',
|
||||||
'likes': 'Likes',
|
'likes': 'Likes',
|
||||||
|
@ -5,14 +5,17 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SpankBangIE(InfoExtractor):
|
class SpankBangIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
|
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||||
@ -41,29 +44,71 @@ class SpankBangIE(InfoExtractor):
|
|||||||
# 4k
|
# 4k
|
||||||
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
|
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://m.spankbang.com/3vvn/play',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://spankbang.com/2y3td/embed/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id, headers={
|
webpage = self._download_webpage(
|
||||||
'Cookie': 'country=US'
|
url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
|
||||||
})
|
video_id, headers={'Cookie': 'country=US'})
|
||||||
|
|
||||||
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
|
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s is not available' % video_id, expected=True)
|
'Video %s is not available' % video_id, expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for mobj in re.finditer(
|
|
||||||
r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
|
def extract_format(format_id, format_url):
|
||||||
webpage):
|
f_url = url_or_none(format_url)
|
||||||
format_id, format_url = mobj.group('id', 'url')
|
if not f_url:
|
||||||
|
return
|
||||||
f = parse_resolution(format_id)
|
f = parse_resolution(format_id)
|
||||||
f.update({
|
f.update({
|
||||||
'url': format_url,
|
'url': f_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
})
|
})
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
|
STREAM_URL_PREFIX = 'stream_url_'
|
||||||
|
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
|
||||||
|
% STREAM_URL_PREFIX, webpage):
|
||||||
|
extract_format(mobj.group('id', 'url'))
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
stream_key = self._search_regex(
|
||||||
|
r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'stream key', group='value')
|
||||||
|
|
||||||
|
sb_csrf_session = self._get_cookies(
|
||||||
|
'https://spankbang.com')['sb_csrf_session'].value
|
||||||
|
|
||||||
|
stream = self._download_json(
|
||||||
|
'https://spankbang.com/api/videos/stream', video_id,
|
||||||
|
'Downloading stream JSON', data=urlencode_postdata({
|
||||||
|
'id': stream_key,
|
||||||
|
'data': 0,
|
||||||
|
'sb_csrf_session': sb_csrf_session,
|
||||||
|
}), headers={
|
||||||
|
'Referer': url,
|
||||||
|
'X-CSRFToken': sb_csrf_session,
|
||||||
|
})
|
||||||
|
|
||||||
|
for format_id, format_url in stream.items():
|
||||||
|
if format_id.startswith(STREAM_URL_PREFIX):
|
||||||
|
extract_format(
|
||||||
|
format_id[len(STREAM_URL_PREFIX):], format_url)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
@ -94,3 +139,33 @@ class SpankBangIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SpankBangPlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ug0k',
|
||||||
|
'title': 'Big Ass Titties',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 50,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
|
||||||
|
|
||||||
|
entries = [self.url_result(
|
||||||
|
'https://spankbang.com/%s/video' % video_id,
|
||||||
|
ie=SpankBangIE.ie_key(), video_id=video_id)
|
||||||
|
for video_id in orderedSet(re.findall(
|
||||||
|
r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
|
||||||
|
fatal=False)
|
||||||
|
|
||||||
|
return self.playlist_result(entries, playlist_id, title)
|
||||||
|
@ -27,6 +27,7 @@ class TeachableBaseIE(InfoExtractor):
|
|||||||
'market.saleshacker.com': 'saleshacker',
|
'market.saleshacker.com': 'saleshacker',
|
||||||
'learnability.org': 'learnability',
|
'learnability.org': 'learnability',
|
||||||
'edurila.com': 'edurila',
|
'edurila.com': 'edurila',
|
||||||
|
'courses.workitdaily.com': 'workitdaily',
|
||||||
}
|
}
|
||||||
|
|
||||||
_VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
|
_VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
|
||||||
|
@ -3,22 +3,19 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .radiocanada import RadioCanadaIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
|
||||||
js_to_json,
|
|
||||||
urlencode_postdata,
|
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
smuggle_url,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TouTvIE(InfoExtractor):
|
class TouTvIE(RadioCanadaIE):
|
||||||
_NETRC_MACHINE = 'toutv'
|
_NETRC_MACHINE = 'toutv'
|
||||||
IE_NAME = 'tou.tv'
|
IE_NAME = 'tou.tv'
|
||||||
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
|
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
|
||||||
_access_token = None
|
|
||||||
_claims = None
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
|
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
|
||||||
@ -46,18 +43,14 @@ class TouTvIE(InfoExtractor):
|
|||||||
email, password = self._get_login_info()
|
email, password = self._get_login_info()
|
||||||
if email is None:
|
if email is None:
|
||||||
return
|
return
|
||||||
state = 'http://ici.tou.tv/'
|
|
||||||
webpage = self._download_webpage(state, None, 'Downloading homepage')
|
|
||||||
toutvlogin = self._parse_json(self._search_regex(
|
|
||||||
r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
|
|
||||||
authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
|
|
||||||
login_webpage = self._download_webpage(
|
login_webpage = self._download_webpage(
|
||||||
authorize_url, None, 'Downloading login page', query={
|
'https://services.radio-canada.ca/auth/oauth/v2/authorize',
|
||||||
'client_id': toutvlogin['clientId'],
|
None, 'Downloading login page', query={
|
||||||
'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
|
'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36',
|
||||||
|
'redirect_uri': 'https://ici.tou.tv/logincallback',
|
||||||
'response_type': 'token',
|
'response_type': 'token',
|
||||||
'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
|
'scope': 'id.write media-validation.read',
|
||||||
'state': state,
|
'state': '/',
|
||||||
})
|
})
|
||||||
|
|
||||||
def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
|
def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
|
||||||
@ -86,12 +79,7 @@ class TouTvIE(InfoExtractor):
|
|||||||
self._access_token = self._search_regex(
|
self._access_token = self._search_regex(
|
||||||
r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||||
urlh.geturl(), 'access token')
|
urlh.geturl(), 'access token')
|
||||||
self._claims = self._download_json(
|
self._claims = self._call_api('validation/v2/getClaims')['claims']
|
||||||
'https://services.radio-canada.ca/media/validation/v2/getClaims',
|
|
||||||
None, 'Extracting Claims', query={
|
|
||||||
'token': self._access_token,
|
|
||||||
'access_token': self._access_token,
|
|
||||||
})['claims']
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path = self._match_id(url)
|
path = self._match_id(url)
|
||||||
@ -102,19 +90,10 @@ class TouTvIE(InfoExtractor):
|
|||||||
self.report_warning('This video is probably DRM protected.', path)
|
self.report_warning('This video is probably DRM protected.', path)
|
||||||
video_id = metadata['IdMedia']
|
video_id = metadata['IdMedia']
|
||||||
details = metadata['Details']
|
details = metadata['Details']
|
||||||
title = details['OriginalTitle']
|
|
||||||
video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
|
|
||||||
if self._access_token and self._claims:
|
|
||||||
video_url = smuggle_url(video_url, {
|
|
||||||
'access_token': self._access_token,
|
|
||||||
'claims': self._claims,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
return merge_dicts({
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': video_url,
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': details.get('OriginalTitle'),
|
||||||
'thumbnail': details.get('ImageUrl'),
|
'thumbnail': details.get('ImageUrl'),
|
||||||
'duration': int_or_none(details.get('LengthInSeconds')),
|
'duration': int_or_none(details.get('LengthInSeconds')),
|
||||||
}
|
}, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))
|
||||||
|
75
youtube_dl/extractor/trunews.py
Normal file
75
youtube_dl/extractor/trunews.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TruNewsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
|
||||||
|
'md5': 'a19c024c3906ff954fac9b96ce66bb08',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5c5a21e65d3c196e1c0020cc',
|
||||||
|
'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
|
||||||
|
'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
|
||||||
|
'duration': 3685,
|
||||||
|
'timestamp': 1549411440,
|
||||||
|
'upload_date': '20190206',
|
||||||
|
},
|
||||||
|
'add_ie': ['Zype'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
video = self._download_json(
|
||||||
|
'https://api.zype.com/videos', display_id, query={
|
||||||
|
'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
|
||||||
|
'per_page': 1,
|
||||||
|
'active': 'true',
|
||||||
|
'friendly_title': display_id,
|
||||||
|
})['response'][0]
|
||||||
|
|
||||||
|
zype_id = video['_id']
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
thumbnails_list = video.get('thumbnails')
|
||||||
|
if isinstance(thumbnails_list, list):
|
||||||
|
for thumbnail in thumbnails_list:
|
||||||
|
if not isinstance(thumbnail, dict):
|
||||||
|
continue
|
||||||
|
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||||
|
if not thumbnail_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'width': int_or_none(thumbnail.get('width')),
|
||||||
|
'height': int_or_none(thumbnail.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': update_url_query(
|
||||||
|
'https://player.zype.com/embed/%s.js' % zype_id,
|
||||||
|
{'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}),
|
||||||
|
'ie_key': 'Zype',
|
||||||
|
'id': zype_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': video.get('title'),
|
||||||
|
'description': dict_get(video, ('description', 'ott_description', 'short_description')),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
'timestamp': unified_timestamp(video.get('published_at')),
|
||||||
|
'average_rating': float_or_none(video.get('rating')),
|
||||||
|
'view_count': int_or_none(video.get('request_count')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
}
|
@ -4,44 +4,72 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .turner import TurnerBaseIE
|
from .turner import TurnerBaseIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TruTVIE(TurnerBaseIE):
|
class TruTVIE(TurnerBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))'
|
_VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html',
|
'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
|
||||||
'md5': '2cdc844f317579fed1a7251b087ff417',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets',
|
'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'You Won\'t Believe These Sports Bets',
|
'title': 'Sunlight-Activated Flower',
|
||||||
'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.',
|
'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
|
||||||
'upload_date': '20130305',
|
},
|
||||||
}
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
auth_required = False
|
|
||||||
if path:
|
if video_id:
|
||||||
data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path
|
path = 'episode'
|
||||||
|
display_id = video_id
|
||||||
else:
|
else:
|
||||||
webpage = self._download_webpage(url, video_id)
|
path = 'series/clip'
|
||||||
video_id = self._search_regex(
|
display_id = clip_slug
|
||||||
r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';",
|
|
||||||
webpage, 'video id', default=video_id)
|
data = self._download_json(
|
||||||
auth_required = self._search_regex(
|
'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id),
|
||||||
r'TTV\.TVE\.authRequired\s*=\s*(true|false);',
|
display_id)
|
||||||
webpage, 'auth required', default='false') == 'true'
|
video_data = data['episode'] if video_id else data['info']
|
||||||
data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id
|
media_id = video_data['mediaId']
|
||||||
return self._extract_cvp_info(
|
title = video_data['title'].strip()
|
||||||
data_src, path, {
|
|
||||||
'secure': {
|
info = self._extract_ngtv_info(
|
||||||
'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big',
|
media_id, {}, {
|
||||||
'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': url,
|
'url': url,
|
||||||
'site_name': 'truTV',
|
'site_name': 'truTV',
|
||||||
'auth_required': auth_required,
|
'auth_required': video_data.get('isAuthRequired'),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for image in video_data.get('images', []):
|
||||||
|
image_url = image.get('srcUrl')
|
||||||
|
if not image_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': image_url,
|
||||||
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
|
info.update({
|
||||||
|
'id': media_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'timestamp': parse_iso8601(video_data.get('publicationDate')),
|
||||||
|
'series': video_data.get('showTitle'),
|
||||||
|
'season_number': int_or_none(video_data.get('seasonNum')),
|
||||||
|
'episode_number': int_or_none(video_data.get('episodeNum')),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
@ -1,14 +1,16 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
|
||||||
clean_html,
|
clean_html,
|
||||||
get_element_by_attribute,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
get_element_by_attribute,
|
||||||
|
orderedSet,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -19,12 +21,12 @@ class TVPIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
|
'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
|
||||||
'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
|
'md5': 'a21eb0aa862f25414430f15fdfb9e76c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '194536',
|
'id': '194536',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Czas honoru, I seria – odc. 13',
|
'title': 'Czas honoru, odc. 13 – Władek',
|
||||||
'description': 'md5:381afa5bca72655fe94b05cfe82bf53d',
|
'description': 'md5:437f48b93558370b031740546b696e24',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
|
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
|
||||||
@ -45,6 +47,7 @@ class TVPIE(InfoExtractor):
|
|||||||
'title': 'Wiadomości, 28.09.2017, 19:30',
|
'title': 'Wiadomości, 28.09.2017, 19:30',
|
||||||
'description': 'Wydanie główne codziennego serwisu informacyjnego.'
|
'description': 'Wydanie główne codziennego serwisu informacyjnego.'
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
|
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -75,8 +78,10 @@ class TVPIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'tvp:' + video_id,
|
'url': 'tvp:' + video_id,
|
||||||
'description': self._og_search_description(webpage, default=None),
|
'description': self._og_search_description(
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
'description', webpage, default=None),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
'ie_key': 'TVPEmbed',
|
'ie_key': 'TVPEmbed',
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,6 +92,15 @@ class TVPEmbedIE(InfoExtractor):
|
|||||||
_VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)'
|
_VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'tvp:194536',
|
||||||
|
'md5': 'a21eb0aa862f25414430f15fdfb9e76c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '194536',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Czas honoru, odc. 13 – Władek',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# not available
|
||||||
'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268',
|
'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268',
|
||||||
'md5': '8c9cd59d16edabf39331f93bf8a766c7',
|
'md5': '8c9cd59d16edabf39331f93bf8a766c7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -94,6 +108,7 @@ class TVPEmbedIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Panorama, 07.12.2015, 15:40',
|
'title': 'Panorama, 07.12.2015, 15:40',
|
||||||
},
|
},
|
||||||
|
'skip': 'Transmisja została zakończona lub materiał niedostępny',
|
||||||
}, {
|
}, {
|
||||||
'url': 'tvp:22670268',
|
'url': 'tvp:22670268',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -105,10 +120,13 @@ class TVPEmbedIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
|
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
|
||||||
|
|
||||||
error_massage = get_element_by_attribute('class', 'msg error', webpage)
|
error = self._html_search_regex(
|
||||||
if error_massage:
|
r'(?s)<p[^>]+\bclass=["\']notAvailable__text["\'][^>]*>(.+?)</p>',
|
||||||
|
webpage, 'error', default=None) or clean_html(
|
||||||
|
get_element_by_attribute('class', 'msg error', webpage))
|
||||||
|
if error:
|
||||||
raise ExtractorError('%s said: %s' % (
|
raise ExtractorError('%s said: %s' % (
|
||||||
self.IE_NAME, clean_html(error_massage)), expected=True)
|
self.IE_NAME, clean_html(error)), expected=True)
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
|
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
|
||||||
@ -180,48 +198,55 @@ class TVPEmbedIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class TVPSeriesIE(InfoExtractor):
|
class TVPWebsiteIE(InfoExtractor):
|
||||||
IE_NAME = 'tvp:series'
|
IE_NAME = 'tvp:series'
|
||||||
_VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
|
_VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem',
|
# series
|
||||||
|
'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Ogniem i mieczem',
|
'id': '38678312',
|
||||||
'id': '4278026',
|
|
||||||
},
|
},
|
||||||
'playlist_count': 4,
|
'playlist_count': 115,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat',
|
# film
|
||||||
|
'url': 'https://vod.tvp.pl/website/gloria,35139666',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Boso przez świat',
|
'id': '36637049',
|
||||||
'id': '9329207',
|
'ext': 'mp4',
|
||||||
|
'title': 'Gloria, Gloria',
|
||||||
},
|
},
|
||||||
'playlist_count': 86,
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': ['TVPEmbed'],
|
||||||
|
}, {
|
||||||
|
'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _entries(self, display_id, playlist_id):
|
||||||
|
url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id)
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
page = self._download_webpage(
|
||||||
|
url, display_id, 'Downloading page %d' % page_num,
|
||||||
|
query={'page': page_num})
|
||||||
|
|
||||||
|
video_ids = orderedSet(re.findall(
|
||||||
|
r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % display_id,
|
||||||
|
page))
|
||||||
|
|
||||||
|
if not video_ids:
|
||||||
|
break
|
||||||
|
|
||||||
|
for video_id in video_ids:
|
||||||
|
yield self.url_result(
|
||||||
|
'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(),
|
||||||
|
video_id=video_id)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
webpage = self._download_webpage(url, display_id, tries=5)
|
display_id, playlist_id = mobj.group('display_id', 'id')
|
||||||
|
return self.playlist_result(
|
||||||
title = self._html_search_regex(
|
self._entries(display_id, playlist_id), playlist_id)
|
||||||
r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series')
|
|
||||||
playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id')
|
|
||||||
playlist = self._download_webpage(
|
|
||||||
'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend'
|
|
||||||
'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5,
|
|
||||||
note='Downloading playlist')
|
|
||||||
|
|
||||||
videos_paths = re.findall(
|
|
||||||
'(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
|
|
||||||
entries = [
|
|
||||||
self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key())
|
|
||||||
for v_path in videos_paths]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': playlist_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'entries': entries,
|
|
||||||
}
|
|
||||||
|
@ -493,10 +493,9 @@ class TVPlayHomeIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
|
r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
|
||||||
default=None)
|
|
||||||
|
|
||||||
if video_id:
|
if len(video_id) < 8:
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
|
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
@ -537,8 +536,9 @@ class TVPlayHomeIE(InfoExtractor):
|
|||||||
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
|
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
|
||||||
default=None))
|
default=None))
|
||||||
episode = self._search_regex(
|
episode = self._search_regex(
|
||||||
r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
|
(r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||||
default=None, group='value')
|
r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||||
|
'episode', default=None, group='value')
|
||||||
episode_number = int_or_none(self._search_regex(
|
episode_number = int_or_none(self._search_regex(
|
||||||
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
|
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
|
||||||
default=None))
|
default=None))
|
||||||
|
@ -136,7 +136,12 @@ class TwitchBaseIE(InfoExtractor):
|
|||||||
source = next(f for f in formats if f['format_id'] == 'Source')
|
source = next(f for f in formats if f['format_id'] == 'Source')
|
||||||
source['preference'] = 10
|
source['preference'] = 10
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
pass # No Source stream present
|
for f in formats:
|
||||||
|
if '/chunked/' in f['url']:
|
||||||
|
f.update({
|
||||||
|
'source_preference': 10,
|
||||||
|
'format_note': 'Source',
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ class UdemyIE(InfoExtractor):
|
|||||||
IE_NAME = 'udemy'
|
IE_NAME = 'udemy'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
www\.udemy\.com/
|
(?:[^/]+\.)?udemy\.com/
|
||||||
(?:
|
(?:
|
||||||
[^#]+\#/lecture/|
|
[^#]+\#/lecture/|
|
||||||
lecture/view/?\?lectureId=|
|
lecture/view/?\?lectureId=|
|
||||||
@ -64,6 +64,9 @@ class UdemyIE(InfoExtractor):
|
|||||||
# only outputs rendition
|
# only outputs rendition
|
||||||
'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
|
'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_course_info(self, webpage, video_id):
|
def _extract_course_info(self, webpage, video_id):
|
||||||
@ -123,10 +126,22 @@ class UdemyIE(InfoExtractor):
|
|||||||
|
|
||||||
def _download_webpage_handle(self, *args, **kwargs):
|
def _download_webpage_handle(self, *args, **kwargs):
|
||||||
headers = kwargs.get('headers', {}).copy()
|
headers = kwargs.get('headers', {}).copy()
|
||||||
headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
|
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
|
||||||
kwargs['headers'] = headers
|
kwargs['headers'] = headers
|
||||||
return super(UdemyIE, self)._download_webpage_handle(
|
ret = super(UdemyIE, self)._download_webpage_handle(
|
||||||
*args, **compat_kwargs(kwargs))
|
*args, **compat_kwargs(kwargs))
|
||||||
|
if not ret:
|
||||||
|
return ret
|
||||||
|
webpage, _ = ret
|
||||||
|
if any(p in webpage for p in (
|
||||||
|
'>Please verify you are a human',
|
||||||
|
'Access to this page has been denied because we believe you are using automation tools to browse the website',
|
||||||
|
'"_pxCaptcha"')):
|
||||||
|
raise ExtractorError(
|
||||||
|
'Udemy asks you to solve a CAPTCHA. Login with browser, '
|
||||||
|
'solve CAPTCHA, then export cookies and pass cookie file to '
|
||||||
|
'youtube-dl with --cookies.', expected=True)
|
||||||
|
return ret
|
||||||
|
|
||||||
def _download_json(self, url_or_request, *args, **kwargs):
|
def _download_json(self, url_or_request, *args, **kwargs):
|
||||||
headers = {
|
headers = {
|
||||||
@ -403,8 +418,14 @@ class UdemyIE(InfoExtractor):
|
|||||||
|
|
||||||
class UdemyCourseIE(UdemyIE):
|
class UdemyCourseIE(UdemyIE):
|
||||||
IE_NAME = 'udemy:course'
|
IE_NAME = 'udemy:course'
|
||||||
_VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = []
|
_TESTS = [{
|
||||||
|
'url': 'https://www.udemy.com/java-tutorial/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://wipro.udemy.com/java-tutorial/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
|
@ -502,7 +502,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
orig_url = url
|
orig_url = url
|
||||||
if mobj.group('pro') or mobj.group('player'):
|
if mobj.group('pro'):
|
||||||
|
# some videos require portfolio_id to be present in player url
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/20070
|
||||||
|
url = self._extract_url(url, self._download_webpage(url, video_id))
|
||||||
|
elif mobj.group('player'):
|
||||||
url = 'https://player.vimeo.com/video/' + video_id
|
url = 'https://player.vimeo.com/video/' + video_id
|
||||||
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
|
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
|
||||||
url = 'https://vimeo.com/' + video_id
|
url = 'https://vimeo.com/' + video_id
|
||||||
|
@ -1,123 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
parse_duration,
|
|
||||||
str_to_int,
|
|
||||||
urljoin,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class VpornIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
|
|
||||||
_TESTS = [
|
|
||||||
{
|
|
||||||
'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
|
|
||||||
'md5': 'facf37c1b86546fa0208058546842c55',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '497944',
|
|
||||||
'display_id': 'violet-on-her-th-birthday',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Violet on her 19th birthday',
|
|
||||||
'description': 'Violet dances in front of the camera which is sure to get you horny.',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'uploader': 'kileyGrope',
|
|
||||||
'categories': ['Masturbation', 'Teen'],
|
|
||||||
'duration': 393,
|
|
||||||
'age_limit': 18,
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
'skip': 'video removed',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.vporn.com/female/hana-shower/523564/',
|
|
||||||
'md5': 'ced35a4656198a1664cf2cda1575a25f',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '523564',
|
|
||||||
'display_id': 'hana-shower',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Hana Shower',
|
|
||||||
'description': 'Hana showers at the bathroom.',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'uploader': 'Hmmmmm',
|
|
||||||
'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'],
|
|
||||||
'duration': 588,
|
|
||||||
'age_limit': 18,
|
|
||||||
'view_count': int,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
display_id = mobj.group('display_id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!'
|
|
||||||
if errmsg in webpage:
|
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'class="(?:descr|description_txt)">(.*?)</div>',
|
|
||||||
webpage, 'description', fatal=False)
|
|
||||||
thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
|
|
||||||
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
|
|
||||||
default=None))
|
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
|
|
||||||
webpage, 'uploader', fatal=False)
|
|
||||||
|
|
||||||
categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
|
|
||||||
|
|
||||||
duration = parse_duration(self._search_regex(
|
|
||||||
r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
|
|
||||||
webpage, 'duration', fatal=False))
|
|
||||||
|
|
||||||
view_count = str_to_int(self._search_regex(
|
|
||||||
r'class="views">([\d,\.]+) [Vv]iews<',
|
|
||||||
webpage, 'view count', fatal=False))
|
|
||||||
comment_count = str_to_int(self._html_search_regex(
|
|
||||||
r"'Comments \(([\d,\.]+)\)'",
|
|
||||||
webpage, 'comment count', default=None))
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
|
|
||||||
video_url = video[1]
|
|
||||||
fmt = {
|
|
||||||
'url': video_url,
|
|
||||||
'format_id': video[0],
|
|
||||||
}
|
|
||||||
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
|
|
||||||
if m:
|
|
||||||
fmt.update({
|
|
||||||
'width': int(m.group('width')),
|
|
||||||
'height': int(m.group('height')),
|
|
||||||
'vbr': int(m.group('vbr')),
|
|
||||||
})
|
|
||||||
formats.append(fmt)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'uploader': uploader,
|
|
||||||
'categories': categories,
|
|
||||||
'duration': duration,
|
|
||||||
'view_count': view_count,
|
|
||||||
'comment_count': comment_count,
|
|
||||||
'age_limit': 18,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
@ -48,7 +48,7 @@ class VShareIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
|
'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
|
||||||
video_id)
|
video_id, headers={'Referer': url})
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>([^<]+)</title>', webpage, 'title')
|
r'<title>([^<]+)</title>', webpage, 'title')
|
||||||
|
@ -352,6 +352,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
(?:www\.)?yourepeat\.com/|
|
(?:www\.)?yourepeat\.com/|
|
||||||
tube\.majestyc\.net/|
|
tube\.majestyc\.net/|
|
||||||
(?:www\.)?invidio\.us/|
|
(?:www\.)?invidio\.us/|
|
||||||
|
(?:www\.)?invidious\.snopyta\.org/|
|
||||||
|
(?:www\.)?invidious\.kabi\.tk/|
|
||||||
|
(?:www\.)?vid\.wxzm\.sx/|
|
||||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
(?: # the various things that can precede the ID:
|
(?: # the various things that can precede the ID:
|
||||||
|
@ -184,7 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
|
|||||||
])
|
])
|
||||||
|
|
||||||
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
|
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
|
||||||
JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
|
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
|
||||||
|
|
||||||
|
|
||||||
def preferredencoding():
|
def preferredencoding():
|
||||||
@ -1141,6 +1141,8 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
|||||||
|
|
||||||
|
|
||||||
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
|
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
|
||||||
|
_HTTPONLY_PREFIX = '#HttpOnly_'
|
||||||
|
|
||||||
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
|
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||||
# Store session cookies with `expires` set to 0 instead of an empty
|
# Store session cookies with `expires` set to 0 instead of an empty
|
||||||
# string
|
# string
|
||||||
@ -1150,7 +1152,21 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
|
|||||||
compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
|
compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
|
||||||
|
|
||||||
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
|
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||||
compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires)
|
"""Load cookies from a file."""
|
||||||
|
if filename is None:
|
||||||
|
if self.filename is not None:
|
||||||
|
filename = self.filename
|
||||||
|
else:
|
||||||
|
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
|
||||||
|
|
||||||
|
cf = io.StringIO()
|
||||||
|
with open(filename) as f:
|
||||||
|
for line in f:
|
||||||
|
if line.startswith(self._HTTPONLY_PREFIX):
|
||||||
|
line = line[len(self._HTTPONLY_PREFIX):]
|
||||||
|
cf.write(compat_str(line))
|
||||||
|
cf.seek(0)
|
||||||
|
self._really_load(cf, filename, ignore_discard, ignore_expires)
|
||||||
# Session cookies are denoted by either `expires` field set to
|
# Session cookies are denoted by either `expires` field set to
|
||||||
# an empty string or 0. MozillaCookieJar only recognizes the former
|
# an empty string or 0. MozillaCookieJar only recognizes the former
|
||||||
# (see [1]). So we need force the latter to be recognized as session
|
# (see [1]). So we need force the latter to be recognized as session
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2019.01.30.1'
|
__version__ = '2019.03.01'
|
||||||
|
Loading…
Reference in New Issue
Block a user