1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2025-01-07 13:47:54 +01:00

Merge branch 'master' into fix-zing-mp3

This commit is contained in:
Thinh Nguyen 2018-08-28 19:08:27 -04:00
commit ac4fdaa75a
No known key found for this signature in database
GPG Key ID: C565BDA2570826DE
99 changed files with 2158 additions and 713 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.08.28*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.10** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.08.28**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.07.10 [debug] youtube-dl version 2018.08.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

3
.gitignore vendored
View File

@ -48,3 +48,6 @@ youtube-dl.zsh
tmp/ tmp/
venv/ venv/
# VS Code related files
.vscode

View File

@ -239,3 +239,10 @@ Martin Weinelt
Surya Oktafendri Surya Oktafendri
TingPing TingPing
Alexandre Macabies Alexandre Macabies
Bastian de Groot
Niklas Haas
András Veres-Szentkirályi
Enes Solak
Nathan Rossi
Thomas van der Berg
Luca Cherubin

View File

@ -1,3 +1,94 @@
version 2018.08.28
Extractors
+ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
(#17361)
* [bitchute] Fix extraction by passing a custom User-Agent (#17360)
* [webofstories:playlist] Fix extraction (#16914)
+ [tvplayhome] Add support for new tvplay URLs (#17344)
+ [generic] Allow relative src for videojs embeds (#17324)
+ [xfileshare] Add support for vidto.se (#17317)
+ [vidzi] Add support for vidzi.nu (#17316)
+ [nova:embed] Add support for media.cms.nova.cz (#17282)
version 2018.08.22
Core
* [utils] Use pure browser header for User-Agent (#17236)
Extractors
+ [kinopoisk] Add support for kinopoisk.ru (#17283)
+ [yourporn] Add support for yourporn.sexy (#17298)
+ [go] Add support for disneynow.go.com (#16299, #17264)
+ [6play] Add support for play.rtl.hr (#17249)
* [anvato] Fallback to generic API key for access-key-to-API-key lookup
(#16788, #17254)
* [lci] Fix extraction (#17274)
* [bbccouk] Extend id URL regular expression (#17270)
* [cwtv] Fix extraction (#17256)
* [nova] Fix extraction (#17241)
+ [generic] Add support for expressen embeds
* [raywenderlich] Adapt to site redesign (#17225)
+ [redbulltv] Add support for redbull.com tv URLs (#17218)
+ [bitchute] Add support for bitchute.com (#14052)
+ [clyp] Add support for token protected media (#17184)
* [imdb] Fix extension extraction (#17167)
version 2018.08.04
Extractors
* [funk:channel] Improve byChannelAlias extraction (#17142)
* [twitch] Fix authentication (#17024, #17126)
* [twitch:vod] Improve URL regular expression (#17135)
* [watchbox] Fix extraction (#17107)
* [pbs] Fix extraction (#17109)
* [theplatform] Relax URL regular expression (#16181, #17097)
+ [viqeo] Add support for viqeo.tv (#17066)
version 2018.07.29
Extractors
* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
+ [pornhub] Add support for subtitles (#16924, #17088)
* [ceskatelevize] Use https for API call (#16997, #16999)
* [dailymotion:playlist] Fix extraction (#16894)
* [ted] Improve extraction
* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
* [telecinco] Fix extraction (#17080)
* [mitele] Reduce number of requests
* [rai] Return non HTTP relinker URL intact (#17055)
* [vk] Fix extraction for inline only videos (#16923)
* [streamcloud] Fix extraction (#17054)
* [facebook] Fix tahoe player extraction with authentication (#16655)
+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
version 2018.07.21
Core
+ [utils] Introduce url_or_none
* [utils] Allow JSONP without function name (#17028)
+ [extractor/common] Extract DASH and MSS formats from SMIL manifests
Extractors
+ [bbc] Add support for BBC Radio Play pages (#17022)
* [iwara] Fix download URLs (#17026)
* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
+ [viu] Pass Referer and Origin headers and area id (#16992)
+ [vimeo] Add another config regular expression (#17013)
+ [facebook] Extract view count (#16942)
* [dailymotion] Improve description extraction (#16984)
* [slutload] Fix and improve extraction (#17001)
* [mediaset] Fix extraction (#16977)
+ [theplatform] Add support for theplatform TLD customization (#16977)
* [imgur] Relax URL regular expression (#16987)
* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
#16959)
version 2018.07.10 version 2018.07.10
Core Core

View File

@ -870,7 +870,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox). In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.

View File

@ -108,6 +108,8 @@
- **BiliBili** - **BiliBili**
- **BioBioChileTV** - **BioBioChileTV**
- **BIQLE** - **BIQLE**
- **BitChute**
- **BitChuteChannel**
- **BleacherReport** - **BleacherReport**
- **BleacherReportCMS** - **BleacherReportCMS**
- **blinkx** - **blinkx**
@ -405,6 +407,7 @@
- **Ketnet** - **Ketnet**
- **KhanAcademy** - **KhanAcademy**
- **KickStarter** - **KickStarter**
- **KinoPoisk**
- **KonserthusetPlay** - **KonserthusetPlay**
- **kontrtube**: KontrTube.ru - Труба зовёт - **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью - **KrasView**: Красвью
@ -577,6 +580,7 @@
- **Normalboots** - **Normalboots**
- **NosVideo** - **NosVideo**
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **NovaEmbed**
- **nowness** - **nowness**
- **nowness:playlist** - **nowness:playlist**
- **nowness:series** - **nowness:series**
@ -672,6 +676,8 @@
- **PrimeShareTV** - **PrimeShareTV**
- **PromptFile** - **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital - **prosiebensat1**: ProSiebenSat.1 Digital
- **puhutv**
- **puhutv:serie**
- **Puls4** - **Puls4**
- **Pyvideo** - **Pyvideo**
- **qqmusic**: QQ音乐 - **qqmusic**: QQ音乐
@ -694,6 +700,7 @@
- **RaiPlayLive** - **RaiPlayLive**
- **RaiPlayPlaylist** - **RaiPlayPlaylist**
- **RayWenderlich** - **RayWenderlich**
- **RayWenderlichCourse**
- **RBMARadio** - **RBMARadio**
- **RDS**: RDS.ca - **RDS**: RDS.ca
- **RedBullTV** - **RedBullTV**
@ -910,6 +917,7 @@
- **tvp:embed**: Telewizja Polska - **tvp:embed**: Telewizja Polska
- **tvp:series** - **tvp:series**
- **TVPlayer** - **TVPlayer**
- **TVPlayHome**
- **Tweakers** - **Tweakers**
- **twitch:chapter** - **twitch:chapter**
- **twitch:clips** - **twitch:clips**
@ -999,6 +1007,7 @@
- **Vimple**: Vimple - one-click video hosting - **Vimple**: Vimple - one-click video hosting
- **Vine** - **Vine**
- **vine:user** - **vine:user**
- **Viqeo**
- **Viu** - **Viu**
- **viu:ott** - **viu:ott**
- **viu:playlist** - **viu:playlist**
@ -1090,6 +1099,7 @@
- **YouNowLive** - **YouNowLive**
- **YouNowMoment** - **YouNowMoment**
- **YouPorn** - **YouPorn**
- **YourPorn**
- **YourUpload** - **YourUpload**
- **youtube**: YouTube.com - **youtube**: YouTube.com
- **youtube:channel**: YouTube.com channels - **youtube:channel**: YouTube.com channels

View File

@ -78,6 +78,7 @@ from youtube_dl.utils import (
uppercase_escape, uppercase_escape,
lowercase_escape, lowercase_escape,
url_basename, url_basename,
url_or_none,
base_url, base_url,
urljoin, urljoin,
urlencode_postdata, urlencode_postdata,
@ -507,6 +508,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
def test_url_or_none(self):
self.assertEqual(url_or_none(None), None)
self.assertEqual(url_or_none(''), None)
self.assertEqual(url_or_none('foo'), None)
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
self.assertEqual(url_or_none('http$://foo.de'), None)
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
def test_parse_age_limit(self): def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_age_limit(False), None) self.assertEqual(parse_age_limit(False), None)
@ -717,6 +728,10 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped) d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'}) self.assertEqual(d, {'status': 'success'})
stripped = strip_jsonp('({"status": "success"});')
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
def test_uppercase_escape(self): def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(''), '') self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')

View File

@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
strip_or_none, strip_or_none,
url_or_none,
) )
@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
if not video_id: if not video_id:
entries = [] entries = []
for episode in video_data.get('archiveEpisodes', []): for episode in video_data.get('archiveEpisodes', []):
episode_url = episode.get('url') episode_url = url_or_none(episode.get('url'))
if not episode_url: if not episode_url:
continue continue
entries.append(self.url_result( entries.append(self.url_result(

View File

@ -9,6 +9,7 @@ from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
url_or_none,
urlencode_postdata, urlencode_postdata,
xpath_text, xpath_text,
) )
@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
file_elements = video_element.findall(compat_xpath('./file')) file_elements = video_element.findall(compat_xpath('./file'))
one = len(file_elements) == 1 one = len(file_elements) == 1
for file_num, file_element in enumerate(file_elements, start=1): for file_num, file_element in enumerate(file_elements, start=1):
file_url = file_element.text file_url = url_or_none(file_element.text)
if not file_url: if not file_url:
continue continue
key = file_element.get('key', '') key = file_element.get('key', '')

View File

@ -3,11 +3,12 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none,
parse_iso8601,
mimetype2ext,
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none,
mimetype2ext,
parse_iso8601,
url_or_none,
) )
@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
media_thumbnail = [media_thumbnail] media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail: for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data.get('@attributes', {}) thumbnail = thumbnail_data.get('@attributes', {})
thumbnail_url = thumbnail.get('url') thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url: if not thumbnail_url:
continue continue
thumbnails.append({ thumbnails.append({
@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
media_subtitle = [media_subtitle] media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle: for subtitle_data in media_subtitle:
subtitle = subtitle_data.get('@attributes', {}) subtitle = subtitle_data.get('@attributes', {})
subtitle_href = subtitle.get('href') subtitle_href = url_or_none(subtitle.get('href'))
if not subtitle_href: if not subtitle_href:
continue continue
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({ subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
media_content = [media_content] media_content = [media_content]
for media_data in media_content: for media_data in media_content:
media = media_data.get('@attributes', {}) media = media_data.get('@attributes', {})
media_url = media.get('url') media_url = url_or_none(media.get('url'))
if not media_url: if not media_url:
continue continue
ext = mimetype2ext(media.get('type')) or determine_ext(media_url) ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
else: else:
formats.append({ formats.append({
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'), 'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
'url': media['url'], 'url': media_url,
'tbr': int_or_none(media.get('bitrate')), 'tbr': int_or_none(media.get('bitrate')),
'filesize': int_or_none(media.get('fileSize')), 'filesize': int_or_none(media.get('fileSize')),
'ext': ext, 'ext': ext,

View File

@ -8,6 +8,7 @@ from ..utils import (
determine_ext, determine_ext,
extract_attributes, extract_attributes,
ExtractorError, ExtractorError,
url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
}, fatal=False) }, fatal=False)
if not playlist: if not playlist:
continue continue
stream_url = playlist.get('streamurl') stream_url = url_or_none(playlist.get('streamurl'))
if stream_url: if stream_url:
rtmp = re.search( rtmp = re.search(
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)', r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',

View File

@ -134,9 +134,33 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
} }
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1' _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
_TESTS = [{
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
'info_dict': {
'id': '4465496',
'ext': 'mp4',
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
'duration': 22,
'timestamp': 1534855680,
'upload_date': '20180821',
'uploader': 'ANV',
},
'params': {
'skip_download': True,
},
}, {
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
'only_matching': True,
}]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(AnvatoIE, self).__init__(*args, **kwargs) super(AnvatoIE, self).__init__(*args, **kwargs)
self.__server_time = None self.__server_time = None
@ -169,7 +193,8 @@ class AnvatoIE(InfoExtractor):
'api': { 'api': {
'anvrid': anvrid, 'anvrid': anvrid,
'anvstk': md5_text('%s|%s|%d|%s' % ( 'anvstk': md5_text('%s|%s|%d|%s' % (
access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])), access_key, anvrid, server_time,
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
'anvts': server_time, 'anvts': server_time,
}, },
} }
@ -284,5 +309,6 @@ class AnvatoIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id') access_key, video_id = mobj.group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE: if access_key not in self._ANVACK_TABLE:
access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key] access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
access_key) or access_key
return self._get_anvato_videos(access_key, video_id) return self._get_anvato_videos(access_key, video_id)

View File

@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
url_or_none,
) )
@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
for rendition in video_data.get('renditions', []): for rendition in video_data.get('renditions', []):
video_url = rendition.get('url') video_url = url_or_none(rendition.get('url'))
if not video_url: if not video_url:
continue continue
ext = rendition.get('format') ext = rendition.get('format')

View File

@ -4,10 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
js_to_json, js_to_json,
url_or_none,
) )
@ -68,8 +68,8 @@ class APAIE(InfoExtractor):
for source in sources: for source in sources:
if not isinstance(source, dict): if not isinstance(source, dict):
continue continue
source_url = source.get('file') source_url = url_or_none(source.get('file'))
if not source_url or not isinstance(source_url, compat_str): if not source_url:
continue continue
ext = determine_ext(source_url) ext = determine_ext(source_url)
if ext == 'm3u8': if ext == 'm3u8':

View File

@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
url_or_none,
) )
@ -43,7 +44,7 @@ class AparatIE(InfoExtractor):
formats = [] formats = []
for item in file_list[0]: for item in file_list[0]:
file_url = item.get('file') file_url = url_or_none(item.get('file'))
if not file_url: if not file_url:
continue continue
ext = mimetype2ext(item.get('type')) ext = mimetype2ext(item.get('type'))

View File

@ -5,7 +5,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .generic import GenericIE from .generic import GenericIE
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
@ -15,6 +14,7 @@ from ..utils import (
unified_strdate, unified_strdate,
xpath_text, xpath_text,
update_url_query, update_url_query,
url_or_none,
) )
from ..compat import compat_etree_fromstring from ..compat import compat_etree_fromstring
@ -100,7 +100,7 @@ class ARDMediathekIE(InfoExtractor):
quality = stream.get('_quality') quality = stream.get('_quality')
server = stream.get('_server') server = stream.get('_server')
for stream_url in stream_urls: for stream_url in stream_urls:
if not isinstance(stream_url, compat_str) or '//' not in stream_url: if not url_or_none(stream_url):
continue continue
ext = determine_ext(stream_url) ext = determine_ext(stream_url)
if quality != 'auto' and ext in ('f4m', 'm3u8'): if quality != 'auto' and ext in ('f4m', 'm3u8'):

View File

@ -19,6 +19,7 @@ from ..utils import (
unescapeHTML, unescapeHTML,
update_url_query, update_url_query,
unified_strdate, unified_strdate,
url_or_none,
) )
@ -131,8 +132,8 @@ class BandcampIE(InfoExtractor):
fatal=False) fatal=False)
if not stat: if not stat:
continue continue
retry_url = stat.get('retry_url') retry_url = url_or_none(stat.get('retry_url'))
if not isinstance(retry_url, compat_str): if not retry_url:
continue continue
formats.append({ formats.append({
'url': self._proto_relative_url(retry_url, 'http:'), 'url': self._proto_relative_url(retry_url, 'http:'),
@ -306,7 +307,7 @@ class BandcampWeeklyIE(InfoExtractor):
formats = [] formats = []
for format_id, format_url in show['audio_stream'].items(): for format_id, format_url in show['audio_stream'].items():
if not isinstance(format_url, compat_str): if not url_or_none(format_url):
continue continue
for known_ext in KNOWN_EXTENSIONS: for known_ext in KNOWN_EXTENSIONS:
if known_ext in format_id: if known_ext in format_id:

View File

@ -29,7 +29,7 @@ from ..compat import (
class BBCCoUkIE(InfoExtractor): class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk' IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer' IE_DESC = 'BBC iPlayer'
_ID_REGEX = r'[pbw][\da-z]{7}' _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:www\.)?bbc\.co\.uk/ (?:www\.)?bbc\.co\.uk/
@ -236,6 +236,12 @@ class BBCCoUkIE(InfoExtractor):
}, { }, {
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9', 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.bbc.co.uk/programmes/m00005xn',
'only_matching': True,
}, {
'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
'only_matching': True,
}] }]
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
@ -778,6 +784,17 @@ class BBCIE(BBCCoUkIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} }
}, {
# window.__PRELOADED_STATE__
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
'info_dict': {
'id': 'b0b9z4vz',
'ext': 'mp4',
'title': 'Prom 6: An American in Paris and Turangalila',
'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
'uploader': 'Radio 3',
'uploader_id': 'bbc_radio_three',
},
}] }]
@classmethod @classmethod
@ -1000,6 +1017,36 @@ class BBCIE(BBCCoUkIE):
'subtitles': subtitles, 'subtitles': subtitles,
} }
preload_state = self._parse_json(self._search_regex(
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
'preload state', default='{}'), playlist_id, fatal=False)
if preload_state:
current_programme = preload_state.get('programmes', {}).get('current') or {}
programme_id = current_programme.get('id')
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
formats, subtitles = self._download_media_selector(programme_id)
self._sort_formats(formats)
synopses = current_programme.get('synopses') or {}
network = current_programme.get('network') or {}
duration = int_or_none(
current_programme.get('duration', {}).get('value'))
thumbnail = None
image_url = current_programme.get('image_url')
if image_url:
thumbnail = image_url.replace('{recipe}', '1920x1920')
return {
'id': programme_id,
'title': title,
'description': dict_get(synopses, ('long', 'medium', 'short')),
'thumbnail': thumbnail,
'duration': duration,
'uploader': network.get('short_title'),
'uploader_id': network.get('id'),
'formats': formats,
'subtitles': subtitles,
}
bbc3_config = self._parse_json( bbc3_config = self._parse_json(
self._search_regex( self._search_regex(
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage, r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,

View File

@ -0,0 +1,120 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
from ..utils import urlencode_postdata
class BitChuteIE(InfoExtractor):
    """Extractor for single BitChute videos (watch, embed and torrent URLs)."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
        'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
        'info_dict': {
            'id': 'szoMrox2JEI',
            'ext': 'mp4',
            'title': 'Fuck bitches get money',
            'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Victoria X Rave',
        },
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The site rejects the default library User-Agent, so a browser-like
        # Chrome UA string is sent instead.
        webpage = self._download_webpage(
            'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
            })

        # Prefer the dedicated title element, then the <title> tag, then the
        # description meta tags as a last resort.
        title = self._search_regex(
            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
            webpage, 'title', default=None)
        if not title:
            title = self._html_search_meta(
                'description', webpage, 'title', default=None)
        if not title:
            title = self._og_search_description(webpage)

        # Every addWebSeed(...) call on the page names a direct media URL.
        formats = []
        for seed in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
            formats.append({'url': seed.group('url')})
        self._sort_formats(formats)

        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)

        thumbnail = self._og_search_thumbnail(webpage, default=None)
        if not thumbnail:
            thumbnail = self._html_search_meta(
                'twitter:image:src', webpage, 'thumbnail')

        uploader = self._html_search_regex(
            r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
            'uploader', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'formats': formats,
        }
class BitChuteChannelIE(InfoExtractor):
    """Extractor for BitChute channel pages; yields every video as a playlist entry."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
        'info_dict': {
            'id': 'victoriaxrave',
        },
    }

    # Static CSRF token accepted by the channel "extend" (pagination) endpoint.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'

    def _entries(self, channel_id):
        """Generate url_result entries by paging through the channel's videos."""
        channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
        fetched = 0
        for page in itertools.count(1):
            # POST to the AJAX pagination endpoint; it returns an HTML
            # fragment with the next batch of video tiles.
            data = self._download_json(
                '%sextend/' % channel_url, channel_id,
                'Downloading channel page %d' % page,
                data=urlencode_postdata({
                    'csrfmiddlewaretoken': self._TOKEN,
                    'name': '',
                    'offset': fetched,
                }), headers={
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                    'Referer': channel_url,
                    'X-Requested-With': 'XMLHttpRequest',
                    'Cookie': 'csrftoken=%s' % self._TOKEN,
                })
            if data.get('success') is False:
                break
            html = data.get('html')
            if not html:
                break
            video_ids = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                html)
            if not video_ids:
                # Empty page means we have walked past the last video.
                break
            fetched += len(video_ids)
            for video_id in video_ids:
                yield self.url_result(
                    'https://www.bitchute.com/video/%s' % video_id,
                    ie=BitChuteIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        return self.playlist_result(
            self._entries(channel_id), playlist_id=channel_id)

View File

@ -4,8 +4,10 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE from .youtube import YoutubeIE
from ..compat import compat_str from ..utils import (
from ..utils import int_or_none int_or_none,
url_or_none,
)
class BreakIE(InfoExtractor): class BreakIE(InfoExtractor):
@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
formats = [] formats = []
for video in content: for video in content:
video_url = video.get('url') video_url = url_or_none(video.get('url'))
if not video_url or not isinstance(video_url, compat_str): if not video_url:
continue continue
bitrate = int_or_none(self._search_regex( bitrate = int_or_none(self._search_regex(
r'(\d+)_kbps', video_url, 'tbr', default=None)) r'(\d+)_kbps', video_url, 'tbr', default=None))

View File

@ -2,10 +2,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
url_or_none,
) )
@ -56,8 +56,8 @@ class CamModelsIE(InfoExtractor):
for media in encodings: for media in encodings:
if not isinstance(media, dict): if not isinstance(media, dict):
continue continue
media_url = media.get('location') media_url = url_or_none(media.get('location'))
if not media_url or not isinstance(media_url, compat_str): if not media_url:
continue continue
format_id_list = [format_id] format_id_list = [format_id]

View File

@ -11,6 +11,7 @@ from ..utils import (
strip_or_none, strip_or_none,
float_or_none, float_or_none,
int_or_none, int_or_none,
merge_dicts,
parse_iso8601, parse_iso8601,
) )
@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
webpage, urlh = self._download_webpage_handle(url, display_id) webpage, urlh = self._download_webpage_handle(url, display_id)
title = self._html_search_regex( info = self._search_json_ld(webpage, display_id, default={})
# title is optional here since it may be extracted by extractor
# that is delegated from here
title = strip_or_none(self._html_search_regex(
r'(?ms)<h1 class="content__heading">(.+?)</h1>', r'(?ms)<h1 class="content__heading">(.+?)</h1>',
webpage, 'title').strip() webpage, 'title', default=None))
description = self._html_search_regex( description = self._html_search_regex(
r'(?ms)<div class="content__description">(.+?)</div>', r'(?ms)<div class="content__description">(.+?)</div>',
@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
# the first one # the first one
video_id = list(video.values())[0].get('videoid') video_id = list(video.values())[0].get('videoid')
return { return merge_dicts(info, {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id, 'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(), 'ie_key': CanvasIE.ie_key(),
@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
'season_number': season_number, 'season_number': season_number,
'episode_number': episode_number, 'episode_number': episode_number,
'release_date': release_date, 'release_date': release_date,
} })

View File

@ -4,13 +4,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
clean_html, clean_html,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
parse_resolution, parse_resolution,
url_or_none,
) )
@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
media_url = media['media']['url'] media_url = media['media']['url']
if isinstance(media_url, list): if isinstance(media_url, list):
for format_ in media_url: for format_ in media_url:
format_url = format_.get('file') format_url = url_or_none(format_.get('file'))
if not format_url or not isinstance(format_url, compat_str): if not format_url:
continue continue
label = format_.get('label') label = format_.get('label')
f = parse_resolution(label) f = parse_resolution(label)

View File

@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):
for user_agent in (None, USER_AGENTS['Safari']): for user_agent in (None, USER_AGENTS['Safari']):
req = sanitized_Request( req = sanitized_Request(
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
data=urlencode_postdata(data)) data=urlencode_postdata(data))
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('Content-type', 'application/x-www-form-urlencoded')

View File

@ -1,15 +1,19 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
parse_iso8601, unified_timestamp,
) )
class ClypIE(InfoExtractor): class ClypIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
_TEST = { _TESTS = [{
'url': 'https://clyp.it/ojz2wfah', 'url': 'https://clyp.it/ojz2wfah',
'md5': '1d4961036c41247ecfdcc439c0cddcbb', 'md5': '1d4961036c41247ecfdcc439c0cddcbb',
'info_dict': { 'info_dict': {
@ -21,13 +25,34 @@ class ClypIE(InfoExtractor):
'timestamp': 1443515251, 'timestamp': 1443515251,
'upload_date': '20150929', 'upload_date': '20150929',
}, },
} }, {
'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
'info_dict': {
'id': 'b04p1odi',
'ext': 'mp3',
'title': 'GJ! (Reward Edit)',
'description': 'Metal Resistance (THE ONE edition)',
'duration': 177.789,
'timestamp': 1528241278,
'upload_date': '20180605',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
audio_id = self._match_id(url) audio_id = self._match_id(url)
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
token = qs.get('token', [None])[0]
query = {}
if token:
query['token'] = token
metadata = self._download_json( metadata = self._download_json(
'https://api.clyp.it/%s' % audio_id, audio_id) 'https://api.clyp.it/%s' % audio_id, audio_id, query=query)
formats = [] formats = []
for secure in ('', 'Secure'): for secure in ('', 'Secure'):
@ -45,7 +70,7 @@ class ClypIE(InfoExtractor):
title = metadata['Title'] title = metadata['Title']
description = metadata.get('Description') description = metadata.get('Description')
duration = float_or_none(metadata.get('Duration')) duration = float_or_none(metadata.get('Duration'))
timestamp = parse_iso8601(metadata.get('DateCreated')) timestamp = unified_timestamp(metadata.get('DateCreated'))
return { return {
'id': audio_id, 'id': audio_id,

View File

@ -1859,9 +1859,7 @@ class InfoExtractor(object):
'height': height, 'height': height,
}) })
formats.extend(m3u8_formats) formats.extend(m3u8_formats)
continue elif src_ext == 'f4m':
if src_ext == 'f4m':
f4m_url = src_url f4m_url = src_url
if not f4m_params: if not f4m_params:
f4m_params = { f4m_params = {
@ -1871,9 +1869,13 @@ class InfoExtractor(object):
f4m_url += '&' if '?' in f4m_url else '?' f4m_url += '&' if '?' in f4m_url else '?'
f4m_url += compat_urllib_parse_urlencode(f4m_params) f4m_url += compat_urllib_parse_urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
continue elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats(
if src_url.startswith('http') and self._is_valid_url(src, video_id): src_url, video_id, mpd_id='dash', fatal=False))
elif re.search(r'\.ism/[Mm]anifest', src_url):
formats.extend(self._extract_ism_formats(
src_url, video_id, ism_id='mss', fatal=False))
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
http_count += 1 http_count += 1
formats.append({ formats.append({
'url': src_url, 'url': src_url,
@ -1884,7 +1886,6 @@ class InfoExtractor(object):
'width': width, 'width': width,
'height': height, 'height': height,
}) })
continue
return formats return formats

View File

@ -4,16 +4,14 @@ from __future__ import unicode_literals, division
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_HTTPError
compat_str,
compat_HTTPError,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
url_or_none,
ExtractorError ExtractorError
) )
@ -86,8 +84,8 @@ class CrackleIE(InfoExtractor):
for e in media['MediaURLs']: for e in media['MediaURLs']:
if e.get('UseDRM') is True: if e.get('UseDRM') is True:
continue continue
format_url = e.get('Path') format_url = url_or_none(e.get('Path'))
if not format_url or not isinstance(format_url, compat_str): if not format_url:
continue continue
ext = determine_ext(format_url) ext = determine_ext(format_url)
if ext == 'm3u8': if ext == 'm3u8':
@ -124,8 +122,8 @@ class CrackleIE(InfoExtractor):
for cc_file in cc_files: for cc_file in cc_files:
if not isinstance(cc_file, dict): if not isinstance(cc_file, dict):
continue continue
cc_url = cc_file.get('Path') cc_url = url_or_none(cc_file.get('Path'))
if not cc_url or not isinstance(cc_url, compat_str): if not cc_url:
continue continue
lang = cc_file.get('Locale') or 'en' lang = cc_file.get('Locale') or 'en'
subtitles.setdefault(lang, []).append({'url': cc_url}) subtitles.setdefault(lang, []).append({'url': cc_url})

View File

@ -262,6 +262,9 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# Just test metadata extraction # Just test metadata extraction
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://www.crunchyroll.com/media-723735',
'only_matching': True,
}] }]
_FORMAT_IDS = { _FORMAT_IDS = {
@ -580,7 +583,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
IE_NAME = 'crunchyroll:playlist' IE_NAME = 'crunchyroll:playlist'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)' _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi', 'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',

View File

@ -4,7 +4,10 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_age_limit,
parse_iso8601, parse_iso8601,
smuggle_url,
str_or_none,
) )
@ -40,10 +43,15 @@ class CWTVIE(InfoExtractor):
'duration': 1263, 'duration': 1263,
'series': 'Whose Line Is It Anyway?', 'series': 'Whose Line Is It Anyway?',
'season_number': 11, 'season_number': 11,
'season': '11',
'episode_number': 20, 'episode_number': 20,
'upload_date': '20151006', 'upload_date': '20151006',
'timestamp': 1444107300, 'timestamp': 1444107300,
'age_limit': 14,
'uploader': 'CWTV',
},
'params': {
# m3u8 download
'skip_download': True,
}, },
}, { }, {
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6', 'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
@ -58,60 +66,28 @@ class CWTVIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = None video_data = self._download_json(
formats = [] 'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
for partner in (154, 213): video_id)['video']
vdata = self._download_json( title = video_data['title']
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False) mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
if not vdata:
continue
video_data = vdata
for quality, quality_data in vdata.get('videos', {}).items():
quality_url = quality_data.get('uri')
if not quality_url:
continue
if quality == 'variantplaylist':
formats.extend(self._extract_m3u8_formats(
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
tbr = int_or_none(quality_data.get('bitrate'))
format_id = 'http' + ('-%d' % tbr if tbr else '')
if self._is_valid_url(quality_url, video_id, format_id):
formats.append({
'format_id': format_id,
'url': quality_url,
'tbr': tbr,
})
video_metadata = video_data['assetFields']
ism_url = video_metadata.get('smoothStreamingUrl')
if ism_url:
formats.extend(self._extract_ism_formats(
ism_url, video_id, ism_id='mss', fatal=False))
self._sort_formats(formats)
thumbnails = [{ season = str_or_none(video_data.get('season'))
'url': image['uri'], episode = str_or_none(video_data.get('episode'))
'width': image.get('width'), if episode and season:
'height': image.get('height'), episode = episode.lstrip(season)
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
subtitles = {
'en': [{
'url': video_metadata['UnicornCcUrl'],
}],
} if video_metadata.get('UnicornCcUrl') else None
return { return {
'_type': 'url_transparent',
'id': video_id, 'id': video_id,
'title': video_metadata['title'], 'title': title,
'description': video_metadata.get('description'), 'url': smuggle_url(mpx_url, {'force_smil_url': True}),
'duration': int_or_none(video_metadata.get('duration')), 'description': video_data.get('description_long'),
'series': video_metadata.get('seriesName'), 'duration': int_or_none(video_data.get('duration_secs')),
'season_number': int_or_none(video_metadata.get('seasonNumber')), 'series': video_data.get('series_name'),
'season': video_metadata.get('seasonName'), 'season_number': int_or_none(season),
'episode_number': int_or_none(video_metadata.get('episodeNumber')), 'episode_number': int_or_none(episode),
'timestamp': parse_iso8601(video_data.get('startTime')), 'timestamp': parse_iso8601(video_data.get('start_time')),
'thumbnails': thumbnails, 'age_limit': parse_age_limit(video_data.get('rating')),
'formats': formats, 'ie_key': 'ThePlatform',
'subtitles': subtitles,
} }

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64 import base64
import functools
import hashlib import hashlib
import itertools import itertools
import json import json
@ -16,11 +17,13 @@ from ..utils import (
error_to_compat_str, error_to_compat_str,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
mimetype2ext,
OnDemandPagedList,
parse_iso8601, parse_iso8601,
sanitized_Request, sanitized_Request,
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
mimetype2ext, urlencode_postdata,
) )
@ -144,7 +147,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
description = self._og_search_description(webpage) or self._html_search_meta( description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'description', webpage, 'description') 'description', webpage, 'description')
view_count_str = self._search_regex( view_count_str = self._search_regex(
@ -342,17 +346,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:playlist' IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
_TESTS = [{ _TESTS = [{
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': { 'info_dict': {
'title': 'SPORT', 'title': 'SPORT',
'id': 'xv4bw_nqtv_sport', 'id': 'xv4bw',
}, },
'playlist_mincount': 20, 'playlist_mincount': 20,
}] }]
_PAGE_SIZE = 100
def _fetch_page(self, playlist_id, authorizaion, page):
page += 1
videos = self._download_json(
'https://graphql.api.dailymotion.com',
playlist_id, 'Downloading page %d' % page,
data=json.dumps({
'query': '''{
collection(xid: "%s") {
videos(first: %d, page: %d) {
pageInfo {
hasNextPage
nextPage
}
edges {
node {
xid
url
}
}
}
}
}''' % (playlist_id, self._PAGE_SIZE, page)
}).encode(), headers={
'Authorization': authorizaion,
'Origin': 'https://www.dailymotion.com',
})['data']['collection']['videos']
for edge in videos['edges']:
node = edge['node']
yield self.url_result(
node['url'], DailymotionIE.ie_key(), node['xid'])
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
api = self._parse_json(self._search_regex(
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
webpage, 'player config'), playlist_id)['context']['api']
auth = self._download_json(
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
playlist_id, data=urlencode_postdata({
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
'grant_type': 'client_credentials',
}))
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
entries = OnDemandPagedList(functools.partial(
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
return self.playlist_result(
entries, playlist_id,
self._og_search_title(webpage))
class DailymotionUserIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
'title': 'Rémi Gaillard',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.dailymotion.com/user/UnderProject',
'info_dict': {
'id': 'UnderProject',
'title': 'UnderProject',
},
'playlist_mincount': 1800,
'expected_warnings': [
'Stopped at duplicated page',
],
'skip': 'Takes too long time',
}]
def _extract_entries(self, id): def _extract_entries(self, id):
video_ids = set() video_ids = set()
@ -378,43 +458,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
break break
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
return {
'_type': 'playlist',
'id': playlist_id,
'title': self._og_search_title(webpage),
'entries': self._extract_entries(playlist_id),
}
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
'title': 'Rémi Gaillard',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.dailymotion.com/user/UnderProject',
'info_dict': {
'id': 'UnderProject',
'title': 'UnderProject',
},
'playlist_mincount': 1800,
'expected_warnings': [
'Stopped at duplicated page',
],
'skip': 'Takes too long time',
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
user = mobj.group('user') user = mobj.group('user')

View File

@ -7,6 +7,7 @@ from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
unified_timestamp, unified_timestamp,
url_or_none,
) )
@ -69,7 +70,7 @@ class DctpTvIE(InfoExtractor):
endpoint = next( endpoint = next(
server['endpoint'] server['endpoint']
for server in servers for server in servers
if isinstance(server.get('endpoint'), compat_str) and if url_or_none(server.get('endpoint')) and
'cloudfront' in server['endpoint']) 'cloudfront' in server['endpoint'])
else: else:
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/' endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
@ -92,8 +93,8 @@ class DctpTvIE(InfoExtractor):
for image in images: for image in images:
if not isinstance(image, dict): if not isinstance(image, dict):
continue continue
image_url = image.get('url') image_url = url_or_none(image.get('url'))
if not image_url or not isinstance(image_url, compat_str): if not image_url:
continue continue
thumbnails.append({ thumbnails.append({
'url': image_url, 'url': image_url,

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
extract_attributes, extract_attributes,
@ -12,6 +11,7 @@ from ..utils import (
parse_age_limit, parse_age_limit,
remove_end, remove_end,
unescapeHTML, unescapeHTML,
url_or_none,
) )
@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
captions = stream.get('captions') captions = stream.get('captions')
if isinstance(captions, list): if isinstance(captions, list):
for caption in captions: for caption in captions:
subtitle_url = caption.get('fileUrl') subtitle_url = url_or_none(caption.get('fileUrl'))
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or if not subtitle_url or not subtitle_url.startswith('http'):
not subtitle_url.startswith('http')):
continue continue
lang = caption.get('fileLang', 'en') lang = caption.get('fileLang', 'en')
ext = determine_ext(subtitle_url) ext = determine_ext(subtitle_url)

View File

@ -7,7 +7,6 @@ import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_str,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
@ -17,6 +16,7 @@ from ..utils import (
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
unified_timestamp, unified_timestamp,
url_or_none,
) )
@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
for sub in subs: for sub in subs:
if not isinstance(sub, dict): if not isinstance(sub, dict):
continue continue
sub_url = sub.get('url') sub_url = url_or_none(sub.get('url'))
if not sub_url or not isinstance(sub_url, compat_str): if not sub_url:
continue continue
subtitles.setdefault( subtitles.setdefault(
sub.get('code') or sub.get('language') or 'en', []).append({ sub.get('code') or sub.get('language') or 'en', []).append({
@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
for format_id, format_dict in download_assets.items(): for format_id, format_dict in download_assets.items():
if not isinstance(format_dict, dict): if not isinstance(format_dict, dict):
continue continue
format_url = format_dict.get('url') format_url = url_or_none(format_dict.get('url'))
if not format_url or not isinstance(format_url, compat_str): if not format_url:
continue continue
formats.append({ formats.append({
'url': format_url, 'url': format_url,

View File

@ -4,14 +4,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_HTTPError
compat_HTTPError,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
unsmuggle_url, unsmuggle_url,
url_or_none,
) )
@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
video_id, 'Downloading mp4 JSON', fatal=False) video_id, 'Downloading mp4 JSON', fatal=False)
if mp4_data: if mp4_data:
for format_id, format_url in mp4_data.get('data', {}).items(): for format_id, format_url in mp4_data.get('data', {}).items():
if not isinstance(format_url, compat_str): if not url_or_none(format_url):
continue continue
height = int_or_none(format_id) height = int_or_none(format_id)
if height is not None and m3u8_formats_dict.get(height): if height is not None and m3u8_formats_dict.get(height):

View File

@ -8,6 +8,7 @@ from ..utils import (
int_or_none, int_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none,
) )
@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
entries = [] entries = []
for lesson in lessons: for lesson in lessons:
lesson_url = lesson.get('http_url') lesson_url = url_or_none(lesson.get('http_url'))
if not lesson_url or not isinstance(lesson_url, compat_str): if not lesson_url:
continue continue
lesson_id = lesson.get('id') lesson_id = lesson.get('id')
if lesson_id: if lesson_id:
@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
formats = [] formats = []
for _, format_url in lesson['media_urls'].items(): for _, format_url in lesson['media_urls'].items():
if not format_url or not isinstance(format_url, compat_str): format_url = url_or_none(format_url)
if not format_url:
continue continue
ext = determine_ext(format_url) ext = determine_ext(format_url)
if ext == 'm3u8': if ext == 'm3u8':

View File

@ -11,6 +11,7 @@ from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
str_to_int, str_to_int,
url_or_none,
) )
@ -82,8 +83,8 @@ class EpornerIE(InfoExtractor):
for format_id, format_dict in formats_dict.items(): for format_id, format_dict in formats_dict.items():
if not isinstance(format_dict, dict): if not isinstance(format_dict, dict):
continue continue
src = format_dict.get('src') src = url_or_none(format_dict.get('src'))
if not isinstance(src, compat_str) or not src.startswith('http'): if not src or not src.startswith('http'):
continue continue
if kind == 'hls': if kind == 'hls':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(

View File

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
@ -11,7 +13,13 @@ from ..utils import (
class ExpressenIE(InfoExtractor): class ExpressenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'''(?x)
https?://
(?:www\.)?expressen\.se/
(?:(?:tvspelare/video|videoplayer/embed)/)?
tv/(?:[^/]+/)*
(?P<id>[^/?#&]+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/', 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
'md5': '2fbbe3ca14392a6b1b36941858d33a45', 'md5': '2fbbe3ca14392a6b1b36941858d33a45',
@ -28,8 +36,21 @@ class ExpressenIE(InfoExtractor):
}, { }, {
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/', 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}] }]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
webpage)]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)

View File

@ -118,6 +118,10 @@ from .bilibili import (
BiliBiliBangumiIE, BiliBiliBangumiIE,
) )
from .biobiochiletv import BioBioChileTVIE from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
BitChuteIE,
BitChuteChannelIE,
)
from .biqle import BIQLEIE from .biqle import BIQLEIE
from .bleacherreport import ( from .bleacherreport import (
BleacherReportIE, BleacherReportIE,
@ -516,6 +520,7 @@ from .keezmovies import KeezMoviesIE
from .ketnet import KetnetIE from .ketnet import KetnetIE
from .khanacademy import KhanAcademyIE from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kinopoisk import KinoPoiskIE
from .keek import KeekIE from .keek import KeekIE
from .konserthusetplay import KonserthusetPlayIE from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE from .kontrtube import KontrTubeIE
@ -736,7 +741,10 @@ from .nonktube import NonkTubeIE
from .noovo import NoovoIE from .noovo import NoovoIE
from .normalboots import NormalbootsIE from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE from .nosvideo import NosVideoIE
from .nova import NovaIE from .nova import (
NovaEmbedIE,
NovaIE,
)
from .novamov import ( from .novamov import (
AuroraVidIE, AuroraVidIE,
CloudTimeIE, CloudTimeIE,
@ -860,6 +868,10 @@ from .pornhub import (
from .pornotube import PornotubeIE from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE from .pornoxo import PornoXOIE
from .puhutv import (
PuhuTVIE,
PuhuTVSerieIE,
)
from .presstv import PressTVIE from .presstv import PressTVIE
from .primesharetv import PrimeShareTVIE from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE from .promptfile import PromptFileIE
@ -891,7 +903,10 @@ from .rai import (
RaiPlayPlaylistIE, RaiPlayPlaylistIE,
RaiIE, RaiIE,
) )
from .raywenderlich import RayWenderlichIE from .raywenderlich import (
RayWenderlichIE,
RayWenderlichCourseIE,
)
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
from .rds import RDSIE from .rds import RDSIE
from .redbulltv import RedBullTVIE from .redbulltv import RedBullTVIE
@ -1166,6 +1181,7 @@ from .tvp import (
from .tvplay import ( from .tvplay import (
TVPlayIE, TVPlayIE,
ViafreeIE, ViafreeIE,
TVPlayHomeIE,
) )
from .tvplayer import TVPlayerIE from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE from .tweakers import TweakersIE
@ -1287,6 +1303,7 @@ from .viki import (
VikiIE, VikiIE,
VikiChannelIE, VikiChannelIE,
) )
from .viqeo import ViqeoIE
from .viu import ( from .viu import (
ViuIE, ViuIE,
ViuPlaylistIE, ViuPlaylistIE,
@ -1412,6 +1429,7 @@ from .younow import (
YouNowMomentIE, YouNowMomentIE,
) )
from .youporn import YouPornIE from .youporn import YouPornIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE from .yourupload import YourUploadIE
from .youtube import ( from .youtube import (
YoutubeIE, YoutubeIE,

View File

@ -20,6 +20,7 @@ from ..utils import (
int_or_none, int_or_none,
js_to_json, js_to_json,
limit_length, limit_length,
parse_count,
sanitized_Request, sanitized_Request,
try_get, try_get,
urlencode_postdata, urlencode_postdata,
@ -75,7 +76,7 @@ class FacebookIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '274175099429670', 'id': '274175099429670',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Asif Nawab Butt posted a video to his Timeline.', 'title': 're:^Asif Nawab Butt posted a video',
'uploader': 'Asif Nawab Butt', 'uploader': 'Asif Nawab Butt',
'upload_date': '20140506', 'upload_date': '20140506',
'timestamp': 1399398998, 'timestamp': 1399398998,
@ -133,7 +134,7 @@ class FacebookIE(InfoExtractor):
}, { }, {
# have 1080P, but only up to 720p in swf params # have 1080P, but only up to 720p in swf params
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': '0d9813160b146b3bc8744e006027fcc6', 'md5': '9571fae53d4165bbbadb17a94651dcdc',
'info_dict': { 'info_dict': {
'id': '10155529876156509', 'id': '10155529876156509',
'ext': 'mp4', 'ext': 'mp4',
@ -142,6 +143,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20161030', 'upload_date': '20161030',
'uploader': 'CNN', 'uploader': 'CNN',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'view_count': int,
}, },
}, { }, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@ -149,7 +151,7 @@ class FacebookIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '1417995061575415', 'id': '1417995061575415',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe', 'title': 'md5:1db063d6a8c13faa8da727817339c857',
'timestamp': 1486648217, 'timestamp': 1486648217,
'upload_date': '20170209', 'upload_date': '20170209',
'uploader': 'Yaroslav Korpan', 'uploader': 'Yaroslav Korpan',
@ -176,7 +178,7 @@ class FacebookIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '1396382447100162', 'id': '1396382447100162',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3', 'title': 'md5:19a428bbde91364e3de815383b54a235',
'timestamp': 1486035494, 'timestamp': 1486035494,
'upload_date': '20170202', 'upload_date': '20170202',
'uploader': 'Elisabeth Ahtn', 'uploader': 'Elisabeth Ahtn',
@ -353,7 +355,6 @@ class FacebookIE(InfoExtractor):
tahoe_data = self._download_webpage( tahoe_data = self._download_webpage(
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id, self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
data=urlencode_postdata({ data=urlencode_postdata({
'__user': 0,
'__a': 1, '__a': 1,
'__pc': self._search_regex( '__pc': self._search_regex(
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage, r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
@ -361,6 +362,9 @@ class FacebookIE(InfoExtractor):
'__rev': self._search_regex( '__rev': self._search_regex(
r'client_revision["\']\s*:\s*(\d+),', webpage, r'client_revision["\']\s*:\s*(\d+),', webpage,
'client revision', default='3944515'), 'client revision', default='3944515'),
'fb_dtsg': self._search_regex(
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
webpage, 'dtsg token', default=''),
}), }),
headers={ headers={
'Content-Type': 'application/x-www-form-urlencoded', 'Content-Type': 'application/x-www-form-urlencoded',
@ -426,6 +430,10 @@ class FacebookIE(InfoExtractor):
'timestamp', default=None)) 'timestamp', default=None))
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
view_count = parse_count(self._search_regex(
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
default=None))
info_dict = { info_dict = {
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
@ -433,6 +441,7 @@ class FacebookIE(InfoExtractor):
'uploader': uploader, 'uploader': uploader,
'timestamp': timestamp, 'timestamp': timestamp,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'view_count': view_count,
} }
return webpage, info_dict return webpage, info_dict

View File

@ -10,6 +10,7 @@ from ..utils import (
int_or_none, int_or_none,
qualities, qualities,
unified_strdate, unified_strdate,
url_or_none,
) )
@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
formats = [] formats = []
path = None path = None
for f in item.get('mbr', []): for f in item.get('mbr', []):
src = f.get('src') src = url_or_none(f.get('src'))
if not src or not isinstance(src, compat_str): if not src:
continue continue
tbr = int_or_none(self._search_regex( tbr = int_or_none(self._search_regex(
r'_(\d{3,})\.mp4', src, 'tbr', default=None)) r'_(\d{3,})\.mp4', src, 'tbr', default=None))

View File

@ -16,6 +16,7 @@ from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
try_get, try_get,
url_or_none,
) )
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
def sign(manifest_url, manifest_id): def sign(manifest_url, manifest_id):
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'): for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
signed_url = self._download_webpage( signed_url = url_or_none(self._download_webpage(
'https://%s/esi/TA' % host, video_id, 'https://%s/esi/TA' % host, video_id,
'Downloading signed %s manifest URL' % manifest_id, 'Downloading signed %s manifest URL' % manifest_id,
fatal=False, query={ fatal=False, query={
'url': manifest_url, 'url': manifest_url,
}) }))
if (signed_url and isinstance(signed_url, compat_str) and if signed_url:
re.search(r'^(?:https?:)?//', signed_url)):
return signed_url return signed_url
return manifest_url return manifest_url

View File

@ -11,6 +11,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
parse_duration, parse_duration,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -80,7 +81,7 @@ class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
chapters = [] chapters = []
lesson_elements = course.get('lessonElements') lesson_elements = course.get('lessonElements')
if isinstance(lesson_elements, list): if isinstance(lesson_elements, list):
chapters = [e for e in lesson_elements if isinstance(e, compat_str)] chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
return chapters return chapters
@staticmethod @staticmethod

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -125,17 +126,31 @@ class FunkChannelIE(FunkBaseIE):
# Id-based channels are currently broken on their side: webplayer # Id-based channels are currently broken on their side: webplayer
# tries to process them via byChannelAlias endpoint and fails # tries to process them via byChannelAlias endpoint and fails
# predictably. # predictably.
by_channel_alias = self._download_json( for page_num in itertools.count():
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s' by_channel_alias = self._download_json(
% channel_id, 'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
'Downloading byChannelAlias JSON', headers=headers, query={ % channel_id,
'size': 100, 'Downloading byChannelAlias JSON page %d' % (page_num + 1),
}, fatal=False) headers=headers, query={
if by_channel_alias: 'filterFsk': 'false',
'sort': 'creationDate,desc',
'size': 100,
'page': page_num,
}, fatal=False)
if not by_channel_alias:
break
video_list = try_get( video_list = try_get(
by_channel_alias, lambda x: x['_embedded']['videoList'], list) by_channel_alias, lambda x: x['_embedded']['videoList'], list)
if video_list: if not video_list:
break
try:
video = next(r for r in video_list if r.get('alias') == alias) video = next(r for r in video_list if r.get('alias') == alias)
break
except StopIteration:
pass
if not try_get(
by_channel_alias, lambda x: x['_links']['next']):
break
if not video: if not video:
by_id_list = self._download_json( by_id_list = self._download_json(

View File

@ -112,6 +112,8 @@ from .peertube import PeerTubeIE
from .indavideo import IndavideoEmbedIE from .indavideo import IndavideoEmbedIE
from .apa import APAIE from .apa import APAIE
from .foxnews import FoxNewsIE from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE
from .expressen import ExpressenIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2059,6 +2061,30 @@ class GenericIE(InfoExtractor):
}, },
'skip': 'TODO: fix nested playlists processing in tests', 'skip': 'TODO: fix nested playlists processing in tests',
}, },
{
# Viqeo embeds
'url': 'https://viqeo.tv/',
'info_dict': {
'id': 'viqeo',
'title': 'All-new video platform',
},
'playlist_count': 6,
},
{
# videojs embed
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
'info_dict': {
'id': 'shell',
'ext': 'mp4',
'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Failed to download MPD manifest'],
},
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@ -3093,6 +3119,16 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
sharevideos_urls, video_id, video_title) sharevideos_urls, video_id, video_title)
viqeo_urls = ViqeoIE._extract_urls(webpage)
if viqeo_urls:
return self.playlist_from_matches(
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
expressen_urls = ExpressenIE._extract_urls(webpage)
if expressen_urls:
return self.playlist_from_matches(
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries: if entries:

View File

@ -36,7 +36,8 @@ class GoIE(AdobePassIE):
'requestor_id': 'DisneyXD', 'requestor_id': 'DisneyXD',
} }
} }
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
_TESTS = [{ _TESTS = [{
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
'info_dict': { 'info_dict': {
@ -62,6 +63,14 @@ class GoIE(AdobePassIE):
}, { }, {
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland', 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
'only_matching': True, 'only_matching': True,
}, {
# brand 004
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
'only_matching': True,
}, {
# brand 008
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
'only_matching': True,
}] }]
def _extract_videos(self, brand, video_id='-1', show_id='-1'): def _extract_videos(self, brand, video_id='-1', show_id='-1'):
@ -72,14 +81,23 @@ class GoIE(AdobePassIE):
def _real_extract(self, url): def _real_extract(self, url):
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
site_info = self._SITE_INFO[sub_domain] site_info = self._SITE_INFO.get(sub_domain, {})
brand = site_info['brand'] brand = site_info.get('brand')
if not video_id: if not video_id or not site_info:
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id or video_id)
video_id = self._search_regex( video_id = self._search_regex(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'" # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None) r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
default=None)
if not site_info:
brand = self._search_regex(
(r'data-brand=\s*["\']\s*(\d+)',
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
default='004')
site_info = next(
si for _, si in self._SITE_INFO.items()
if si.get('brand') == brand)
if not video_id: if not video_id:
# show extraction works for Disney, DisneyJunior and DisneyXD # show extraction works for Disney, DisneyJunior and DisneyXD
# ABC and Freeform has different layout # ABC and Freeform has different layout

View File

@ -8,6 +8,7 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -80,8 +81,8 @@ class HiDiveIE(InfoExtractor):
bitrates = rendition.get('bitrates') bitrates = rendition.get('bitrates')
if not isinstance(bitrates, dict): if not isinstance(bitrates, dict):
continue continue
m3u8_url = bitrates.get('hls') m3u8_url = url_or_none(bitrates.get('hls'))
if not isinstance(m3u8_url, compat_str): if not m3u8_url:
continue continue
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
@ -93,9 +94,8 @@ class HiDiveIE(InfoExtractor):
if not isinstance(cc_file, list) or len(cc_file) < 3: if not isinstance(cc_file, list) or len(cc_file) < 3:
continue continue
cc_lang = cc_file[0] cc_lang = cc_file[0]
cc_url = cc_file[2] cc_url = url_or_none(cc_file[2])
if not isinstance(cc_lang, compat_str) or not isinstance( if not isinstance(cc_lang, compat_str) or not cc_url:
cc_url, compat_str):
continue continue
subtitles.setdefault(cc_lang, []).append({ subtitles.setdefault(cc_lang, []).append({
'url': cc_url, 'url': cc_url,

View File

@ -3,12 +3,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
mimetype2ext, mimetype2ext,
parse_duration, parse_duration,
qualities, qualities,
url_or_none,
) )
@ -61,10 +61,11 @@ class ImdbIE(InfoExtractor):
for encoding in video_metadata.get('encodings', []): for encoding in video_metadata.get('encodings', []):
if not encoding or not isinstance(encoding, dict): if not encoding or not isinstance(encoding, dict):
continue continue
video_url = encoding.get('videoUrl') video_url = url_or_none(encoding.get('videoUrl'))
if not video_url or not isinstance(video_url, compat_str): if not video_url:
continue continue
ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType'))) ext = mimetype2ext(encoding.get(
'mimeType')) or determine_ext(video_url)
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native', video_url, video_id, 'mp4', entry_protocol='m3u8_native',

View File

@ -12,7 +12,7 @@ from ..utils import (
class ImgurIE(InfoExtractor): class ImgurIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$' _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
_TESTS = [{ _TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv', 'url': 'https://i.imgur.com/A61SaA1.gifv',
@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
}, { }, {
'url': 'http://imgur.com/r/aww/VQcQPhM', 'url': 'http://imgur.com/r/aww/VQcQPhM',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://i.imgur.com/crGpqCV.mp4',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -17,6 +17,7 @@ from ..utils import (
lowercase_escape, lowercase_escape,
std_headers, std_headers,
try_get, try_get,
url_or_none,
) )
@ -170,7 +171,7 @@ class InstagramIE(InfoExtractor):
node = try_get(edge, lambda x: x['node'], dict) node = try_get(edge, lambda x: x['node'], dict)
if not node: if not node:
continue continue
node_video_url = try_get(node, lambda x: x['video_url'], compat_str) node_video_url = url_or_none(node.get('video_url'))
if not node_video_url: if not node_video_url:
continue continue
entries.append({ entries.append({

View File

@ -20,6 +20,7 @@ from ..utils import (
merge_dicts, merge_dicts,
parse_duration, parse_duration,
smuggle_url, smuggle_url,
url_or_none,
xpath_with_ns, xpath_with_ns,
xpath_element, xpath_element,
xpath_text, xpath_text,
@ -250,8 +251,8 @@ class ITVIE(InfoExtractor):
for sub in subs: for sub in subs:
if not isinstance(sub, dict): if not isinstance(sub, dict):
continue continue
href = sub.get('Href') href = url_or_none(sub.get('Href'))
if isinstance(href, compat_str): if href:
extract_subtitle(href) extract_subtitle(href)
if not info.get('duration'): if not info.get('duration'):
info['duration'] = parse_duration(video_data.get('Duration')) info['duration'] = parse_duration(video_data.get('Duration'))

View File

@ -7,6 +7,7 @@ from ..utils import (
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
remove_end, remove_end,
url_or_none,
) )
@ -73,11 +74,14 @@ class IwaraIE(InfoExtractor):
formats = [] formats = []
for a_format in video_data: for a_format in video_data:
format_uri = url_or_none(a_format.get('uri'))
if not format_uri:
continue
format_id = a_format.get('resolution') format_id = a_format.get('resolution')
height = int_or_none(self._search_regex( height = int_or_none(self._search_regex(
r'(\d+)p', format_id, 'height', default=None)) r'(\d+)p', format_id, 'height', default=None))
formats.append({ formats.append({
'url': a_format['uri'], 'url': self._proto_relative_url(format_uri, 'https:'),
'format_id': format_id, 'format_id': format_id,
'ext': mimetype2ext(a_format.get('mime')) or 'mp4', 'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
'height': height, 'height': height,

View File

@ -4,16 +4,14 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_decrypt_text from ..aes import aes_decrypt_text
from ..compat import ( from ..compat import compat_urllib_parse_unquote
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
str_to_int, str_to_int,
strip_or_none, strip_or_none,
url_or_none,
) )
@ -55,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
encrypted = False encrypted = False
def extract_format(format_url, height=None): def extract_format(format_url, height=None):
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')): format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//')):
return return
if format_url in format_urls: if format_url in format_urls:
return return

View File

@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
dict_get,
int_or_none,
)
class KinoPoiskIE(InfoExtractor):
_GEO_COUNTRIES = ['RU']
_VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.kinopoisk.ru/film/81041/watch/',
'md5': '4f71c80baea10dfa54a837a46111d326',
'info_dict': {
'id': '81041',
'ext': 'mp4',
'title': 'Алеша попович и тугарин змей',
'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
'thumbnail': r're:^https?://.*',
'duration': 4533,
'age_limit': 12,
},
'params': {
'format': 'bestvideo',
},
}, {
'url': 'https://www.kinopoisk.ru/film/81041',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
query={'kpId': video_id})
data = self._parse_json(
self._search_regex(
r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
webpage, 'data'),
video_id)['models']
film = data['filmStatus']
title = film.get('title') or film['originalTitle']
formats = self._extract_m3u8_formats(
data['playlistEntity']['uri'], video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
description = dict_get(
film, ('descriptscription', 'description',
'shortDescriptscription', 'shortDescription'))
thumbnail = film.get('coverUrl') or film.get('posterUrl')
duration = int_or_none(film.get('duration'))
age_limit = int_or_none(film.get('restrictionAge'))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'age_limit': age_limit,
'formats': formats,
}

View File

@ -2,11 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
url_or_none,
) )
@ -109,7 +109,8 @@ class KonserthusetPlayIE(InfoExtractor):
captions = source.get('captionsAvailableLanguages') captions = source.get('captionsAvailableLanguages')
if isinstance(captions, dict): if isinstance(captions, dict):
for lang, subtitle_url in captions.items(): for lang, subtitle_url in captions.items():
if lang != 'none' and isinstance(subtitle_url, compat_str): subtitle_url = url_or_none(subtitle_url)
if lang != 'none' and subtitle_url:
subtitles.setdefault(lang, []).append({'url': subtitle_url}) subtitles.setdefault(lang, []).append({'url': subtitle_url})
return { return {

View File

@ -20,5 +20,7 @@ class LCIIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
wat_id = self._search_regex(r'data-watid=[\'"](\d+)', webpage, 'wat id') wat_id = self._search_regex(
(r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'),
webpage, 'wat id')
return self.url_result('wat:' + wat_id, 'Wat', wat_id) return self.url_result('wat:' + wat_id, 'Wat', wat_id)

View File

@ -3,75 +3,75 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .theplatform import ThePlatformBaseIE
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, ExtractorError,
parse_duration, int_or_none,
try_get, update_url_query,
unified_strdate,
) )
class MediasetIE(InfoExtractor): class MediasetIE(ThePlatformBaseIE):
_TP_TLD = 'eu'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
mediaset:| mediaset:|
https?:// https?://
(?:www\.)?video\.mediaset\.it/ (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
(?: (?:
(?:video|on-demand)/(?:[^/]+/)+[^/]+_| (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid= player/index\.html\?.*?\bprogramGuid=
) )
)(?P<id>[0-9]+) )(?P<id>[0-9A-Z]{16})
''' '''
_TESTS = [{ _TESTS = [{
# full episode # full episode
'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html', 'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
'md5': '9b75534d42c44ecef7bf1ffeacb7f85d', 'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
'info_dict': { 'info_dict': {
'id': '661824', 'id': 'FAFU000000661824',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Quarta puntata', 'title': 'Quarta puntata',
'description': 'md5:7183696d6df570e3412a5ef74b27c5e2', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1414, 'duration': 1414.26,
'creator': 'mediaset',
'upload_date': '20161107', 'upload_date': '20161107',
'series': 'Hello Goodbye', 'series': 'Hello Goodbye',
'categories': ['reality'], 'timestamp': 1478532900,
'uploader': 'Rete 4',
'uploader_id': 'R4',
}, },
'expected_warnings': ['is not a supported codec'],
}, { }, {
'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html', 'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
'md5': '1276f966ac423d16ba255ce867de073e', 'md5': '288532f0ad18307705b01e581304cd7b',
'info_dict': { 'info_dict': {
'id': '846685', 'id': 'F309013801000501',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Puntata del 25 maggio', 'title': 'Puntata del 25 maggio',
'description': 'md5:ee2e456e3eb1dba5e814596655bb5296', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 6565, 'duration': 6565.007,
'creator': 'mediaset', 'upload_date': '20180526',
'upload_date': '20180525',
'series': 'Matrix', 'series': 'Matrix',
'categories': ['infotainment'], 'timestamp': 1527326245,
'uploader': 'Canale 5',
'uploader_id': 'C5',
}, },
'expected_warnings': ['HTTP Error 403: Forbidden'], 'expected_warnings': ['HTTP Error 403: Forbidden'],
}, { }, {
# clip # clip
'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html', 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
'only_matching': True, 'only_matching': True,
}, { }, {
# iframe simple # iframe simple
'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true', 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
'only_matching': True, 'only_matching': True,
}, { }, {
# iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/) # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true', 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'mediaset:661824', 'url': 'mediaset:FAFU000000665924',
'only_matching': True, 'only_matching': True,
}] }]
@ -84,61 +84,54 @@ class MediasetIE(InfoExtractor):
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) guid = self._match_id(url)
tp_path = 'PR1GhC/media/guid/2702976343/' + guid
video = self._download_json( info = self._extract_theplatform_metadata(tp_path, guid)
'https://www.video.mediaset.it/html/metainfo.sjson',
video_id, 'Downloading media info', query={
'id': video_id
})['video']
title = video['title']
media_id = video.get('guid') or video_id
video_list = self._download_json(
'http://cdnsel01.mediaset.net/GetCdn2018.aspx',
video_id, 'Downloading video CDN JSON', query={
'streamid': media_id,
'format': 'json',
})['videoList']
formats = [] formats = []
for format_url in video_list: subtitles = {}
ext = determine_ext(format_url) first_e = None
if ext == 'm3u8': for asset_type in ('SD', 'HD'):
formats.extend(self._extract_m3u8_formats( for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
format_url, video_id, 'mp4', entry_protocol='m3u8_native', try:
m3u8_id='hls', fatal=False)) tp_formats, tp_subtitles = self._extract_theplatform_smil(
elif ext == 'mpd': update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
formats.extend(self._extract_mpd_formats( 'mbr': 'true',
format_url, video_id, mpd_id='dash', fatal=False)) 'formats': f,
elif ext == 'ism' or '.ism' in format_url: 'assetTypes': asset_type,
formats.extend(self._extract_ism_formats( }), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
format_url, video_id, ism_id='mss', fatal=False)) except ExtractorError as e:
else: if not first_e:
formats.append({ first_e = e
'url': format_url, break
'format_id': determine_ext(format_url), for tp_f in tp_formats:
}) tp_f['quality'] = 1 if asset_type == 'HD' else 0
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if first_e and not formats:
raise first_e
self._sort_formats(formats) self._sort_formats(formats)
creator = try_get( fields = []
video, lambda x: x['brand-info']['publisher'], compat_str) for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
category = try_get( fields.extend(templ % repl for repl in repls)
video, lambda x: x['brand-info']['category'], compat_str) feed_data = self._download_json(
categories = [category] if category else None 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
guid, fatal=False, query={'fields': ','.join(fields)})
if feed_data:
publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
info.update({
'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
'series': feed_data.get('mediasetprogram$brandTitle'),
'uploader': publish_info.get('description'),
'uploader_id': publish_info.get('channel'),
'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
})
return { info.update({
'id': video_id, 'id': guid,
'title': title,
'description': video.get('short-description'),
'thumbnail': video.get('thumbnail'),
'duration': parse_duration(video.get('duration')),
'creator': creator,
'upload_date': unified_strdate(video.get('production-date')),
'webpage_url': video.get('url'),
'series': video.get('brand-value'),
'season': video.get('season'),
'categories': categories,
'formats': formats, 'formats': formats,
} 'subtitles': subtitles,
})
return info

View File

@ -15,6 +15,7 @@ from ..utils import (
mimetype2ext, mimetype2ext,
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
url_or_none,
urljoin, urljoin,
) )
@ -156,8 +157,8 @@ class MediasiteIE(InfoExtractor):
stream_formats = [] stream_formats = []
for unum, VideoUrl in enumerate(video_urls): for unum, VideoUrl in enumerate(video_urls):
video_url = VideoUrl.get('Location') video_url = url_or_none(VideoUrl.get('Location'))
if not video_url or not isinstance(video_url, compat_str): if not video_url:
continue continue
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS

View File

@ -1,84 +1,14 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import uuid
from .common import InfoExtractor from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
extract_attributes,
determine_ext,
smuggle_url, smuggle_url,
parse_duration, parse_duration,
) )
class MiTeleBaseIE(InfoExtractor):
def _get_player_info(self, url, webpage):
player_data = extract_attributes(self._search_regex(
r'(?s)(<ms-video-player.+?</ms-video-player>)',
webpage, 'ms video player'))
video_id = player_data['data-media-id']
if player_data.get('data-cms-id') == 'ooyala':
return self.url_result(
'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
config = self._download_json(
config_url, video_id, 'Downloading config JSON')
mmc_url = config['services']['mmc']
duration = None
formats = []
for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')):
mmc = self._download_json(
m_url, video_id, 'Downloading mmc JSON')
if not duration:
duration = int_or_none(mmc.get('duration'))
for location in mmc['locations']:
gat = self._proto_relative_url(location.get('gat'), 'http:')
gcp = location.get('gcp')
ogn = location.get('ogn')
if None in (gat, gcp, ogn):
continue
token_data = {
'gcp': gcp,
'ogn': ogn,
'sta': 0,
}
media = self._download_json(
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8',
'Referer': url,
})
stream = media.get('stream') or media.get('file')
if not stream:
continue
ext = determine_ext(stream)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
stream, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'),
'duration': duration,
}
class MiTeleIE(InfoExtractor): class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es' IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player' _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
@ -86,7 +16,7 @@ class MiTeleIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
'info_dict': { 'info_dict': {
'id': '57b0dfb9c715da65618b4afa', 'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Tor, la web invisible', 'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
@ -104,7 +34,7 @@ class MiTeleIE(InfoExtractor):
# no explicit title # no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
'info_dict': { 'info_dict': {
'id': '57b0de3dc915da14058b4876', 'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Cuarto Milenio Temporada 6 Programa 226', 'title': 'Cuarto Milenio Temporada 6 Programa 226',
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
@ -128,40 +58,21 @@ class MiTeleIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
gigya_url = self._search_regex(
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
webpage, 'gigya', default=None)
gigya_sc = self._download_webpage(
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
video_id, 'Downloading gigya script')
# Get a appKey/uuid for getting the session key
appKey = self._search_regex(
r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
gigya_sc, 'appKey')
session_json = self._download_json(
'https://appgrid-api.cloud.accedo.tv/session',
video_id, 'Downloading session keys', query={
'appKey': appKey,
'uuid': compat_str(uuid.uuid4()),
})
paths = self._download_json( paths = self._download_json(
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration', 'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
video_id, 'Downloading paths JSON', video_id, 'Downloading paths JSON')
query={'sessionKey': compat_str(session_json['sessionKey'])})
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
source = self._download_json( source = self._download_json(
'http://%s%s%s/docs/%s' % ( '%s://%s%s%s/docs/%s' % (
ooyala_s['base_url'], ooyala_s['full_path'], ooyala_s.get('protocol', 'https'), base_url, full_path,
ooyala_s['provider_id'], video_id), ooyala_s.get('provider_id', '104951'), video_id),
video_id, 'Downloading data JSON', query={ video_id, 'Downloading data JSON', query={
'include_titles': 'Series,Season', 'include_titles': 'Series,Season',
'product_name': 'test', 'product_name': ooyala_s.get('product_name', 'test'),
'format': 'full', 'format': 'full',
})['hits']['hits'][0]['_source'] })['hits']['hits'][0]['_source']

View File

@ -6,28 +6,90 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
int_or_none,
js_to_json,
qualities,
unified_strdate, unified_strdate,
url_or_none,
) )
class NovaEmbedIE(InfoExtractor):
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
'md5': 'b3834f6de5401baabf31ed57456463f7',
'info_dict': {
'id': '8o0n0r',
'ext': 'mp4',
'title': '2180. díl',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2578,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
bitrates = self._parse_json(
self._search_regex(
r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
formats = []
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list):
continue
for format_url in format_list:
format_url = url_or_none(format_url)
if not format_url:
continue
f = {
'url': format_url,
}
f_id = format_id
for quality in QUALITIES:
if '%s.mp4' % quality in format_url:
f_id += '-%s' % quality
f.update({
'quality': quality_key(quality),
'format_note': quality.upper(),
})
break
f['format_id'] = f_id
formats.append(f)
self._sort_formats(formats)
title = self._og_search_title(
webpage, default=None) or self._search_regex(
(r'<value>(?P<title>[^<]+)',
r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'title', group='value')
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._search_regex(
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'thumbnail', fatal=False, group='value')
duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
class NovaIE(InfoExtractor): class NovaIE(InfoExtractor):
IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
'info_dict': {
'id': '1608920',
'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
'ext': 'flv',
'title': 'Duel: Michal Hrdlička a Petr Suchoň',
'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
'thumbnail': r're:^https?://.*\.(?:jpg)',
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
'info_dict': { 'info_dict': {
@ -38,33 +100,6 @@ class NovaIE(InfoExtractor):
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
'thumbnail': r're:^https?://.*\.(?:jpg)', 'thumbnail': r're:^https?://.*\.(?:jpg)',
} }
}, {
'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
'info_dict': {
'id': '1756825',
'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
'ext': 'flv',
'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags
'thumbnail': r're:^https?://.*\.(?:jpg)',
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/',
'info_dict': {
'id': '1756858',
'ext': 'flv',
'title': 'Televizní noviny - 30. 5. 2015',
'thumbnail': r're:^https?://.*\.(?:jpg)',
'upload_date': '20150530',
},
'params': {
# rtmp download
'skip_download': True,
}
}, { }, {
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
'info_dict': { 'info_dict': {
@ -79,6 +114,20 @@ class NovaIE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
} }
}, {
# media.cms.nova.cz embed
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
'info_dict': {
'id': '8o0n0r',
'ext': 'mp4',
'title': '2180. díl',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2578,
},
'params': {
'skip_download': True,
},
'add_ie': [NovaEmbedIE.ie_key()],
}, { }, {
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html', 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
'only_matching': True, 'only_matching': True,
@ -103,6 +152,15 @@ class NovaIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
# novaplus
embed_id = self._search_regex(
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
webpage, 'embed url', default=None)
if embed_id:
return self.url_result(
'https://media.cms.nova.cz/embed/%s' % embed_id,
ie=NovaEmbedIE.ie_key(), video_id=embed_id)
video_id = self._search_regex( video_id = self._search_regex(
[r"(?:media|video_id)\s*:\s*'(\d+)'", [r"(?:media|video_id)\s*:\s*'(\d+)'",
r'media=(\d+)', r'media=(\d+)',
@ -111,8 +169,21 @@ class NovaIE(InfoExtractor):
webpage, 'video id') webpage, 'video id')
config_url = self._search_regex( config_url = self._search_regex(
r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"', r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
webpage, 'config url', default=None) webpage, 'config url', default=None)
config_params = {}
if not config_url:
player = self._parse_json(
self._search_regex(
r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
'player', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if player:
config_url = url_or_none(player.get('configUrl'))
params = player.get('configParams')
if isinstance(params, dict):
config_params = params
if not config_url: if not config_url:
DEFAULT_SITE_ID = '23000' DEFAULT_SITE_ID = '23000'
@ -127,14 +198,20 @@ class NovaIE(InfoExtractor):
} }
site_id = self._search_regex( site_id = self._search_regex(
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID) r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
site, DEFAULT_SITE_ID)
config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig' config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
% (site_id, video_id)) config_params = {
'site': site_id,
'media': video_id,
'quality': 3,
'version': 1,
}
config = self._download_json( config = self._download_json(
config_url, display_id, config_url, display_id,
'Downloading config JSON', 'Downloading config JSON', query=config_params,
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
mediafile = config['mediafile'] mediafile = config['mediafile']

View File

@ -15,6 +15,7 @@ from ..utils import (
strip_jsonp, strip_jsonp,
strip_or_none, strip_or_none,
unified_strdate, unified_strdate,
url_or_none,
US_RATINGS, US_RATINGS,
) )
@ -557,6 +558,13 @@ class PBSIE(InfoExtractor):
if redirect_url and redirect_url not in redirect_urls: if redirect_url and redirect_url not in redirect_urls:
redirects.append(redirect) redirects.append(redirect)
redirect_urls.add(redirect_url) redirect_urls.add(redirect_url)
encodings = info.get('encodings')
if isinstance(encodings, list):
for encoding in encodings:
encoding_url = url_or_none(encoding)
if encoding_url and encoding_url not in redirect_urls:
redirects.append({'url': encoding_url})
redirect_urls.add(encoding_url)
chapters = [] chapters = []
# Player pages may also serve different qualities # Player pages may also serve different qualities

View File

@ -10,6 +10,7 @@ from ..utils import (
parse_resolution, parse_resolution,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none,
urljoin, urljoin,
) )
@ -200,8 +201,8 @@ class PeerTubeIE(InfoExtractor):
for file_ in video['files']: for file_ in video['files']:
if not isinstance(file_, dict): if not isinstance(file_, dict):
continue continue
file_url = file_.get('fileUrl') file_url = url_or_none(file_.get('fileUrl'))
if not file_url or not isinstance(file_url, compat_str): if not file_url:
continue continue
file_size = int_or_none(file_.get('size')) file_size = int_or_none(file_.get('size'))
format_id = try_get( format_id = try_get(

View File

@ -18,6 +18,7 @@ from ..utils import (
orderedSet, orderedSet,
remove_quotes, remove_quotes,
str_to_int, str_to_int,
url_or_none,
) )
@ -68,6 +69,31 @@ class PornHubIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# subtitles
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
'info_dict': {
'id': 'ph5af5fef7c2aa7',
'ext': 'mp4',
'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
'uploader': 'BFFs',
'duration': 622,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'age_limit': 18,
'tags': list,
'categories': list,
'subtitles': {
'en': [{
"ext": 'srt'
}]
},
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True, 'only_matching': True,
@ -139,12 +165,19 @@ class PornHubIE(InfoExtractor):
video_urls = [] video_urls = []
video_urls_set = set() video_urls_set = set()
subtitles = {}
flashvars = self._parse_json( flashvars = self._parse_json(
self._search_regex( self._search_regex(
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
video_id) video_id)
if flashvars: if flashvars:
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
if subtitle_url:
subtitles.setdefault('en', []).append({
'url': subtitle_url,
'ext': 'srt',
})
thumbnail = flashvars.get('image_url') thumbnail = flashvars.get('image_url')
duration = int_or_none(flashvars.get('video_duration')) duration = int_or_none(flashvars.get('video_duration'))
media_definitions = flashvars.get('mediaDefinitions') media_definitions = flashvars.get('mediaDefinitions')
@ -256,6 +289,7 @@ class PornHubIE(InfoExtractor):
'age_limit': 18, 'age_limit': 18,
'tags': tags, 'tags': tags,
'categories': categories, 'categories': categories,
'subtitles': subtitles,
} }

View File

@ -0,0 +1,247 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
parse_resolution,
str_or_none,
try_get,
unified_timestamp,
url_or_none,
urljoin,
)
class PuhuTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
IE_NAME = 'puhutv'
_TESTS = [{
# film
'url': 'https://puhutv.com/sut-kardesler-izle',
'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
'info_dict': {
'id': '5085',
'display_id': 'sut-kardesler',
'ext': 'mp4',
'title': 'Süt Kardeşler',
'description': 'md5:405fd024df916ca16731114eb18e511a',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 4832.44,
'creator': 'Arzu Film',
'timestamp': 1469778212,
'upload_date': '20160729',
'release_year': 1976,
'view_count': int,
'tags': ['Aile', 'Komedi', 'Klasikler'],
},
}, {
# episode, geo restricted, bypassable with --geo-verification-proxy
'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
'only_matching': True,
}, {
# 4k, with subtitles
'url': 'https://puhutv.com/dip-1-bolum-izle',
'only_matching': True,
}]
_SUBTITLE_LANGS = {
'English': 'en',
'Deutsch': 'de',
'عربى': 'ar'
}
def _real_extract(self, url):
display_id = self._match_id(url)
info = self._download_json(
urljoin(url, '/api/slug/%s-izle' % display_id),
display_id)['data']
video_id = compat_str(info['id'])
title = info.get('name') or info['title']['name']
if info.get('display_name'):
title = '%s %s' % (title, info.get('display_name'))
try:
videos = self._download_json(
'https://puhutv.com/api/assets/%s/videos' % video_id,
display_id, 'Downloading video JSON',
headers=self.geo_verification_headers())
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
self.raise_geo_restricted()
raise
formats = []
for video in videos['data']['videos']:
media_url = url_or_none(video.get('url'))
if not media_url:
continue
playlist = video.get('is_playlist')
if video.get('stream_type') == 'hls' and playlist is True:
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
quality = int_or_none(video.get('quality'))
f = {
'url': media_url,
'ext': 'mp4',
'height': quality
}
video_format = video.get('video_format')
if video_format == 'hls' and playlist is False:
format_id = 'hls'
f['protocol'] = 'm3u8_native'
elif video_format == 'mp4':
format_id = 'http'
else:
continue
if quality:
format_id += '-%sp' % quality
f['format_id'] = format_id
formats.append(f)
self._sort_formats(formats)
description = try_get(
info, lambda x: x['title']['description'],
compat_str) or info.get('description')
timestamp = unified_timestamp(info.get('created_at'))
creator = try_get(
info, lambda x: x['title']['producer']['name'], compat_str)
duration = float_or_none(
try_get(info, lambda x: x['content']['duration_in_ms'], int),
scale=1000)
view_count = try_get(info, lambda x: x['content']['watch_count'], int)
images = try_get(
info, lambda x: x['content']['images']['wide'], dict) or {}
thumbnails = []
for image_id, image_url in images.items():
if not isinstance(image_url, compat_str):
continue
if not image_url.startswith(('http', '//')):
image_url = 'https://%s' % image_url
t = parse_resolution(image_id)
t.update({
'id': image_id,
'url': image_url
})
thumbnails.append(t)
release_year = try_get(info, lambda x: x['title']['released_at'], int)
season_number = int_or_none(info.get('season_number'))
season_id = str_or_none(info.get('season_id'))
episode_number = int_or_none(info.get('episode_number'))
tags = []
for genre in try_get(info, lambda x: x['title']['genres'], list) or []:
if not isinstance(genre, dict):
continue
genre_name = genre.get('name')
if genre_name and isinstance(genre_name, compat_str):
tags.append(genre_name)
subtitles = {}
for subtitle in try_get(
info, lambda x: x['content']['subtitles'], list) or []:
if not isinstance(subtitle, dict):
continue
lang = subtitle.get('language')
sub_url = url_or_none(subtitle.get('url'))
if not lang or not isinstance(lang, compat_str) or not sub_url:
continue
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
'url': sub_url
}]
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'season_id': season_id,
'season_number': season_number,
'episode_number': episode_number,
'release_year': release_year,
'timestamp': timestamp,
'creator': creator,
'view_count': view_count,
'duration': duration,
'tags': tags,
'subtitles': subtitles,
'thumbnails': thumbnails,
'formats': formats
}
class PuhuTVSerieIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
IE_NAME = 'puhutv:serie'
_TESTS = [{
'url': 'https://puhutv.com/deniz-yildizi-detay',
'info_dict': {
'title': 'Deniz Yıldızı',
'id': 'deniz-yildizi',
},
'playlist_mincount': 205,
}, {
# a film detail page which is using same url with serie page
'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
'only_matching': True,
}]
def _extract_entries(self, seasons):
for season in seasons:
season_id = season.get('id')
if not season_id:
continue
page = 1
has_more = True
while has_more is True:
season = self._download_json(
'https://galadriel.puhutv.com/seasons/%s' % season_id,
season_id, 'Downloading page %s' % page, query={
'page': page,
'per': 40,
})
episodes = season.get('episodes')
if isinstance(episodes, list):
for ep in episodes:
slug_path = str_or_none(ep.get('slugPath'))
if not slug_path:
continue
video_id = str_or_none(int_or_none(ep.get('id')))
yield self.url_result(
'https://puhutv.com/%s' % slug_path,
ie=PuhuTVIE.ie_key(), video_id=video_id,
video_title=ep.get('name') or ep.get('eventLabel'))
page += 1
has_more = season.get('hasMore')
def _real_extract(self, url):
playlist_id = self._match_id(url)
info = self._download_json(
urljoin(url, '/api/slug/%s-detay' % playlist_id),
playlist_id)['data']
seasons = info.get('seasons')
if seasons:
return self.playlist_result(
self._extract_entries(seasons), playlist_id, info.get('name'))
# For films, these are using same url with series
video_id = info.get('slug') or info['assets'][0]['slug']
return self.url_result(
'https://puhutv.com/%s-izle' % video_id,
PuhuTVIE.ie_key(), video_id)

View File

@ -32,6 +32,9 @@ class RaiBaseIE(InfoExtractor):
_GEO_BYPASS = False _GEO_BYPASS = False
def _extract_relinker_info(self, relinker_url, video_id): def _extract_relinker_info(self, relinker_url, video_id):
if not re.match(r'https?://', relinker_url):
return {'formats': [{'url': relinker_url}]}
formats = [] formats = []
geoprotection = None geoprotection = None
is_live = None is_live = None
@ -369,6 +372,10 @@ class RaiIE(RaiBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# Direct MMS URL
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
'only_matching': True,
}] }]
def _extract_from_content_id(self, content_id, url): def _extract_from_content_id(self, content_id, url):

View File

@ -4,24 +4,37 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .vimeo import VimeoIE from .vimeo import VimeoIE
from ..compat import compat_str
from ..utils import ( from ..utils import (
extract_attributes,
ExtractorError, ExtractorError,
smuggle_url, int_or_none,
unsmuggle_url, merge_dicts,
try_get,
unescapeHTML,
unified_timestamp,
urljoin, urljoin,
) )
class RayWenderlichIE(InfoExtractor): class RayWenderlichIE(InfoExtractor):
_VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)' _VALID_URL = r'''(?x)
https?://
(?:
videos\.raywenderlich\.com/courses|
(?:www\.)?raywenderlich\.com
)/
(?P<course_id>[^/]+)/lessons/(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
'info_dict': { 'info_dict': {
'id': '248377018', 'id': '248377018',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Testing In iOS Episode 1: Introduction', 'title': 'Introduction',
'description': 'md5:804d031b3efa9fcb49777d512d74f722',
'timestamp': 1513906277,
'upload_date': '20171222',
'duration': 133, 'duration': 133,
'uploader': 'Ray Wenderlich', 'uploader': 'Ray Wenderlich',
'uploader_id': 'user3304672', 'uploader_id': 'user3304672',
@ -34,69 +47,133 @@ class RayWenderlichIE(InfoExtractor):
'expected_warnings': ['HTTP Error 403: Forbidden'], 'expected_warnings': ['HTTP Error 403: Forbidden'],
}, { }, {
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
'only_matching': True,
}]
@staticmethod
def _extract_video_id(data, lesson_id):
if not data:
return
groups = try_get(data, lambda x: x['groups'], list) or []
if not groups:
return
for group in groups:
if not isinstance(group, dict):
continue
contents = try_get(data, lambda x: x['contents'], list) or []
for content in contents:
if not isinstance(content, dict):
continue
ordinal = int_or_none(content.get('ordinal'))
if ordinal != lesson_id:
continue
video_id = content.get('identifier')
if video_id:
return compat_str(video_id)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
course_id, lesson_id = mobj.group('course_id', 'id')
display_id = '%s/%s' % (course_id, lesson_id)
webpage = self._download_webpage(url, display_id)
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_meta(
'twitter:image', webpage, 'thumbnail')
if '>Subscribe to unlock' in webpage:
raise ExtractorError(
'This content is only available for subscribers',
expected=True)
info = {
'thumbnail': thumbnail,
}
vimeo_id = self._search_regex(
r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
if not vimeo_id:
data = self._parse_json(
self._search_regex(
r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
'data collection', default='{}', group='data'),
display_id, transform_source=unescapeHTML, fatal=False)
video_id = self._extract_video_id(
data, lesson_id) or self._search_regex(
r'/videos/(\d+)/', thumbnail, 'video id')
headers = {
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
}
csrf_token = self._html_search_meta(
'csrf-token', webpage, 'csrf token', default=None)
if csrf_token:
headers['X-CSRF-Token'] = csrf_token
video = self._download_json(
'https://videos.raywenderlich.com/api/v1/videos/%s.json'
% video_id, display_id, headers=headers)['video']
vimeo_id = video['clips'][0]['provider_id']
info.update({
'_type': 'url_transparent',
'title': video.get('name'),
'description': video.get('description') or video.get(
'meta_description'),
'duration': int_or_none(video.get('duration')),
'timestamp': unified_timestamp(video.get('created_at')),
})
return merge_dicts(info, self.url_result(
VimeoIE._smuggle_referrer(
'https://player.vimeo.com/video/%s' % vimeo_id, url),
ie=VimeoIE.ie_key(), video_id=vimeo_id))
class RayWenderlichCourseIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
videos\.raywenderlich\.com/courses|
(?:www\.)?raywenderlich\.com
)/
(?P<id>[^/]+)
'''
_TEST = {
'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
'info_dict': { 'info_dict': {
'title': 'Testing in iOS', 'title': 'Testing in iOS',
'id': '105-testing-in-ios', 'id': '3530-testing-in-ios',
}, },
'params': { 'params': {
'noplaylist': False, 'noplaylist': False,
}, },
'playlist_count': 29, 'playlist_count': 29,
}] }
@classmethod
def suitable(cls, url):
return False if RayWenderlichIE.suitable(url) else super(
RayWenderlichCourseIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) course_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url) webpage = self._download_webpage(url, course_id)
course_id, lesson_id = mobj.group('course_id', 'id')
video_id = '%s/%s' % (course_id, lesson_id)
webpage = self._download_webpage(url, video_id)
no_playlist = self._downloader.params.get('noplaylist')
if no_playlist or smuggled_data.get('force_video', False):
if no_playlist:
self.to_screen(
'Downloading just video %s because of --no-playlist'
% video_id)
if '>Subscribe to unlock' in webpage:
raise ExtractorError(
'This content is only available for subscribers',
expected=True)
vimeo_id = self._search_regex(
r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
return self.url_result(
VimeoIE._smuggle_referrer(
'https://player.vimeo.com/video/%s' % vimeo_id, url),
ie=VimeoIE.ie_key(), video_id=vimeo_id)
self.to_screen(
'Downloading playlist %s - add --no-playlist to just download video'
% course_id)
lesson_ids = set((lesson_id, ))
for lesson in re.findall(
r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
attrs = extract_attributes(lesson)
if not attrs:
continue
lesson_url = attrs.get('href')
if not lesson_url:
continue
lesson_id = self._search_regex(
r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
if not lesson_id:
continue
lesson_ids.add(lesson_id)
entries = [] entries = []
for lesson_id in sorted(lesson_ids): lesson_urls = set()
for lesson_url in re.findall(
r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
if lesson_url in lesson_urls:
continue
lesson_urls.add(lesson_url)
entries.append(self.url_result( entries.append(self.url_result(
smuggle_url(urljoin(url, lesson_id), {'force_video': True}), urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
ie=RayWenderlichIE.ie_key()))
title = self._search_regex( title = self._og_search_title(
r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title', webpage, default=None) or self._html_search_meta(
default=None) 'twitter:title', webpage, 'title', default=None)
return self.playlist_result(entries, course_id, title) return self.playlist_result(entries, course_id, title)

View File

@ -10,7 +10,7 @@ from ..utils import (
class RedBullTVIE(InfoExtractor): class RedBullTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)' _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com/(?:[^/]+/)?tv)/video/(?P<id>AP-\w+)'
_TESTS = [{ _TESTS = [{
# film # film
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
@ -35,6 +35,9 @@ class RedBullTVIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -3,12 +3,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
str_to_int, str_to_int,
unified_strdate, unified_strdate,
url_or_none,
) )
@ -71,8 +71,8 @@ class RedTubeIE(InfoExtractor):
video_id, fatal=False) video_id, fatal=False)
if medias and isinstance(medias, list): if medias and isinstance(medias, list):
for media in medias: for media in medias:
format_url = media.get('videoUrl') format_url = url_or_none(media.get('videoUrl'))
if not format_url or not isinstance(format_url, compat_str): if not format_url:
continue continue
format_id = media.get('quality') format_id = media.get('quality')
formats.append({ formats.append({

View File

@ -6,6 +6,7 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
url_or_none,
) )
@ -37,8 +38,8 @@ class RENTVIE(InfoExtractor):
title = config['title'] title = config['title']
formats = [] formats = []
for video in config['src']: for video in config['src']:
src = video.get('src') src = url_or_none(video.get('src'))
if not src or not isinstance(src, compat_str): if not src:
continue continue
ext = determine_ext(src) ext = determine_ext(src)
if ext == 'm3u8': if ext == 'm3u8':

View File

@ -16,6 +16,7 @@ from ..utils import (
int_or_none, int_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none,
) )
@ -176,8 +177,8 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
break break
for result in results: for result in results:
video_url = result.get('video_url') video_url = url_or_none(result.get('video_url'))
if not video_url or not isinstance(video_url, compat_str): if not video_url:
continue continue
entry = self._extract_video(result, require_title=False) entry = self._extract_video(result, require_title=False)
entry.update({ entry.update({

View File

@ -19,7 +19,7 @@ from ..utils import (
class SixPlayIE(InfoExtractor): class SixPlayIE(InfoExtractor):
IE_NAME = '6play' IE_NAME = '6play'
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay.be)/.+?-c_)(?P<id>[0-9]+)' _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051', 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
'md5': '31fcd112637baa0c2ab92c4fcd8baf27', 'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
@ -32,6 +32,9 @@ class SixPlayIE(InfoExtractor):
}, { }, {
'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869', 'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -39,6 +42,7 @@ class SixPlayIE(InfoExtractor):
service, consumer_name = { service, consumer_name = {
'6play.fr': ('6play', 'm6web'), '6play.fr': ('6play', 'm6web'),
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'), 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
}.get(domain, ('6play', 'm6web')) }.get(domain, ('6play', 'm6web'))
data = self._download_json( data = self._download_json(

View File

@ -1,12 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
class SlutloadIE(InfoExtractor): class SlutloadIE(InfoExtractor):
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$' _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
'md5': '868309628ba00fd488cf516a113fd717', 'md5': '868309628ba00fd488cf516a113fd717',
@ -16,33 +14,52 @@ class SlutloadIE(InfoExtractor):
'title': 'virginie baisee en cam', 'title': 'virginie baisee en cam',
'age_limit': 18, 'age_limit': 18,
'thumbnail': r're:https?://.*?\.jpg' 'thumbnail': r're:https?://.*?\.jpg'
} },
}, { }, {
# mobile site # mobile site
'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/', 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
'only_matching': True,
}, {
'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url) embed_page = self._download_webpage(
webpage = self._download_webpage(desktop_url, video_id) 'http://www.slutload.com/embed_player/%s' % video_id, video_id,
'Downloading embed page', fatal=False)
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>', if embed_page:
webpage, 'title').strip() def extract(what):
return self._html_search_regex(
r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
embed_page, 'video %s' % what, default=None, group='url')
video_url = self._html_search_regex( video_url = extract('url')
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"', if video_url:
webpage, 'video URL') title = self._html_search_regex(
thumbnail = self._html_search_regex( r'<title>([^<]+)', embed_page, 'title', default=video_id)
r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"', return {
webpage, 'thumbnail', fatal=False) 'id': video_id,
'url': video_url,
'title': title,
'thumbnail': extract('preview'),
'age_limit': 18
}
return { webpage = self._download_webpage(
'http://www.slutload.com/video/_/%s/' % video_id, video_id)
title = self._html_search_regex(
r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
info.update({
'id': video_id, 'id': video_id,
'url': video_url, 'title': title,
'title': video_title, 'age_limit': 18,
'thumbnail': thumbnail, })
'age_limit': 18 return info
}

View File

@ -72,4 +72,7 @@ class StreamcloudIE(InfoExtractor):
'title': title, 'title': title,
'url': video_url, 'url': video_url,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'http_headers': {
'Referer': url,
},
} }

View File

@ -7,8 +7,10 @@ from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
float_or_none,
int_or_none, int_or_none,
try_get, try_get,
url_or_none,
) )
@ -30,7 +32,7 @@ class TEDIE(InfoExtractor):
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
'md5': '0de43ac406aa3e4ea74b66c9c7789b13', 'md5': 'b0ce2b05ca215042124fbc9e3886493a',
'info_dict': { 'info_dict': {
'id': '102', 'id': '102',
'ext': 'mp4', 'ext': 'mp4',
@ -42,24 +44,30 @@ class TEDIE(InfoExtractor):
'uploader': 'Dan Dennett', 'uploader': 'Dan Dennett',
'width': 853, 'width': 853,
'duration': 1308, 'duration': 1308,
} 'view_count': int,
}, { 'comment_count': int,
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms', 'tags': list,
'md5': 'b899ac15e345fb39534d913f7606082b', },
'info_dict': { 'params': {
'id': 'tSVI8ta_P4w', 'skip_download': True,
'ext': 'mp4', },
'title': 'Vishal Sikka: The beauty and power of algorithms', }, {
'thumbnail': r're:^https?://.+\.jpg', # missing HTTP bitrates
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4', 'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms',
'upload_date': '20140122', 'info_dict': {
'uploader_id': 'TEDInstitute', 'id': '6069',
'uploader': 'TED Institute', 'ext': 'mp4',
'title': 'The beauty and power of algorithms',
'thumbnail': r're:^https?://.+\.jpg',
'description': 'md5:734e352710fb00d840ab87ae31aaf688',
'uploader': 'Vishal Sikka',
},
'params': {
'skip_download': True,
}, },
'add_ie': ['Youtube'],
}, { }, {
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best', 'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
'md5': '71b3ab2f4233012dce09d515c9c39ce2', 'md5': 'e6b9617c01a7970ceac8bb2c92c346c0',
'info_dict': { 'info_dict': {
'id': '1972', 'id': '1972',
'ext': 'mp4', 'ext': 'mp4',
@ -68,6 +76,9 @@ class TEDIE(InfoExtractor):
'description': 'md5:5174aed4d0f16021b704120360f72b92', 'description': 'md5:5174aed4d0f16021b704120360f72b92',
'duration': 1128, 'duration': 1128,
}, },
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://www.ted.com/playlists/who_are_the_hackers', 'url': 'http://www.ted.com/playlists/who_are_the_hackers',
'info_dict': { 'info_dict': {
@ -92,17 +103,17 @@ class TEDIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
# YouTube video # no nativeDownloads
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond', 'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth',
'add_ie': ['Youtube'],
'info_dict': { 'info_dict': {
'id': 'aFBIPO-P7LM', 'id': '1792',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville', 'title': 'The orchestra in my mouth',
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1', 'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
'uploader': 'TEDx Talks', 'uploader': 'Tom Thum',
'uploader_id': 'TEDxTalks', 'view_count': int,
'upload_date': '20111216', 'comment_count': int,
'tags': list,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -161,27 +172,16 @@ class TEDIE(InfoExtractor):
info = self._extract_info(webpage) info = self._extract_info(webpage)
talk_info = try_get( data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info
info, lambda x: x['__INITIAL_DATA__']['talks'][0], talk_info = data['talks'][0]
dict) or info['talks'][0]
title = talk_info['title'].strip() title = talk_info['title'].strip()
external = talk_info.get('external')
if external:
service = external['service']
self.to_screen('Found video from %s' % service)
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return {
'_type': 'url',
'url': ext_url or external['uri'],
}
native_downloads = try_get( native_downloads = try_get(
talk_info, lambda x: x['downloads']['nativeDownloads'], talk_info,
dict) or talk_info['nativeDownloads'] (lambda x: x['downloads']['nativeDownloads'],
lambda x: x['nativeDownloads']),
dict) or {}
formats = [{ formats = [{
'url': format_url, 'url': format_url,
@ -196,10 +196,24 @@ class TEDIE(InfoExtractor):
player_talk = talk_info['player_talks'][0] player_talk = talk_info['player_talks'][0]
external = player_talk.get('external')
if isinstance(external, dict):
service = external.get('service')
if isinstance(service, compat_str):
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return {
'_type': 'url',
'url': ext_url or external['uri'],
}
resources_ = player_talk.get('resources') or talk_info.get('resources') resources_ = player_talk.get('resources') or talk_info.get('resources')
http_url = None http_url = None
for format_id, resources in resources_.items(): for format_id, resources in resources_.items():
if not isinstance(resources, dict):
continue
if format_id == 'h264': if format_id == 'h264':
for resource in resources: for resource in resources:
h264_url = resource.get('file') h264_url = resource.get('file')
@ -228,8 +242,12 @@ class TEDIE(InfoExtractor):
'tbr': int_or_none(resource.get('bitrate')), 'tbr': int_or_none(resource.get('bitrate')),
}) })
elif format_id == 'hls': elif format_id == 'hls':
stream_url = url_or_none(resources.get('stream'))
if not stream_url:
continue
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False)) stream_url, video_name, 'mp4', m3u8_id=format_id,
fatal=False))
m3u8_formats = list(filter( m3u8_formats = list(filter(
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none', lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
@ -239,9 +257,13 @@ class TEDIE(InfoExtractor):
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None) bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
if not bitrate: if not bitrate:
continue continue
bitrate_url = re.sub(r'\d+k', bitrate, http_url)
if not self._is_valid_url(
bitrate_url, video_name, '%s bitrate' % bitrate):
continue
f = m3u8_format.copy() f = m3u8_format.copy()
f.update({ f.update({
'url': re.sub(r'\d+k', bitrate, http_url), 'url': bitrate_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http', 'protocol': 'http',
}) })
@ -267,7 +289,11 @@ class TEDIE(InfoExtractor):
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'subtitles': self._get_subtitles(video_id, talk_info), 'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats, 'formats': formats,
'duration': talk_info.get('duration'), 'duration': float_or_none(talk_info.get('duration')),
'view_count': int_or_none(data.get('viewed_count')),
'comment_count': int_or_none(
try_get(data, lambda x: x['comments']['count'])),
'tags': try_get(talk_info, lambda x: x['tags'], list),
} }
def _get_subtitles(self, video_id, talk_info): def _get_subtitles(self, video_id, talk_info):

View File

@ -1,26 +1,43 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .mitele import MiTeleBaseIE import json
import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..utils import (
clean_html,
determine_ext,
int_or_none,
str_or_none,
urljoin,
)
class TelecincoIE(MiTeleBaseIE): class TelecincoIE(InfoExtractor):
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
'md5': '8d7b2d5f699ee2709d992a63d5cd1712',
'info_dict': { 'info_dict': {
'id': 'JEA5ijCnF6p5W08A1rNKn7', 'id': '1876350223',
'ext': 'mp4',
'title': 'Bacalao con kokotxas al pil-pil', 'title': 'Bacalao con kokotxas al pil-pil',
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb', 'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
'duration': 662,
}, },
'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7',
'info_dict': {
'id': 'JEA5ijCnF6p5W08A1rNKn7',
'ext': 'mp4',
'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
'duration': 662,
},
}]
}, { }, {
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
'md5': '284393e5387b3b947b77c613ef04749a', 'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
'info_dict': { 'info_dict': {
'id': 'jn24Od1zGLG4XUZcnUnZB6', 'id': 'jn24Od1zGLG4XUZcnUnZB6',
'ext': 'mp4', 'ext': 'mp4',
@ -30,7 +47,7 @@ class TelecincoIE(MiTeleBaseIE):
}, },
}, { }, {
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
'md5': '749afab6ea5a136a8806855166ae46a2', 'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
'info_dict': { 'info_dict': {
'id': 'aywerkD2Sv1vGNqq9b85Q2', 'id': 'aywerkD2Sv1vGNqq9b85Q2',
'ext': 'mp4', 'ext': 'mp4',
@ -50,17 +67,90 @@ class TelecincoIE(MiTeleBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _parse_content(self, content, url):
video_id = content['dataMediaId']
if content.get('dataCmsId') == 'ooyala':
return self.url_result(
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
config_url = urljoin(url, content['dataConfig'])
config = self._download_json(
config_url, video_id, 'Downloading config JSON')
title = config['info']['title']
def mmc_url(mmc_type):
return re.sub(
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
config['services']['mmc'])
duration = None
formats = []
for mmc_type in ('flash', 'html5'):
mmc = self._download_json(
mmc_url(mmc_type), video_id,
'Downloading %s mmc JSON' % mmc_type, fatal=False)
if not mmc:
continue
if not duration:
duration = int_or_none(mmc.get('duration'))
for location in mmc['locations']:
gat = self._proto_relative_url(location.get('gat'), 'http:')
gcp = location.get('gcp')
ogn = location.get('ogn')
if None in (gat, gcp, ogn):
continue
token_data = {
'gcp': gcp,
'ogn': ogn,
'sta': 0,
}
media = self._download_json(
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8',
'Referer': url,
}, fatal=False) or {}
stream = media.get('stream') or media.get('file')
if not stream:
continue
ext = determine_ext(stream)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
stream, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
'duration': duration,
}
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
title = self._html_search_meta( article = self._parse_json(self._search_regex(
['og:title', 'twitter:title'], webpage, 'title') r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
info = self._get_player_info(url, webpage) webpage, 'article'), display_id)['article']
title = article.get('title')
description = clean_html(article.get('leadParagraph'))
if article.get('editorialType') != 'VID':
entries = []
for p in article.get('body', []):
content = p.get('content')
if p.get('type') != 'video' or not content:
continue
entries.append(self._parse_content(content, url))
return self.playlist_result(
entries, str_or_none(article.get('id')), title, description)
content = article['opening']['content']
info = self._parse_content(content, url)
info.update({ info.update({
'display_id': display_id, 'description': description,
'title': title,
'description': self._html_search_meta(
['og:description', 'twitter:description'],
webpage, 'title', fatal=False),
}) })
return info return info

View File

@ -32,13 +32,15 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformBaseIE(OnceIE): class ThePlatformBaseIE(OnceIE):
_TP_TLD = 'com'
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'): def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
meta = self._download_xml( meta = self._download_xml(
smil_url, video_id, note=note, query={'format': 'SMIL'}, smil_url, video_id, note=note, query={'format': 'SMIL'},
headers=self.geo_verification_headers()) headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src') error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
if error_element is not None and error_element.attrib['src'].startswith( if error_element is not None and error_element.attrib['src'].startswith(
'http://link.theplatform.com/s/errorFiles/Unavailable.'): 'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
raise ExtractorError(error_element.attrib['abstract'], expected=True) raise ExtractorError(error_element.attrib['abstract'], expected=True)
smil_formats = self._parse_smil_formats( smil_formats = self._parse_smil_formats(
@ -66,7 +68,7 @@ class ThePlatformBaseIE(OnceIE):
return formats, subtitles return formats, subtitles
def _download_theplatform_metadata(self, path, video_id): def _download_theplatform_metadata(self, path, video_id):
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
return self._download_json(info_url, video_id) return self._download_json(info_url, video_id)
def _parse_theplatform_metadata(self, info): def _parse_theplatform_metadata(self, info):
@ -308,7 +310,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
class ThePlatformFeedIE(ThePlatformBaseIE): class ThePlatformFeedIE(ThePlatformBaseIE):
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s' _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))' _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
_TESTS = [{ _TESTS = [{
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207 # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207', 'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
@ -325,6 +327,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'], 'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
'uploader': 'NBCU-NEWS', 'uploader': 'NBCU-NEWS',
}, },
}, {
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
'only_matching': True,
}] }]
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None): def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):

View File

@ -15,6 +15,7 @@ from ..utils import (
update_url_query, update_url_query,
ExtractorError, ExtractorError,
strip_or_none, strip_or_none,
url_or_none,
) )
@ -154,8 +155,8 @@ class TurnerBaseIE(AdobePassIE):
subtitles = {} subtitles = {}
for source in video_data.findall('closedCaptions/source'): for source in video_data.findall('closedCaptions/source'):
for track in source.findall('track'): for track in source.findall('track'):
track_url = track.get('url') track_url = url_or_none(track.get('url'))
if not isinstance(track_url, compat_str) or track_url.endswith('/big'): if not track_url or track_url.endswith('/big'):
continue continue
lang = track.get('lang') or track.get('label') or 'en' lang = track.get('lang') or track.get('label') or 'en'
subtitles.setdefault(lang, []).append({ subtitles.setdefault(lang, []).append({

View File

@ -4,10 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
unescapeHTML, unescapeHTML,
url_or_none,
) )
@ -106,9 +106,8 @@ class TVNetIE(InfoExtractor):
for stream in self._download_json(data_file, video_id): for stream in self._download_json(data_file, video_id):
if not isinstance(stream, dict): if not isinstance(stream, dict):
continue continue
stream_url = stream.get('url') stream_url = url_or_none(stream.get('url'))
if (stream_url in stream_urls or not stream_url or if stream_url in stream_urls or not stream_url:
not isinstance(stream_url, compat_str)):
continue continue
stream_urls.add(stream_url) stream_urls.add(stream_url)
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(

View File

@ -19,6 +19,7 @@ from ..utils import (
try_get, try_get,
unsmuggle_url, unsmuggle_url,
update_url_query, update_url_query,
url_or_none,
) )
@ -31,12 +32,12 @@ class TVPlayIE(InfoExtractor):
https?:// https?://
(?:www\.)? (?:www\.)?
(?: (?:
tvplay(?:\.skaties)?\.lv/parraides| tvplay(?:\.skaties)?\.lv(?:/parraides)?|
(?:tv3play|play\.tv3)\.lt/programos| (?:tv3play|play\.tv3)\.lt(?:/programos)?|
tv3play(?:\.tv3)?\.ee/sisu| tv3play(?:\.tv3)?\.ee/sisu|
(?:tv(?:3|6|8|10)play|viafree)\.se/program| (?:tv(?:3|6|8|10)play|viafree)\.se/program|
(?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer| (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
play\.novatv\.bg/programi play\.nova(?:tv)?\.bg/programi
) )
/(?:[^/]+/)+ /(?:[^/]+/)+
) )
@ -202,10 +203,18 @@ class TVPlayIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
'only_matching': True,
},
{ {
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true', 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
'only_matching': True, 'only_matching': True,
}, },
{
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
'only_matching': True,
},
{ {
# views is null # views is null
'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183', 'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
@ -255,7 +264,8 @@ class TVPlayIE(InfoExtractor):
quality = qualities(['hls', 'medium', 'high']) quality = qualities(['hls', 'medium', 'high'])
formats = [] formats = []
for format_id, video_url in streams.get('streams', {}).items(): for format_id, video_url in streams.get('streams', {}).items():
if not video_url or not isinstance(video_url, compat_str): video_url = url_or_none(video_url)
if not video_url:
continue continue
ext = determine_ext(video_url) ext = determine_ext(video_url)
if ext == 'f4m': if ext == 'f4m':
@ -286,6 +296,7 @@ class TVPlayIE(InfoExtractor):
'url': m.group('url'), 'url': m.group('url'),
'app': m.group('app'), 'app': m.group('app'),
'play_path': m.group('playpath'), 'play_path': m.group('playpath'),
'preference': -1,
}) })
else: else:
fmt.update({ fmt.update({
@ -445,3 +456,102 @@ class ViafreeIE(InfoExtractor):
'skip_rtmp': True, 'skip_rtmp': True,
}), }),
ie=TVPlayIE.ie_key(), video_id=video_id) ie=TVPlayIE.ie_key(), video_id=video_id)
class TVPlayHomeIE(InfoExtractor):
_VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
'info_dict': {
'id': '366367',
'ext': 'mp4',
'title': 'Aferistai',
'description': 'Aferistai. Kalėdinė pasaka.',
'series': 'Aferistai [N-7]',
'season': '1 sezonas',
'season_number': 1,
'duration': 464,
'timestamp': 1394209658,
'upload_date': '20140307',
'age_limit': 18,
},
'params': {
'skip_download': True,
},
'add_ie': [TVPlayIE.ie_key()],
}, {
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
'only_matching': True,
}, {
'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_id = self._search_regex(
r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
default=None)
if video_id:
return self.url_result(
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
m3u8_url = self._search_regex(
r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'm3u8 url', group='url')
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)
title = self._search_regex(
r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'title', default=None, group='value') or self._html_search_meta(
'title', webpage, default=None) or self._og_search_title(
webpage)
description = self._html_search_meta(
'description', webpage,
default=None) or self._og_search_description(webpage)
thumbnail = self._search_regex(
r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'thumbnail', default=None, group='url') or self._html_search_meta(
'thumbnail', webpage, default=None) or self._og_search_thumbnail(
webpage)
duration = int_or_none(self._search_regex(
r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
fatal=False))
season = self._search_regex(
(r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'season', default=None, group='value')
season_number = int_or_none(self._search_regex(
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
default=None))
episode = self._search_regex(
r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
default=None, group='value')
episode_number = int_or_none(self._search_regex(
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
default=None))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'season': season,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'formats': formats,
}

View File

@ -4,10 +4,10 @@ from __future__ import unicode_literals
import itertools import itertools
import re import re
import random import random
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError,
compat_kwargs, compat_kwargs,
compat_parse_qs, compat_parse_qs,
compat_str, compat_str,
@ -26,7 +26,7 @@ from ..utils import (
try_get, try_get,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
urlencode_postdata, url_or_none,
urljoin, urljoin,
) )
@ -36,8 +36,9 @@ class TwitchBaseIE(InfoExtractor):
_API_BASE = 'https://api.twitch.tv' _API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'https://usher.ttvnw.net' _USHER_BASE = 'https://usher.ttvnw.net'
_LOGIN_URL = 'https://www.twitch.tv/login' _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
_CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6' _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
_CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
_NETRC_MACHINE = 'twitch' _NETRC_MACHINE = 'twitch'
def _handle_error(self, response): def _handle_error(self, response):
@ -76,22 +77,21 @@ class TwitchBaseIE(InfoExtractor):
page_url = urlh.geturl() page_url = urlh.geturl()
post_url = self._search_regex( post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
'post url', default=page_url, group='url') 'post url', default=self._LOGIN_POST_URL, group='url')
post_url = urljoin(page_url, post_url) post_url = urljoin(page_url, post_url)
headers = {'Referer': page_url} headers = {
'Referer': page_url,
'Origin': page_url,
'Content-Type': 'text/plain;charset=UTF-8',
}
try: response = self._download_json(
response = self._download_json( post_url, None, note, data=json.dumps(form).encode(),
post_url, None, note, headers=headers, expected_status=400)
data=urlencode_postdata(form), error = response.get('error_description') or response.get('error_code')
headers=headers) if error:
except ExtractorError as e: fail(error)
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
response = self._parse_json(
e.cause.read().decode('utf-8'), None)
fail(response.get('message') or response['errors'][0])
raise
if 'Authenticated successfully' in response.get('message', ''): if 'Authenticated successfully' in response.get('message', ''):
return None, None return None, None
@ -104,7 +104,7 @@ class TwitchBaseIE(InfoExtractor):
headers=headers) headers=headers)
login_page, handle = self._download_webpage_handle( login_page, handle = self._download_webpage_handle(
self._LOGIN_URL, None, 'Downloading login page') self._LOGIN_FORM_URL, None, 'Downloading login page')
# Some TOR nodes and public proxies are blocked completely # Some TOR nodes and public proxies are blocked completely
if 'blacklist_message' in login_page: if 'blacklist_message' in login_page:
@ -114,6 +114,7 @@ class TwitchBaseIE(InfoExtractor):
login_page, handle, 'Logging in', { login_page, handle, 'Logging in', {
'username': username, 'username': username,
'password': password, 'password': password,
'client_id': self._CLIENT_ID,
}) })
# Successful login # Successful login
@ -239,7 +240,7 @@ class TwitchVodIE(TwitchItemBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/| (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
player\.twitch\.tv/\?.*?\bvideo=v player\.twitch\.tv/\?.*?\bvideo=v
) )
(?P<id>\d+) (?P<id>\d+)
@ -295,6 +296,9 @@ class TwitchVodIE(TwitchItemBaseIE):
}, { }, {
'url': 'https://m.twitch.tv/beagsandjam/v/247478721', 'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.twitch.tv/northernlion/video/291940395',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -663,8 +667,8 @@ class TwitchClipsIE(TwitchBaseIE):
for option in status['quality_options']: for option in status['quality_options']:
if not isinstance(option, dict): if not isinstance(option, dict):
continue continue
source = option.get('source') source = url_or_none(option.get('source'))
if not source or not isinstance(source, compat_str): if not source:
continue continue
formats.append({ formats.append({
'url': source, 'url': source,

View File

@ -20,6 +20,7 @@ from ..utils import (
sanitized_Request, sanitized_Request,
try_get, try_get,
unescapeHTML, unescapeHTML,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -265,8 +266,8 @@ class UdemyIE(InfoExtractor):
if not isinstance(source_list, list): if not isinstance(source_list, list):
return return
for source in source_list: for source in source_list:
video_url = source.get('file') or source.get('src') video_url = url_or_none(source.get('file') or source.get('src'))
if not video_url or not isinstance(video_url, compat_str): if not video_url:
continue continue
if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8': if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
@ -293,8 +294,8 @@ class UdemyIE(InfoExtractor):
continue continue
if track.get('kind') != 'captions': if track.get('kind') != 'captions':
continue continue
src = track.get('src') src = url_or_none(track.get('src'))
if not src or not isinstance(src, compat_str): if not src:
continue continue
lang = track.get('language') or track.get( lang = track.get('language') or track.get(
'srclang') or track.get('label') 'srclang') or track.get('label')
@ -314,8 +315,8 @@ class UdemyIE(InfoExtractor):
for cc in captions: for cc in captions:
if not isinstance(cc, dict): if not isinstance(cc, dict):
continue continue
cc_url = cc.get('url') cc_url = url_or_none(cc.get('url'))
if not cc_url or not isinstance(cc_url, compat_str): if not cc_url:
continue continue
lang = try_get(cc, lambda x: x['locale']['locale'], compat_str) lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
sub_dict = (automatic_captions if cc.get('source') == 'auto' sub_dict = (automatic_captions if cc.get('source') == 'auto'

View File

@ -3,15 +3,13 @@ from __future__ import unicode_literals
import itertools import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_HTTPError
compat_HTTPError,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
float_or_none, float_or_none,
parse_iso8601, parse_iso8601,
url_or_none,
) )
@ -166,8 +164,8 @@ class VidmeIE(InfoExtractor):
formats = [] formats = []
for f in video.get('formats', []): for f in video.get('formats', []):
format_url = f.get('uri') format_url = url_or_none(f.get('uri'))
if not format_url or not isinstance(format_url, compat_str): if not format_url:
continue continue
format_type = f.get('type') format_type = f.get('type')
if format_type == 'dash': if format_type == 'dash':

View File

@ -13,7 +13,7 @@ from ..utils import (
class VidziIE(InfoExtractor): class VidziIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://vidzi.tv/cghql9yq6emu.html', 'url': 'http://vidzi.tv/cghql9yq6emu.html',
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
@ -35,6 +35,9 @@ class VidziIE(InfoExtractor):
}, { }, {
'url': 'https://vidzi.si/rph9gztxj1et.html', 'url': 'https://vidzi.si/rph9gztxj1et.html',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://vidzi.nu/cghql9yq6emu.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -539,9 +539,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
# We try to find out to which variable is assigned the config dic # We try to find out to which variable is assigned the config dic
m_variable_name = re.search(r'(\w)\.video\.id', webpage) m_variable_name = re.search(r'(\w)\.video\.id', webpage)
if m_variable_name is not None: if m_variable_name is not None:
config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1)) config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
else: else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
config = self._search_regex(config_re, webpage, 'info section', config = self._search_regex(config_re, webpage, 'info section',
flags=re.DOTALL) flags=re.DOTALL)
config = json.loads(config) config = json.loads(config)

View File

@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
str_or_none,
url_or_none,
)
class ViqeoIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
viqeo:|
https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
https?://api\.viqeo\.tv/v\d+/data/startup?.*?\bvideo(?:%5B%5D|\[\])=
)
(?P<id>[\da-f]+)
'''
_TESTS = [{
'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
'md5': 'a169dd1a6426b350dca4296226f21e76',
'info_dict': {
'id': 'cde96f09d25f39bee837',
'ext': 'mp4',
'title': 'cde96f09d25f39bee837',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 76,
},
}, {
'url': 'viqeo:cde96f09d25f39bee837',
'only_matching': True,
}, {
'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)
data = self._parse_json(
self._search_regex(
r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
video_id)
formats = []
thumbnails = []
for media_file in data['mediaFiles']:
if not isinstance(media_file, dict):
continue
media_url = url_or_none(media_file.get('url'))
if not media_url or not media_url.startswith(('http', '//')):
continue
media_type = str_or_none(media_file.get('type'))
if not media_type:
continue
media_kind = media_type.split('/')[0].lower()
f = {
'url': media_url,
'width': int_or_none(media_file.get('width')),
'height': int_or_none(media_file.get('height')),
}
format_id = str_or_none(media_file.get('quality'))
if media_kind == 'image':
f['id'] = format_id
thumbnails.append(f)
elif media_kind in ('video', 'audio'):
is_audio = media_kind == 'audio'
f.update({
'format_id': 'audio' if is_audio else format_id,
'fps': int_or_none(media_file.get('fps')),
'vcodec': 'none' if is_audio else None,
})
formats.append(f)
self._sort_formats(formats)
duration = int_or_none(data.get('duration'))
return {
'id': video_id,
'title': video_id,
'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
}

View File

@ -195,16 +195,29 @@ class ViuOTTIE(InfoExtractor):
'skip': 'Geo-restricted to Hong Kong', 'skip': 'Geo-restricted to Hong Kong',
}] }]
_AREA_ID = {
'HK': 1,
'SG': 2,
'TH': 4,
'PH': 5,
}
def _real_extract(self, url): def _real_extract(self, url):
country_code, video_id = re.match(self._VALID_URL, url).groups() country_code, video_id = re.match(self._VALID_URL, url).groups()
query = {
'r': 'vod/ajax-detail',
'platform_flag_label': 'web',
'product_id': video_id,
}
area_id = self._AREA_ID.get(country_code.upper())
if area_id:
query['area_id'] = area_id
product_data = self._download_json( product_data = self._download_json(
'http://www.viu.com/ott/%s/index.php' % country_code, video_id, 'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
'Downloading video info', query={ 'Downloading video info', query=query)['data']
'r': 'vod/ajax-detail',
'platform_flag_label': 'web',
'product_id': video_id,
})['data']
video_data = product_data.get('current_product') video_data = product_data.get('current_product')
if not video_data: if not video_data:
@ -214,6 +227,9 @@ class ViuOTTIE(InfoExtractor):
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code, 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
video_id, 'Downloading stream info', query={ video_id, 'Downloading stream info', query={
'ccs_product_id': video_data['ccs_product_id'], 'ccs_product_id': video_data['ccs_product_id'],
}, headers={
'Referer': url,
'Origin': re.search(r'https?://[^/]+', url).group(0),
})['data']['stream'] })['data']['stream']
stream_sizes = stream_data.get('size', {}) stream_sizes = stream_data.get('size', {})

View File

@ -17,9 +17,11 @@ from ..utils import (
int_or_none, int_or_none,
orderedSet, orderedSet,
remove_start, remove_start,
str_or_none,
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
unified_timestamp, unified_timestamp,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
@ -105,10 +107,10 @@ class VKIE(VKBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня', 'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'uploader_id': '-77521',
'duration': 195, 'duration': 195,
'timestamp': 1329060660, 'timestamp': 1329049880,
'upload_date': '20120212', 'upload_date': '20120212',
'view_count': int,
}, },
}, },
{ {
@ -117,12 +119,12 @@ class VKIE(VKBaseIE):
'info_dict': { 'info_dict': {
'id': '165548505', 'id': '165548505',
'ext': 'mp4', 'ext': 'mp4',
'uploader': 'Tom Cruise',
'title': 'No name', 'title': 'No name',
'uploader': 'Tom Cruise',
'uploader_id': '205387401',
'duration': 9, 'duration': 9,
'timestamp': 1374374880, 'timestamp': 1374364108,
'upload_date': '20130721', 'upload_date': '20130720',
'view_count': int,
} }
}, },
{ {
@ -206,10 +208,10 @@ class VKIE(VKBaseIE):
'id': 'V3K4mi0SYkc', 'id': 'V3K4mi0SYkc',
'ext': 'webm', 'ext': 'webm',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:d9903938abdc74c738af77f527ca0596', 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'duration': 178, 'duration': 179,
'upload_date': '20130116', 'upload_date': '20130116',
'uploader': "Children's Joy Foundation", 'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf', 'uploader_id': 'thecjf',
'view_count': int, 'view_count': int,
}, },
@ -221,6 +223,7 @@ class VKIE(VKBaseIE):
'id': 'k3lz2cmXyRuJQSjGHUv', 'id': 'k3lz2cmXyRuJQSjGHUv',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:d52606645c20b0ddbb21655adaa4f56f', 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
# TODO: fix test by fixing dailymotion description extraction
'description': 'md5:c651358f03c56f1150b555c26d90a0fd', 'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
'uploader': 'AniLibria.Tv', 'uploader': 'AniLibria.Tv',
'upload_date': '20160914', 'upload_date': '20160914',
@ -240,9 +243,12 @@ class VKIE(VKBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'S-Dance, репетиции к The way show', 'title': 'S-Dance, репетиции к The way show',
'uploader': 'THE WAY SHOW | 17 апреля', 'uploader': 'THE WAY SHOW | 17 апреля',
'timestamp': 1454870100, 'uploader_id': '-110305615',
'timestamp': 1454859345,
'upload_date': '20160207', 'upload_date': '20160207',
'view_count': int, },
'params': {
'skip_download': True,
}, },
}, },
{ {
@ -295,7 +301,7 @@ class VKIE(VKBaseIE):
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
if video_id: if video_id:
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id
# Some videos (removed?) can only be downloaded with list id specified # Some videos (removed?) can only be downloaded with list id specified
list_id = mobj.group('list_id') list_id = mobj.group('list_id')
if list_id: if list_id:
@ -345,6 +351,9 @@ class VKIE(VKBaseIE):
r'<!>This video is no longer available, because its author has been blocked.': r'<!>This video is no longer available, because its author has been blocked.':
'Video %s is no longer available, because its author has been blocked.', 'Video %s is no longer available, because its author has been blocked.',
r'<!>This video is no longer available, because it has been deleted.':
'Video %s is no longer available, because it has been deleted.',
} }
for error_re, error_msg in ERRORS.items(): for error_re, error_msg in ERRORS.items():
@ -393,7 +402,8 @@ class VKIE(VKBaseIE):
if not data: if not data:
data = self._parse_json( data = self._parse_json(
self._search_regex( self._search_regex(
r'<!json>\s*({.+?})\s*<!>', info_page, 'json', default='{}'), [r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
info_page, 'json', default='{}'),
video_id) video_id)
if data: if data:
data = data['player']['params'][0] data = data['player']['params'][0]
@ -415,7 +425,7 @@ class VKIE(VKBaseIE):
timestamp = unified_timestamp(self._html_search_regex( timestamp = unified_timestamp(self._html_search_regex(
r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page, r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
'upload date', fatal=False)) 'upload date', default=None)) or int_or_none(data.get('date'))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)', r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
@ -423,7 +433,8 @@ class VKIE(VKBaseIE):
formats = [] formats = []
for format_id, format_url in data.items(): for format_id, format_url in data.items():
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')): format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
continue continue
if (format_id.startswith(('url', 'cache')) or if (format_id.startswith(('url', 'cache')) or
format_id in ('extra_data', 'live_mp4', 'postlive_mp4')): format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
@ -452,9 +463,12 @@ class VKIE(VKBaseIE):
'title': title, 'title': title,
'thumbnail': data.get('jpg'), 'thumbnail': data.get('jpg'),
'uploader': data.get('md_author'), 'uploader': data.get('md_author'),
'uploader_id': str_or_none(data.get('author_id')),
'duration': data.get('duration'), 'duration': data.get('duration'),
'timestamp': timestamp, 'timestamp': timestamp,
'view_count': view_count, 'view_count': view_count,
'like_count': int_or_none(data.get('liked')),
'dislike_count': int_or_none(data.get('nolikes')),
'is_live': is_live, 'is_live': is_live,
} }

View File

@ -10,6 +10,7 @@ from ..utils import (
js_to_json, js_to_json,
strip_or_none, strip_or_none,
try_get, try_get,
unescapeHTML,
unified_timestamp, unified_timestamp,
) )
@ -67,12 +68,20 @@ class WatchBoxIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
source = (self._parse_json( player_config = self._parse_json(
self._search_regex( self._search_regex(
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config', r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
default='{}'), 'player config', default='{}', group='data'),
video_id, transform_source=js_to_json, video_id, transform_source=unescapeHTML, fatal=False)
fatal=False) or {}).get('source') or {}
if not player_config:
player_config = self._parse_json(
self._search_regex(
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
default='{}'),
video_id, transform_source=js_to_json, fatal=False) or {}
source = player_config.get('source') or {}
video_id = compat_str(source.get('videoId') or video_id) video_id = compat_str(source.get('videoId') or video_id)

View File

@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import (
int_or_none,
orderedSet,
)
class WebOfStoriesIE(InfoExtractor): class WebOfStoriesIE(InfoExtractor):
@ -133,8 +136,10 @@ class WebOfStoriesPlaylistIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
entries = [ entries = [
self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories') self.url_result(
for video_number in set(re.findall(r'href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage)) 'http://www.webofstories.com/play/%s' % video_id,
'WebOfStories', video_id=video_id)
for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
] ]
title = self._search_regex( title = self._search_regex(

View File

@ -23,7 +23,7 @@ class XFileShareIE(InfoExtractor):
(r'powerwatch\.pw', 'PowerWatch'), (r'powerwatch\.pw', 'PowerWatch'),
(r'rapidvideo\.ws', 'Rapidvideo.ws'), (r'rapidvideo\.ws', 'Rapidvideo.ws'),
(r'thevideobee\.to', 'TheVideoBee'), (r'thevideobee\.to', 'TheVideoBee'),
(r'vidto\.me', 'Vidto'), (r'vidto\.(?:me|se)', 'Vidto'),
(r'streamin\.to', 'Streamin.To'), (r'streamin\.to', 'Streamin.To'),
(r'xvidstage\.com', 'XVIDSTAGE'), (r'xvidstage\.com', 'XVIDSTAGE'),
(r'vidabc\.com', 'Vid ABC'), (r'vidabc\.com', 'Vid ABC'),
@ -115,7 +115,10 @@ class XFileShareIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html', 'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
'only_matching': True 'only_matching': True,
}, {
'url': 'http://vidto.se/1tx1pf6t12cg.html',
'only_matching': True,
}] }]
@staticmethod @staticmethod

View File

@ -13,6 +13,7 @@ from ..utils import (
parse_duration, parse_duration,
try_get, try_get,
unified_strdate, unified_strdate,
url_or_none,
) )
@ -137,7 +138,8 @@ class XHamsterIE(InfoExtractor):
else: else:
format_url = format_item format_url = format_item
filesize = None filesize = None
if not isinstance(format_url, compat_str): format_url = url_or_none(format_url)
if not format_url:
continue continue
formats.append({ formats.append({
'format_id': '%s-%s' % (format_id, quality), 'format_id': '%s-%s' % (format_id, quality),
@ -198,7 +200,8 @@ class XHamsterIE(InfoExtractor):
default='{}'), default='{}'),
video_id, fatal=False) video_id, fatal=False)
for format_id, format_url in sources.items(): for format_id, format_url in sources.items():
if not isinstance(format_url, compat_str): format_url = url_or_none(format_url)
if not format_url:
continue continue
if format_url in format_urls: if format_url in format_urls:
continue continue

View File

@ -4,12 +4,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
qualities, qualities,
unescapeHTML, unescapeHTML,
url_or_none,
) )
@ -80,9 +80,9 @@ class YapFilesIE(InfoExtractor):
formats = [] formats = []
for format_id in QUALITIES: for format_id in QUALITIES:
is_hd = format_id == 'hd' is_hd = format_id == 'hd'
format_url = playlist.get( format_url = url_or_none(playlist.get(
'file%s' % ('_hd' if is_hd else '')) 'file%s' % ('_hd' if is_hd else '')))
if not format_url or not isinstance(format_url, compat_str): if not format_url:
continue continue
formats.append({ formats.append({
'url': format_url, 'url': format_url,

View File

@ -3,11 +3,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
parse_duration, parse_duration,
url_or_none,
) )
@ -50,8 +50,8 @@ class YouJizzIE(InfoExtractor):
for encoding in encodings: for encoding in encodings:
if not isinstance(encoding, dict): if not isinstance(encoding, dict):
continue continue
format_url = encoding.get('filename') format_url = url_or_none(encoding.get('filename'))
if not isinstance(format_url, compat_str): if not format_url:
continue continue
if determine_ext(format_url) == 'm3u8': if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(

View File

@ -3,13 +3,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
sanitized_Request, sanitized_Request,
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
url_or_none,
) )
from ..aes import aes_decrypt_text from ..aes import aes_decrypt_text
@ -88,8 +88,8 @@ class YouPornIE(InfoExtractor):
for definition in definitions: for definition in definitions:
if not isinstance(definition, dict): if not isinstance(definition, dict):
continue continue
video_url = definition.get('videoUrl') video_url = url_or_none(definition.get('videoUrl'))
if isinstance(video_url, compat_str) and video_url: if video_url:
links.append(video_url) links.append(video_url)
# Fallback #1, this also contains extra low quality 180p format # Fallback #1, this also contains extra low quality 180p format

View File

@ -0,0 +1,41 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import urljoin
class YourPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)'
_TEST = {
'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
'md5': '6f8682b6464033d87acaa7a8ff0c092e',
'info_dict': {
'id': '57ffcb2e1179b',
'ext': 'mp4',
'title': 'md5:c9f43630bd968267672651ba905a7d35',
'thumbnail': r're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = urljoin(url, self._parse_json(
self._search_regex(
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
group='data'),
video_id)[video_id])
title = (self._search_regex(
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
default=None) or self._og_search_description(webpage)).strip()
thumbnail = self._og_search_thumbnail(webpage)
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
}

View File

@ -64,7 +64,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided # If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False _LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}' _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
def _set_language(self): def _set_language(self):
self._set_cookie( self._set_cookie(
@ -2123,7 +2123,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist= youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
) )
( (
(?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,} (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots # Top tracks, they can also include dots
|(?:MC)[\w\.]* |(?:MC)[\w\.]*
) )
@ -2261,6 +2261,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
}, { }, {
'url': 'TLGGrESM50VT6acwMjAyMjAxNw', 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
'only_matching': True, 'only_matching': True,
}, {
# music album playlist
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
'only_matching': True,
}] }]
def _real_initialize(self): def _real_initialize(self):

View File

@ -13,6 +13,7 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
try_get, try_get,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -150,8 +151,8 @@ class ZattooBaseIE(InfoExtractor):
for watch in watch_urls: for watch in watch_urls:
if not isinstance(watch, dict): if not isinstance(watch, dict):
continue continue
watch_url = watch.get('url') watch_url = url_or_none(watch.get('url'))
if not watch_url or not isinstance(watch_url, compat_str): if not watch_url:
continue continue
format_id_list = [stream_type] format_id_list = [stream_type]
maxrate = watch.get('maxrate') maxrate = watch.get('maxrate')

View File

@ -15,6 +15,7 @@ from ..utils import (
try_get, try_get,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
url_or_none,
urljoin, urljoin,
) )
@ -67,8 +68,8 @@ class ZDFIE(ZDFBaseIE):
def _extract_subtitles(src): def _extract_subtitles(src):
subtitles = {} subtitles = {}
for caption in try_get(src, lambda x: x['captions'], list) or []: for caption in try_get(src, lambda x: x['captions'], list) or []:
subtitle_url = caption.get('uri') subtitle_url = url_or_none(caption.get('uri'))
if subtitle_url and isinstance(subtitle_url, compat_str): if subtitle_url:
lang = caption.get('language', 'deu') lang = caption.get('language', 'deu')
subtitles.setdefault(lang, []).append({ subtitles.setdefault(lang, []).append({
'url': subtitle_url, 'url': subtitle_url,
@ -76,8 +77,8 @@ class ZDFIE(ZDFBaseIE):
return subtitles return subtitles
def _extract_format(self, video_id, formats, format_urls, meta): def _extract_format(self, video_id, formats, format_urls, meta):
format_url = meta.get('url') format_url = url_or_none(meta.get('url'))
if not format_url or not isinstance(format_url, compat_str): if not format_url:
return return
if format_url in format_urls: if format_url in format_urls:
return return
@ -152,7 +153,8 @@ class ZDFIE(ZDFBaseIE):
content, lambda x: x['teaserImageRef']['layouts'], dict) content, lambda x: x['teaserImageRef']['layouts'], dict)
if layouts: if layouts:
for layout_key, layout_url in layouts.items(): for layout_key, layout_url in layouts.items():
if not isinstance(layout_url, compat_str): layout_url = url_or_none(layout_url)
if not layout_url:
continue continue
thumbnail = { thumbnail = {
'url': layout_url, 'url': layout_url,

View File

@ -49,7 +49,6 @@ from .compat import (
compat_os_name, compat_os_name,
compat_parse_qs, compat_parse_qs,
compat_shlex_quote, compat_shlex_quote,
compat_socket_create_connection,
compat_str, compat_str,
compat_struct_pack, compat_struct_pack,
compat_struct_unpack, compat_struct_unpack,
@ -82,7 +81,7 @@ def register_socks_protocols():
compiled_regex_type = type(re.compile('')) compiled_regex_type = type(re.compile(''))
std_headers = { std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate', 'Accept-Encoding': 'gzip, deflate',
@ -882,13 +881,51 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
kwargs['strict'] = True kwargs['strict'] = True
hc = http_class(*args, **compat_kwargs(kwargs)) hc = http_class(*args, **compat_kwargs(kwargs))
source_address = ydl_handler._params.get('source_address') source_address = ydl_handler._params.get('source_address')
if source_address is not None: if source_address is not None:
# This is to workaround _create_connection() from socket where it will try all
# address data from getaddrinfo() including IPv6. This filters the result from
# getaddrinfo() based on the source_address value.
# This is based on the cpython socket.create_connection() function.
# https://github.com/python/cpython/blob/master/Lib/socket.py#L691
def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
host, port = address
err = None
addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
ip_addrs = [addr for addr in addrs if addr[0] == af]
if addrs and not ip_addrs:
ip_version = 'v4' if af == socket.AF_INET else 'v6'
raise socket.error(
"No remote IP%s addresses available for connect, can't use '%s' as source address"
% (ip_version, source_address[0]))
for res in ip_addrs:
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
sock.bind(source_address)
sock.connect(sa)
err = None # Explicitly break reference cycle
return sock
except socket.error as _:
err = _
if sock is not None:
sock.close()
if err is not None:
raise err
else:
raise socket.error('getaddrinfo returns an empty list')
if hasattr(hc, '_create_connection'):
hc._create_connection = _create_connection
sa = (source_address, 0) sa = (source_address, 0)
if hasattr(hc, 'source_address'): # Python 2.7+ if hasattr(hc, 'source_address'): # Python 2.7+
hc.source_address = sa hc.source_address = sa
else: # Python 2.6 else: # Python 2.6
def _hc_connect(self, *args, **kwargs): def _hc_connect(self, *args, **kwargs):
sock = compat_socket_create_connection( sock = _create_connection(
(self.host, self.port), self.timeout, sa) (self.host, self.port), self.timeout, sa)
if is_https: if is_https:
self.sock = ssl.wrap_socket( self.sock = ssl.wrap_socket(
@ -1866,6 +1903,13 @@ def strip_or_none(v):
return None if v is None else v.strip() return None if v is None else v.strip()
def url_or_none(url):
if not url or not isinstance(url, compat_str):
return None
url = url.strip()
return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
def parse_duration(s): def parse_duration(s):
if not isinstance(s, compat_basestring): if not isinstance(s, compat_basestring):
return None return None
@ -2282,7 +2326,7 @@ def parse_age_limit(s):
def strip_jsonp(code): def strip_jsonp(code):
return re.sub( return re.sub(
r'''(?sx)^ r'''(?sx)^
(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+) (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
(?:\s*&&\s*(?P=func_name))? (?:\s*&&\s*(?P=func_name))?
\s*\(\s*(?P<callback_data>.*)\);? \s*\(\s*(?P<callback_data>.*)\);?
\s*?(?://[^\n]*)*$''', \s*?(?://[^\n]*)*$''',
@ -3562,7 +3606,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
setattr(self, '%s_open' % type, setattr(self, '%s_open' % type,
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open: lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
meth(r, proxy, type)) meth(r, proxy, type))
return compat_urllib_request.ProxyHandler.__init__(self, proxies) compat_urllib_request.ProxyHandler.__init__(self, proxies)
def proxy_open(self, req, proxy, type): def proxy_open(self, req, proxy, type):
req_proxy = req.headers.get('Ytdl-request-proxy') req_proxy = req.headers.get('Ytdl-request-proxy')

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2018.07.10' __version__ = '2018.08.28'