1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2025-01-06 05:07:54 +01:00

Merge branch 'master' into fix-zing-mp3

This commit is contained in:
Thinh Nguyen 2018-08-28 19:08:27 -04:00
commit ac4fdaa75a
No known key found for this signature in database
GPG Key ID: C565BDA2570826DE
99 changed files with 2158 additions and 713 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.10**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.08.28*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.08.28**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.07.10
[debug] youtube-dl version 2018.08.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

3
.gitignore vendored
View File

@ -48,3 +48,6 @@ youtube-dl.zsh
tmp/
venv/
# VS Code related files
.vscode

View File

@ -239,3 +239,10 @@ Martin Weinelt
Surya Oktafendri
TingPing
Alexandre Macabies
Bastian de Groot
Niklas Haas
András Veres-Szentkirályi
Enes Solak
Nathan Rossi
Thomas van der Berg
Luca Cherubin

View File

@ -1,3 +1,94 @@
version 2018.08.28
Extractors
+ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
(#17361)
* [bitchute] Fix extraction by pass custom User-Agent (#17360)
* [webofstories:playlist] Fix extraction (#16914)
+ [tvplayhome] Add support for new tvplay URLs (#17344)
+ [generic] Allow relative src for videojs embeds (#17324)
+ [xfileshare] Add support for vidto.se (#17317)
+ [vidzi] Add support for vidzi.nu (#17316)
+ [nova:embed] Add support for media.cms.nova.cz (#17282)
version 2018.08.22
Core
* [utils] Use pure browser header for User-Agent (#17236)
Extractors
+ [kinopoisk] Add support for kinopoisk.ru (#17283)
+ [yourporn] Add support for yourporn.sexy (#17298)
+ [go] Add support for disneynow.go.com (#16299, #17264)
+ [6play] Add support for play.rtl.hr (#17249)
* [anvato] Fallback to generic API key for access-key-to-API-key lookup
(#16788, #17254)
* [lci] Fix extraction (#17274)
* [bbccouk] Extend id URL regular expression (#17270)
* [cwtv] Fix extraction (#17256)
* [nova] Fix extraction (#17241)
+ [generic] Add support for expressen embeds
* [raywenderlich] Adapt to site redesign (#17225)
+ [redbulltv] Add support redbull.com tv URLs (#17218)
+ [bitchute] Add support for bitchute.com (#14052)
+ [clyp] Add support for token protected media (#17184)
* [imdb] Fix extension extraction (#17167)
version 2018.08.04
Extractors
* [funk:channel] Improve byChannelAlias extraction (#17142)
* [twitch] Fix authentication (#17024, #17126)
* [twitch:vod] Improve URL regular expression (#17135)
* [watchbox] Fix extraction (#17107)
* [pbs] Fix extraction (#17109)
* [theplatform] Relax URL regular expression (#16181, #17097)
+ [viqeo] Add support for viqeo.tv (#17066)
version 2018.07.29
Extractors
* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
+ [pornhub] Add support for subtitles (#16924, #17088)
* [ceskatelevize] Use https for API call (#16997, #16999)
* [dailymotion:playlist] Fix extraction (#16894)
* [ted] Improve extraction
* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
* [telecinco] Fix extraction (#17080)
* [mitele] Reduce number of requests
* [rai] Return non HTTP relinker URL intact (#17055)
* [vk] Fix extraction for inline only videos (#16923)
* [streamcloud] Fix extraction (#17054)
* [facebook] Fix tahoe player extraction with authentication (#16655)
+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
version 2018.07.21
Core
+ [utils] Introduce url_or_none
* [utils] Allow JSONP without function name (#17028)
+ [extractor/common] Extract DASH and MSS formats from SMIL manifests
Extractors
+ [bbc] Add support for BBC Radio Play pages (#17022)
* [iwara] Fix download URLs (#17026)
* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
+ [viu] Pass Referer and Origin headers and area id (#16992)
+ [vimeo] Add another config regular expression (#17013)
+ [facebook] Extract view count (#16942)
* [dailymotion] Improve description extraction (#16984)
* [slutload] Fix and improve extraction (#17001)
* [mediaset] Fix extraction (#16977)
+ [theplatform] Add support for theplatform TLD customization (#16977)
* [imgur] Relax URL regular expression (#16987)
* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
#16959)
version 2018.07.10
Core

View File

@ -870,7 +870,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.

View File

@ -108,6 +108,8 @@
- **BiliBili**
- **BioBioChileTV**
- **BIQLE**
- **BitChute**
- **BitChuteChannel**
- **BleacherReport**
- **BleacherReportCMS**
- **blinkx**
@ -405,6 +407,7 @@
- **Ketnet**
- **KhanAcademy**
- **KickStarter**
- **KinoPoisk**
- **KonserthusetPlay**
- **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью
@ -577,6 +580,7 @@
- **Normalboots**
- **NosVideo**
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **NovaEmbed**
- **nowness**
- **nowness:playlist**
- **nowness:series**
@ -672,6 +676,8 @@
- **PrimeShareTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **puhutv**
- **puhutv:serie**
- **Puls4**
- **Pyvideo**
- **qqmusic**: QQ音乐
@ -694,6 +700,7 @@
- **RaiPlayLive**
- **RaiPlayPlaylist**
- **RayWenderlich**
- **RayWenderlichCourse**
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**
@ -910,6 +917,7 @@
- **tvp:embed**: Telewizja Polska
- **tvp:series**
- **TVPlayer**
- **TVPlayHome**
- **Tweakers**
- **twitch:chapter**
- **twitch:clips**
@ -999,6 +1007,7 @@
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- **Viqeo**
- **Viu**
- **viu:ott**
- **viu:playlist**
@ -1090,6 +1099,7 @@
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- **YourPorn**
- **YourUpload**
- **youtube**: YouTube.com
- **youtube:channel**: YouTube.com channels

View File

@ -78,6 +78,7 @@ from youtube_dl.utils import (
uppercase_escape,
lowercase_escape,
url_basename,
url_or_none,
base_url,
urljoin,
urlencode_postdata,
@ -507,6 +508,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
def test_url_or_none(self):
self.assertEqual(url_or_none(None), None)
self.assertEqual(url_or_none(''), None)
self.assertEqual(url_or_none('foo'), None)
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
self.assertEqual(url_or_none('http$://foo.de'), None)
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_age_limit(False), None)
@ -717,6 +728,10 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
stripped = strip_jsonp('({"status": "success"});')
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')

View File

@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
from ..utils import (
int_or_none,
strip_or_none,
url_or_none,
)
@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
if not video_id:
entries = []
for episode in video_data.get('archiveEpisodes', []):
episode_url = episode.get('url')
episode_url = url_or_none(episode.get('url'))
if not episode_url:
continue
entries.append(self.url_result(

View File

@ -9,6 +9,7 @@ from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
url_or_none,
urlencode_postdata,
xpath_text,
)
@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
file_elements = video_element.findall(compat_xpath('./file'))
one = len(file_elements) == 1
for file_num, file_element in enumerate(file_elements, start=1):
file_url = file_element.text
file_url = url_or_none(file_element.text)
if not file_url:
continue
key = file_element.get('key', '')

View File

@ -3,11 +3,12 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
mimetype2ext,
determine_ext,
ExtractorError,
int_or_none,
mimetype2ext,
parse_iso8601,
url_or_none,
)
@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data.get('@attributes', {})
thumbnail_url = thumbnail.get('url')
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle:
subtitle = subtitle_data.get('@attributes', {})
subtitle_href = subtitle.get('href')
subtitle_href = url_or_none(subtitle.get('href'))
if not subtitle_href:
continue
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
media_content = [media_content]
for media_data in media_content:
media = media_data.get('@attributes', {})
media_url = media.get('url')
media_url = url_or_none(media.get('url'))
if not media_url:
continue
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
else:
formats.append({
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
'url': media['url'],
'url': media_url,
'tbr': int_or_none(media.get('bitrate')),
'filesize': int_or_none(media.get('fileSize')),
'ext': ext,

View File

@ -8,6 +8,7 @@ from ..utils import (
determine_ext,
extract_attributes,
ExtractorError,
url_or_none,
urlencode_postdata,
urljoin,
)
@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
}, fatal=False)
if not playlist:
continue
stream_url = playlist.get('streamurl')
stream_url = url_or_none(playlist.get('streamurl'))
if stream_url:
rtmp = re.search(
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',

View File

@ -134,9 +134,33 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
}
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
_TESTS = [{
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
'info_dict': {
'id': '4465496',
'ext': 'mp4',
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
'duration': 22,
'timestamp': 1534855680,
'upload_date': '20180821',
'uploader': 'ANV',
},
'params': {
'skip_download': True,
},
}, {
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
'only_matching': True,
}]
def __init__(self, *args, **kwargs):
super(AnvatoIE, self).__init__(*args, **kwargs)
self.__server_time = None
@ -169,7 +193,8 @@ class AnvatoIE(InfoExtractor):
'api': {
'anvrid': anvrid,
'anvstk': md5_text('%s|%s|%d|%s' % (
access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
access_key, anvrid, server_time,
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
'anvts': server_time,
},
}
@ -284,5 +309,6 @@ class AnvatoIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE:
access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
access_key) or access_key
return self._get_anvato_videos(access_key, video_id)

View File

@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
)
@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
for rendition in video_data.get('renditions', []):
video_url = rendition.get('url')
video_url = url_or_none(rendition.get('url'))
if not video_url:
continue
ext = rendition.get('format')

View File

@ -4,10 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
js_to_json,
url_or_none,
)
@ -68,8 +68,8 @@ class APAIE(InfoExtractor):
for source in sources:
if not isinstance(source, dict):
continue
source_url = source.get('file')
if not source_url or not isinstance(source_url, compat_str):
source_url = url_or_none(source.get('file'))
if not source_url:
continue
ext = determine_ext(source_url)
if ext == 'm3u8':

View File

@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
mimetype2ext,
url_or_none,
)
@ -43,7 +44,7 @@ class AparatIE(InfoExtractor):
formats = []
for item in file_list[0]:
file_url = item.get('file')
file_url = url_or_none(item.get('file'))
if not file_url:
continue
ext = mimetype2ext(item.get('type'))

View File

@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from .generic import GenericIE
from ..compat import compat_str
from ..utils import (
determine_ext,
ExtractorError,
@ -15,6 +14,7 @@ from ..utils import (
unified_strdate,
xpath_text,
update_url_query,
url_or_none,
)
from ..compat import compat_etree_fromstring
@ -100,7 +100,7 @@ class ARDMediathekIE(InfoExtractor):
quality = stream.get('_quality')
server = stream.get('_server')
for stream_url in stream_urls:
if not isinstance(stream_url, compat_str) or '//' not in stream_url:
if not url_or_none(stream_url):
continue
ext = determine_ext(stream_url)
if quality != 'auto' and ext in ('f4m', 'm3u8'):

View File

@ -19,6 +19,7 @@ from ..utils import (
unescapeHTML,
update_url_query,
unified_strdate,
url_or_none,
)
@ -131,8 +132,8 @@ class BandcampIE(InfoExtractor):
fatal=False)
if not stat:
continue
retry_url = stat.get('retry_url')
if not isinstance(retry_url, compat_str):
retry_url = url_or_none(stat.get('retry_url'))
if not retry_url:
continue
formats.append({
'url': self._proto_relative_url(retry_url, 'http:'),
@ -306,7 +307,7 @@ class BandcampWeeklyIE(InfoExtractor):
formats = []
for format_id, format_url in show['audio_stream'].items():
if not isinstance(format_url, compat_str):
if not url_or_none(format_url):
continue
for known_ext in KNOWN_EXTENSIONS:
if known_ext in format_id:

View File

@ -29,7 +29,7 @@ from ..compat import (
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_ID_REGEX = r'[pbw][\da-z]{7}'
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?bbc\.co\.uk/
@ -236,6 +236,12 @@ class BBCCoUkIE(InfoExtractor):
}, {
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
'only_matching': True,
}, {
'url': 'https://www.bbc.co.uk/programmes/m00005xn',
'only_matching': True,
}, {
'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
'only_matching': True,
}]
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
@ -778,6 +784,17 @@ class BBCIE(BBCCoUkIE):
'params': {
'skip_download': True,
}
}, {
# window.__PRELOADED_STATE__
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
'info_dict': {
'id': 'b0b9z4vz',
'ext': 'mp4',
'title': 'Prom 6: An American in Paris and Turangalila',
'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
'uploader': 'Radio 3',
'uploader_id': 'bbc_radio_three',
},
}]
@classmethod
@ -1000,6 +1017,36 @@ class BBCIE(BBCCoUkIE):
'subtitles': subtitles,
}
preload_state = self._parse_json(self._search_regex(
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
'preload state', default='{}'), playlist_id, fatal=False)
if preload_state:
current_programme = preload_state.get('programmes', {}).get('current') or {}
programme_id = current_programme.get('id')
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
formats, subtitles = self._download_media_selector(programme_id)
self._sort_formats(formats)
synopses = current_programme.get('synopses') or {}
network = current_programme.get('network') or {}
duration = int_or_none(
current_programme.get('duration', {}).get('value'))
thumbnail = None
image_url = current_programme.get('image_url')
if image_url:
thumbnail = image_url.replace('{recipe}', '1920x1920')
return {
'id': programme_id,
'title': title,
'description': dict_get(synopses, ('long', 'medium', 'short')),
'thumbnail': thumbnail,
'duration': duration,
'uploader': network.get('short_title'),
'uploader_id': network.get('id'),
'formats': formats,
'subtitles': subtitles,
}
bbc3_config = self._parse_json(
self._search_regex(
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,

View File

@ -0,0 +1,120 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
from ..utils import urlencode_postdata
class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
'info_dict': {
'id': 'szoMrox2JEI',
'ext': 'mp4',
'title': 'Fuck bitches get money',
'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Victoria X Rave',
},
}, {
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
'only_matching': True,
}, {
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
})
title = self._search_regex(
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
webpage, 'title', default=None) or self._html_search_meta(
'description', webpage, 'title',
default=None) or self._og_search_description(webpage)
formats = [
{'url': mobj.group('url')}
for mobj in re.finditer(
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
self._sort_formats(formats)
description = self._html_search_regex(
r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_meta(
'twitter:image:src', webpage, 'thumbnail')
uploader = self._html_search_regex(
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
'uploader', fatal=False)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'formats': formats,
}
class BitChuteChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.bitchute.com/channel/victoriaxrave/',
'playlist_mincount': 185,
'info_dict': {
'id': 'victoriaxrave',
},
}
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
def _entries(self, channel_id):
channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
offset = 0
for page_num in itertools.count(1):
data = self._download_json(
'%sextend/' % channel_url, channel_id,
'Downloading channel page %d' % page_num,
data=urlencode_postdata({
'csrfmiddlewaretoken': self._TOKEN,
'name': '',
'offset': offset,
}), headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Referer': channel_url,
'X-Requested-With': 'XMLHttpRequest',
'Cookie': 'csrftoken=%s' % self._TOKEN,
})
if data.get('success') is False:
break
html = data.get('html')
if not html:
break
video_ids = re.findall(
r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
html)
if not video_ids:
break
offset += len(video_ids)
for video_id in video_ids:
yield self.url_result(
'https://www.bitchute.com/video/%s' % video_id,
ie=BitChuteIE.ie_key(), video_id=video_id)
def _real_extract(self, url):
channel_id = self._match_id(url)
return self.playlist_result(
self._entries(channel_id), playlist_id=channel_id)

View File

@ -4,8 +4,10 @@ import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import compat_str
from ..utils import int_or_none
from ..utils import (
int_or_none,
url_or_none,
)
class BreakIE(InfoExtractor):
@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
formats = []
for video in content:
video_url = video.get('url')
if not video_url or not isinstance(video_url, compat_str):
video_url = url_or_none(video.get('url'))
if not video_url:
continue
bitrate = int_or_none(self._search_regex(
r'(\d+)_kbps', video_url, 'tbr', default=None))

View File

@ -2,10 +2,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
)
@ -56,8 +56,8 @@ class CamModelsIE(InfoExtractor):
for media in encodings:
if not isinstance(media, dict):
continue
media_url = media.get('location')
if not media_url or not isinstance(media_url, compat_str):
media_url = url_or_none(media.get('location'))
if not media_url:
continue
format_id_list = [format_id]

View File

@ -11,6 +11,7 @@ from ..utils import (
strip_or_none,
float_or_none,
int_or_none,
merge_dicts,
parse_iso8601,
)
@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
webpage, urlh = self._download_webpage_handle(url, display_id)
title = self._html_search_regex(
info = self._search_json_ld(webpage, display_id, default={})
# title is optional here since it may be extracted by extractor
# that is delegated from here
title = strip_or_none(self._html_search_regex(
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
webpage, 'title').strip()
webpage, 'title', default=None))
description = self._html_search_regex(
r'(?ms)<div class="content__description">(.+?)</div>',
@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
# the first one
video_id = list(video.values())[0].get('videoid')
return {
return merge_dicts(info, {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(),
@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
'season_number': season_number,
'episode_number': episode_number,
'release_date': release_date,
}
})

View File

@ -4,13 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
int_or_none,
parse_duration,
parse_iso8601,
parse_resolution,
url_or_none,
)
@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
media_url = media['media']['url']
if isinstance(media_url, list):
for format_ in media_url:
format_url = format_.get('file')
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(format_.get('file'))
if not format_url:
continue
label = format_.get('label')
f = parse_resolution(label)

View File

@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):
for user_agent in (None, USER_AGENTS['Safari']):
req = sanitized_Request(
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
data=urlencode_postdata(data))
req.add_header('Content-type', 'application/x-www-form-urlencoded')

View File

@ -1,15 +1,19 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
float_or_none,
parse_iso8601,
unified_timestamp,
)
class ClypIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
_TEST = {
_TESTS = [{
'url': 'https://clyp.it/ojz2wfah',
'md5': '1d4961036c41247ecfdcc439c0cddcbb',
'info_dict': {
@ -21,13 +25,34 @@ class ClypIE(InfoExtractor):
'timestamp': 1443515251,
'upload_date': '20150929',
},
}
}, {
'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
'info_dict': {
'id': 'b04p1odi',
'ext': 'mp3',
'title': 'GJ! (Reward Edit)',
'description': 'Metal Resistance (THE ONE edition)',
'duration': 177.789,
'timestamp': 1528241278,
'upload_date': '20180605',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
audio_id = self._match_id(url)
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
token = qs.get('token', [None])[0]
query = {}
if token:
query['token'] = token
metadata = self._download_json(
'https://api.clyp.it/%s' % audio_id, audio_id)
'https://api.clyp.it/%s' % audio_id, audio_id, query=query)
formats = []
for secure in ('', 'Secure'):
@ -45,7 +70,7 @@ class ClypIE(InfoExtractor):
title = metadata['Title']
description = metadata.get('Description')
duration = float_or_none(metadata.get('Duration'))
timestamp = parse_iso8601(metadata.get('DateCreated'))
timestamp = unified_timestamp(metadata.get('DateCreated'))
return {
'id': audio_id,

View File

@ -1859,9 +1859,7 @@ class InfoExtractor(object):
'height': height,
})
formats.extend(m3u8_formats)
continue
if src_ext == 'f4m':
elif src_ext == 'f4m':
f4m_url = src_url
if not f4m_params:
f4m_params = {
@ -1871,9 +1869,13 @@ class InfoExtractor(object):
f4m_url += '&' if '?' in f4m_url else '?'
f4m_url += compat_urllib_parse_urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
continue
if src_url.startswith('http') and self._is_valid_url(src, video_id):
elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src_url, video_id, mpd_id='dash', fatal=False))
elif re.search(r'\.ism/[Mm]anifest', src_url):
formats.extend(self._extract_ism_formats(
src_url, video_id, ism_id='mss', fatal=False))
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
http_count += 1
formats.append({
'url': src_url,
@ -1884,7 +1886,6 @@ class InfoExtractor(object):
'width': width,
'height': height,
})
continue
return formats

View File

@ -4,16 +4,14 @@ from __future__ import unicode_literals, division
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_HTTPError,
)
from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
parse_age_limit,
parse_duration,
url_or_none,
ExtractorError
)
@ -86,8 +84,8 @@ class CrackleIE(InfoExtractor):
for e in media['MediaURLs']:
if e.get('UseDRM') is True:
continue
format_url = e.get('Path')
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(e.get('Path'))
if not format_url:
continue
ext = determine_ext(format_url)
if ext == 'm3u8':
@ -124,8 +122,8 @@ class CrackleIE(InfoExtractor):
for cc_file in cc_files:
if not isinstance(cc_file, dict):
continue
cc_url = cc_file.get('Path')
if not cc_url or not isinstance(cc_url, compat_str):
cc_url = url_or_none(cc_file.get('Path'))
if not cc_url:
continue
lang = cc_file.get('Locale') or 'en'
subtitles.setdefault(lang, []).append({'url': cc_url})

View File

@ -262,6 +262,9 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# Just test metadata extraction
'skip_download': True,
},
}, {
'url': 'http://www.crunchyroll.com/media-723735',
'only_matching': True,
}]
_FORMAT_IDS = {
@ -580,7 +583,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
IE_NAME = 'crunchyroll:playlist'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',

View File

@ -4,7 +4,10 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_age_limit,
parse_iso8601,
smuggle_url,
str_or_none,
)
@ -40,10 +43,15 @@ class CWTVIE(InfoExtractor):
'duration': 1263,
'series': 'Whose Line Is It Anyway?',
'season_number': 11,
'season': '11',
'episode_number': 20,
'upload_date': '20151006',
'timestamp': 1444107300,
'age_limit': 14,
'uploader': 'CWTV',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
@ -58,60 +66,28 @@ class CWTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = None
formats = []
for partner in (154, 213):
vdata = self._download_json(
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
if not vdata:
continue
video_data = vdata
for quality, quality_data in vdata.get('videos', {}).items():
quality_url = quality_data.get('uri')
if not quality_url:
continue
if quality == 'variantplaylist':
formats.extend(self._extract_m3u8_formats(
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
tbr = int_or_none(quality_data.get('bitrate'))
format_id = 'http' + ('-%d' % tbr if tbr else '')
if self._is_valid_url(quality_url, video_id, format_id):
formats.append({
'format_id': format_id,
'url': quality_url,
'tbr': tbr,
})
video_metadata = video_data['assetFields']
ism_url = video_metadata.get('smoothStreamingUrl')
if ism_url:
formats.extend(self._extract_ism_formats(
ism_url, video_id, ism_id='mss', fatal=False))
self._sort_formats(formats)
video_data = self._download_json(
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
video_id)['video']
title = video_data['title']
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
thumbnails = [{
'url': image['uri'],
'width': image.get('width'),
'height': image.get('height'),
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
subtitles = {
'en': [{
'url': video_metadata['UnicornCcUrl'],
}],
} if video_metadata.get('UnicornCcUrl') else None
season = str_or_none(video_data.get('season'))
episode = str_or_none(video_data.get('episode'))
if episode and season:
episode = episode.lstrip(season)
return {
'_type': 'url_transparent',
'id': video_id,
'title': video_metadata['title'],
'description': video_metadata.get('description'),
'duration': int_or_none(video_metadata.get('duration')),
'series': video_metadata.get('seriesName'),
'season_number': int_or_none(video_metadata.get('seasonNumber')),
'season': video_metadata.get('seasonName'),
'episode_number': int_or_none(video_metadata.get('episodeNumber')),
'timestamp': parse_iso8601(video_data.get('startTime')),
'thumbnails': thumbnails,
'formats': formats,
'subtitles': subtitles,
'title': title,
'url': smuggle_url(mpx_url, {'force_smil_url': True}),
'description': video_data.get('description_long'),
'duration': int_or_none(video_data.get('duration_secs')),
'series': video_data.get('series_name'),
'season_number': int_or_none(season),
'episode_number': int_or_none(episode),
'timestamp': parse_iso8601(video_data.get('start_time')),
'age_limit': parse_age_limit(video_data.get('rating')),
'ie_key': 'ThePlatform',
}

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals
import base64
import functools
import hashlib
import itertools
import json
@ -16,11 +17,13 @@ from ..utils import (
error_to_compat_str,
ExtractorError,
int_or_none,
mimetype2ext,
OnDemandPagedList,
parse_iso8601,
sanitized_Request,
str_to_int,
unescapeHTML,
mimetype2ext,
urlencode_postdata,
)
@ -144,7 +147,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
age_limit = self._rta_search(webpage)
description = self._og_search_description(webpage) or self._html_search_meta(
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'description', webpage, 'description')
view_count_str = self._search_regex(
@ -342,17 +346,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
_TESTS = [{
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'title': 'SPORT',
'id': 'xv4bw_nqtv_sport',
'id': 'xv4bw',
},
'playlist_mincount': 20,
}]
_PAGE_SIZE = 100
def _fetch_page(self, playlist_id, authorizaion, page):
page += 1
videos = self._download_json(
'https://graphql.api.dailymotion.com',
playlist_id, 'Downloading page %d' % page,
data=json.dumps({
'query': '''{
collection(xid: "%s") {
videos(first: %d, page: %d) {
pageInfo {
hasNextPage
nextPage
}
edges {
node {
xid
url
}
}
}
}
}''' % (playlist_id, self._PAGE_SIZE, page)
}).encode(), headers={
'Authorization': authorizaion,
'Origin': 'https://www.dailymotion.com',
})['data']['collection']['videos']
for edge in videos['edges']:
node = edge['node']
yield self.url_result(
node['url'], DailymotionIE.ie_key(), node['xid'])
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
api = self._parse_json(self._search_regex(
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
webpage, 'player config'), playlist_id)['context']['api']
auth = self._download_json(
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
playlist_id, data=urlencode_postdata({
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
'grant_type': 'client_credentials',
}))
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
entries = OnDemandPagedList(functools.partial(
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
return self.playlist_result(
entries, playlist_id,
self._og_search_title(webpage))
class DailymotionUserIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
'title': 'Rémi Gaillard',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.dailymotion.com/user/UnderProject',
'info_dict': {
'id': 'UnderProject',
'title': 'UnderProject',
},
'playlist_mincount': 1800,
'expected_warnings': [
'Stopped at duplicated page',
],
'skip': 'Takes too long time',
}]
def _extract_entries(self, id):
video_ids = set()
@ -378,43 +458,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
break
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
return {
'_type': 'playlist',
'id': playlist_id,
'title': self._og_search_title(webpage),
'entries': self._extract_entries(playlist_id),
}
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
'title': 'Rémi Gaillard',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.dailymotion.com/user/UnderProject',
'info_dict': {
'id': 'UnderProject',
'title': 'UnderProject',
},
'playlist_mincount': 1800,
'expected_warnings': [
'Stopped at duplicated page',
],
'skip': 'Takes too long time',
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')

View File

@ -7,6 +7,7 @@ from ..utils import (
float_or_none,
int_or_none,
unified_timestamp,
url_or_none,
)
@ -69,7 +70,7 @@ class DctpTvIE(InfoExtractor):
endpoint = next(
server['endpoint']
for server in servers
if isinstance(server.get('endpoint'), compat_str) and
if url_or_none(server.get('endpoint')) and
'cloudfront' in server['endpoint'])
else:
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
@ -92,8 +93,8 @@ class DctpTvIE(InfoExtractor):
for image in images:
if not isinstance(image, dict):
continue
image_url = image.get('url')
if not image_url or not isinstance(image_url, compat_str):
image_url = url_or_none(image.get('url'))
if not image_url:
continue
thumbnails.append({
'url': image_url,

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
extract_attributes,
@ -12,6 +11,7 @@ from ..utils import (
parse_age_limit,
remove_end,
unescapeHTML,
url_or_none,
)
@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
captions = stream.get('captions')
if isinstance(captions, list):
for caption in captions:
subtitle_url = caption.get('fileUrl')
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
not subtitle_url.startswith('http')):
subtitle_url = url_or_none(caption.get('fileUrl'))
if not subtitle_url or not subtitle_url.startswith('http'):
continue
lang = caption.get('fileLang', 'en')
ext = determine_ext(subtitle_url)

View File

@ -7,7 +7,6 @@ import json
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urlparse,
)
from ..utils import (
@ -17,6 +16,7 @@ from ..utils import (
parse_age_limit,
parse_duration,
unified_timestamp,
url_or_none,
)
@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
for sub in subs:
if not isinstance(sub, dict):
continue
sub_url = sub.get('url')
if not sub_url or not isinstance(sub_url, compat_str):
sub_url = url_or_none(sub.get('url'))
if not sub_url:
continue
subtitles.setdefault(
sub.get('code') or sub.get('language') or 'en', []).append({
@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
for format_id, format_dict in download_assets.items():
if not isinstance(format_dict, dict):
continue
format_url = format_dict.get('url')
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(format_dict.get('url'))
if not format_url:
continue
formats.append({
'url': format_url,

View File

@ -4,14 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
unsmuggle_url,
url_or_none,
)
@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
video_id, 'Downloading mp4 JSON', fatal=False)
if mp4_data:
for format_id, format_url in mp4_data.get('data', {}).items():
if not isinstance(format_url, compat_str):
if not url_or_none(format_url):
continue
height = int_or_none(format_id)
if height is not None and m3u8_formats_dict.get(height):

View File

@ -8,6 +8,7 @@ from ..utils import (
int_or_none,
try_get,
unified_timestamp,
url_or_none,
)
@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
entries = []
for lesson in lessons:
lesson_url = lesson.get('http_url')
if not lesson_url or not isinstance(lesson_url, compat_str):
lesson_url = url_or_none(lesson.get('http_url'))
if not lesson_url:
continue
lesson_id = lesson.get('id')
if lesson_id:
@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
formats = []
for _, format_url in lesson['media_urls'].items():
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(format_url)
if not format_url:
continue
ext = determine_ext(format_url)
if ext == 'm3u8':

View File

@ -11,6 +11,7 @@ from ..utils import (
int_or_none,
parse_duration,
str_to_int,
url_or_none,
)
@ -82,8 +83,8 @@ class EpornerIE(InfoExtractor):
for format_id, format_dict in formats_dict.items():
if not isinstance(format_dict, dict):
continue
src = format_dict.get('src')
if not isinstance(src, compat_str) or not src.startswith('http'):
src = url_or_none(format_dict.get('src'))
if not src or not src.startswith('http'):
continue
if kind == 'hls':
formats.extend(self._extract_m3u8_formats(

View File

@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
@ -11,7 +13,13 @@ from ..utils import (
class ExpressenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?expressen\.se/
(?:(?:tvspelare/video|videoplayer/embed)/)?
tv/(?:[^/]+/)*
(?P<id>[^/?#&]+)
'''
_TESTS = [{
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
'md5': '2fbbe3ca14392a6b1b36941858d33a45',
@ -28,8 +36,21 @@ class ExpressenIE(InfoExtractor):
}, {
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
webpage)]
def _real_extract(self, url):
display_id = self._match_id(url)

View File

@ -118,6 +118,10 @@ from .bilibili import (
BiliBiliBangumiIE,
)
from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
BitChuteIE,
BitChuteChannelIE,
)
from .biqle import BIQLEIE
from .bleacherreport import (
BleacherReportIE,
@ -516,6 +520,7 @@ from .keezmovies import KeezMoviesIE
from .ketnet import KetnetIE
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
from .kinopoisk import KinoPoiskIE
from .keek import KeekIE
from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE
@ -736,7 +741,10 @@ from .nonktube import NonkTubeIE
from .noovo import NoovoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .nova import NovaIE
from .nova import (
NovaEmbedIE,
NovaIE,
)
from .novamov import (
AuroraVidIE,
CloudTimeIE,
@ -860,6 +868,10 @@ from .pornhub import (
from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
from .puhutv import (
PuhuTVIE,
PuhuTVSerieIE,
)
from .presstv import PressTVIE
from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
@ -891,7 +903,10 @@ from .rai import (
RaiPlayPlaylistIE,
RaiIE,
)
from .raywenderlich import RayWenderlichIE
from .raywenderlich import (
RayWenderlichIE,
RayWenderlichCourseIE,
)
from .rbmaradio import RBMARadioIE
from .rds import RDSIE
from .redbulltv import RedBullTVIE
@ -1166,6 +1181,7 @@ from .tvp import (
from .tvplay import (
TVPlayIE,
ViafreeIE,
TVPlayHomeIE,
)
from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
@ -1287,6 +1303,7 @@ from .viki import (
VikiIE,
VikiChannelIE,
)
from .viqeo import ViqeoIE
from .viu import (
ViuIE,
ViuPlaylistIE,
@ -1412,6 +1429,7 @@ from .younow import (
YouNowMomentIE,
)
from .youporn import YouPornIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .youtube import (
YoutubeIE,

View File

@ -20,6 +20,7 @@ from ..utils import (
int_or_none,
js_to_json,
limit_length,
parse_count,
sanitized_Request,
try_get,
urlencode_postdata,
@ -75,7 +76,7 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '274175099429670',
'ext': 'mp4',
'title': 'Asif Nawab Butt posted a video to his Timeline.',
'title': 're:^Asif Nawab Butt posted a video',
'uploader': 'Asif Nawab Butt',
'upload_date': '20140506',
'timestamp': 1399398998,
@ -133,7 +134,7 @@ class FacebookIE(InfoExtractor):
}, {
# have 1080P, but only up to 720p in swf params
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': '0d9813160b146b3bc8744e006027fcc6',
'md5': '9571fae53d4165bbbadb17a94651dcdc',
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
@ -142,6 +143,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20161030',
'uploader': 'CNN',
'thumbnail': r're:^https?://.*',
'view_count': int,
},
}, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@ -149,7 +151,7 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '1417995061575415',
'ext': 'mp4',
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
'title': 'md5:1db063d6a8c13faa8da727817339c857',
'timestamp': 1486648217,
'upload_date': '20170209',
'uploader': 'Yaroslav Korpan',
@ -176,7 +178,7 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '1396382447100162',
'ext': 'mp4',
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
'title': 'md5:19a428bbde91364e3de815383b54a235',
'timestamp': 1486035494,
'upload_date': '20170202',
'uploader': 'Elisabeth Ahtn',
@ -353,7 +355,6 @@ class FacebookIE(InfoExtractor):
tahoe_data = self._download_webpage(
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
data=urlencode_postdata({
'__user': 0,
'__a': 1,
'__pc': self._search_regex(
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
@ -361,6 +362,9 @@ class FacebookIE(InfoExtractor):
'__rev': self._search_regex(
r'client_revision["\']\s*:\s*(\d+),', webpage,
'client revision', default='3944515'),
'fb_dtsg': self._search_regex(
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
webpage, 'dtsg token', default=''),
}),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
@ -426,6 +430,10 @@ class FacebookIE(InfoExtractor):
'timestamp', default=None))
thumbnail = self._og_search_thumbnail(webpage)
view_count = parse_count(self._search_regex(
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
default=None))
info_dict = {
'id': video_id,
'title': video_title,
@ -433,6 +441,7 @@ class FacebookIE(InfoExtractor):
'uploader': uploader,
'timestamp': timestamp,
'thumbnail': thumbnail,
'view_count': view_count,
}
return webpage, info_dict

View File

@ -10,6 +10,7 @@ from ..utils import (
int_or_none,
qualities,
unified_strdate,
url_or_none,
)
@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
formats = []
path = None
for f in item.get('mbr', []):
src = f.get('src')
if not src or not isinstance(src, compat_str):
src = url_or_none(f.get('src'))
if not src:
continue
tbr = int_or_none(self._search_regex(
r'_(\d{3,})\.mp4', src, 'tbr', default=None))

View File

@ -16,6 +16,7 @@ from ..utils import (
int_or_none,
parse_duration,
try_get,
url_or_none,
)
from .dailymotion import DailymotionIE
@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
def sign(manifest_url, manifest_id):
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
signed_url = self._download_webpage(
signed_url = url_or_none(self._download_webpage(
'https://%s/esi/TA' % host, video_id,
'Downloading signed %s manifest URL' % manifest_id,
fatal=False, query={
'url': manifest_url,
})
if (signed_url and isinstance(signed_url, compat_str) and
re.search(r'^(?:https?:)?//', signed_url)):
}))
if signed_url:
return signed_url
return manifest_url

View File

@ -11,6 +11,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
parse_duration,
url_or_none,
urlencode_postdata,
)
@ -80,7 +81,7 @@ class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
chapters = []
lesson_elements = course.get('lessonElements')
if isinstance(lesson_elements, list):
chapters = [e for e in lesson_elements if isinstance(e, compat_str)]
chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
return chapters
@staticmethod

View File

@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
@ -125,17 +126,31 @@ class FunkChannelIE(FunkBaseIE):
# Id-based channels are currently broken on their side: webplayer
# tries to process them via byChannelAlias endpoint and fails
# predictably.
by_channel_alias = self._download_json(
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
% channel_id,
'Downloading byChannelAlias JSON', headers=headers, query={
'size': 100,
}, fatal=False)
if by_channel_alias:
for page_num in itertools.count():
by_channel_alias = self._download_json(
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
% channel_id,
'Downloading byChannelAlias JSON page %d' % (page_num + 1),
headers=headers, query={
'filterFsk': 'false',
'sort': 'creationDate,desc',
'size': 100,
'page': page_num,
}, fatal=False)
if not by_channel_alias:
break
video_list = try_get(
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
if video_list:
if not video_list:
break
try:
video = next(r for r in video_list if r.get('alias') == alias)
break
except StopIteration:
pass
if not try_get(
by_channel_alias, lambda x: x['_links']['next']):
break
if not video:
by_id_list = self._download_json(

View File

@ -112,6 +112,8 @@ from .peertube import PeerTubeIE
from .indavideo import IndavideoEmbedIE
from .apa import APAIE
from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE
from .expressen import ExpressenIE
class GenericIE(InfoExtractor):
@ -2059,6 +2061,30 @@ class GenericIE(InfoExtractor):
},
'skip': 'TODO: fix nested playlists processing in tests',
},
{
# Viqeo embeds
'url': 'https://viqeo.tv/',
'info_dict': {
'id': 'viqeo',
'title': 'All-new video platform',
},
'playlist_count': 6,
},
{
# videojs embed
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
'info_dict': {
'id': 'shell',
'ext': 'mp4',
'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Failed to download MPD manifest'],
},
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@ -3093,6 +3119,16 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
sharevideos_urls, video_id, video_title)
viqeo_urls = ViqeoIE._extract_urls(webpage)
if viqeo_urls:
return self.playlist_from_matches(
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
expressen_urls = ExpressenIE._extract_urls(webpage)
if expressen_urls:
return self.playlist_from_matches(
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:

View File

@ -36,7 +36,8 @@ class GoIE(AdobePassIE):
'requestor_id': 'DisneyXD',
}
}
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
_TESTS = [{
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
'info_dict': {
@ -62,6 +63,14 @@ class GoIE(AdobePassIE):
}, {
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
'only_matching': True,
}, {
# brand 004
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
'only_matching': True,
}, {
# brand 008
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
'only_matching': True,
}]
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
@ -72,14 +81,23 @@ class GoIE(AdobePassIE):
def _real_extract(self, url):
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
site_info = self._SITE_INFO[sub_domain]
brand = site_info['brand']
if not video_id:
webpage = self._download_webpage(url, display_id)
site_info = self._SITE_INFO.get(sub_domain, {})
brand = site_info.get('brand')
if not video_id or not site_info:
webpage = self._download_webpage(url, display_id or video_id)
video_id = self._search_regex(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
default=None)
if not site_info:
brand = self._search_regex(
(r'data-brand=\s*["\']\s*(\d+)',
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
default='004')
site_info = next(
si for _, si in self._SITE_INFO.items()
if si.get('brand') == brand)
if not video_id:
# show extraction works for Disney, DisneyJunior and DisneyXD
# ABC and Freeform has different layout

View File

@ -8,6 +8,7 @@ from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
urlencode_postdata,
)
@ -80,8 +81,8 @@ class HiDiveIE(InfoExtractor):
bitrates = rendition.get('bitrates')
if not isinstance(bitrates, dict):
continue
m3u8_url = bitrates.get('hls')
if not isinstance(m3u8_url, compat_str):
m3u8_url = url_or_none(bitrates.get('hls'))
if not m3u8_url:
continue
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
@ -93,9 +94,8 @@ class HiDiveIE(InfoExtractor):
if not isinstance(cc_file, list) or len(cc_file) < 3:
continue
cc_lang = cc_file[0]
cc_url = cc_file[2]
if not isinstance(cc_lang, compat_str) or not isinstance(
cc_url, compat_str):
cc_url = url_or_none(cc_file[2])
if not isinstance(cc_lang, compat_str) or not cc_url:
continue
subtitles.setdefault(cc_lang, []).append({
'url': cc_url,

View File

@ -3,12 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
mimetype2ext,
parse_duration,
qualities,
url_or_none,
)
@ -61,10 +61,11 @@ class ImdbIE(InfoExtractor):
for encoding in video_metadata.get('encodings', []):
if not encoding or not isinstance(encoding, dict):
continue
video_url = encoding.get('videoUrl')
if not video_url or not isinstance(video_url, compat_str):
video_url = url_or_none(encoding.get('videoUrl'))
if not video_url:
continue
ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
ext = mimetype2ext(encoding.get(
'mimeType')) or determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',

View File

@ -12,7 +12,7 @@ from ..utils import (
class ImgurIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
}, {
'url': 'http://imgur.com/r/aww/VQcQPhM',
'only_matching': True,
}, {
'url': 'https://i.imgur.com/crGpqCV.mp4',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -17,6 +17,7 @@ from ..utils import (
lowercase_escape,
std_headers,
try_get,
url_or_none,
)
@ -170,7 +171,7 @@ class InstagramIE(InfoExtractor):
node = try_get(edge, lambda x: x['node'], dict)
if not node:
continue
node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
node_video_url = url_or_none(node.get('video_url'))
if not node_video_url:
continue
entries.append({

View File

@ -20,6 +20,7 @@ from ..utils import (
merge_dicts,
parse_duration,
smuggle_url,
url_or_none,
xpath_with_ns,
xpath_element,
xpath_text,
@ -250,8 +251,8 @@ class ITVIE(InfoExtractor):
for sub in subs:
if not isinstance(sub, dict):
continue
href = sub.get('Href')
if isinstance(href, compat_str):
href = url_or_none(sub.get('Href'))
if href:
extract_subtitle(href)
if not info.get('duration'):
info['duration'] = parse_duration(video_data.get('Duration'))

View File

@ -7,6 +7,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
remove_end,
url_or_none,
)
@ -73,11 +74,14 @@ class IwaraIE(InfoExtractor):
formats = []
for a_format in video_data:
format_uri = url_or_none(a_format.get('uri'))
if not format_uri:
continue
format_id = a_format.get('resolution')
height = int_or_none(self._search_regex(
r'(\d+)p', format_id, 'height', default=None))
formats.append({
'url': a_format['uri'],
'url': self._proto_relative_url(format_uri, 'https:'),
'format_id': format_id,
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
'height': height,

View File

@ -4,16 +4,14 @@ import re
from .common import InfoExtractor
from ..aes import aes_decrypt_text
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..compat import compat_urllib_parse_unquote
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
str_to_int,
strip_or_none,
url_or_none,
)
@ -55,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
encrypted = False
def extract_format(format_url, height=None):
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//')):
return
if format_url in format_urls:
return

View File

@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
dict_get,
int_or_none,
)
class KinoPoiskIE(InfoExtractor):
_GEO_COUNTRIES = ['RU']
_VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.kinopoisk.ru/film/81041/watch/',
'md5': '4f71c80baea10dfa54a837a46111d326',
'info_dict': {
'id': '81041',
'ext': 'mp4',
'title': 'Алеша попович и тугарин змей',
'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
'thumbnail': r're:^https?://.*',
'duration': 4533,
'age_limit': 12,
},
'params': {
'format': 'bestvideo',
},
}, {
'url': 'https://www.kinopoisk.ru/film/81041',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
query={'kpId': video_id})
data = self._parse_json(
self._search_regex(
r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
webpage, 'data'),
video_id)['models']
film = data['filmStatus']
title = film.get('title') or film['originalTitle']
formats = self._extract_m3u8_formats(
data['playlistEntity']['uri'], video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
description = dict_get(
film, ('descriptscription', 'description',
'shortDescriptscription', 'shortDescription'))
thumbnail = film.get('coverUrl') or film.get('posterUrl')
duration = int_or_none(film.get('duration'))
age_limit = int_or_none(film.get('restrictionAge'))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'age_limit': age_limit,
'formats': formats,
}

View File

@ -2,11 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
url_or_none,
)
@ -109,7 +109,8 @@ class KonserthusetPlayIE(InfoExtractor):
captions = source.get('captionsAvailableLanguages')
if isinstance(captions, dict):
for lang, subtitle_url in captions.items():
if lang != 'none' and isinstance(subtitle_url, compat_str):
subtitle_url = url_or_none(subtitle_url)
if lang != 'none' and subtitle_url:
subtitles.setdefault(lang, []).append({'url': subtitle_url})
return {

View File

@ -20,5 +20,7 @@ class LCIIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
wat_id = self._search_regex(r'data-watid=[\'"](\d+)', webpage, 'wat id')
wat_id = self._search_regex(
(r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'),
webpage, 'wat id')
return self.url_result('wat:' + wat_id, 'Wat', wat_id)

View File

@ -3,75 +3,75 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from .theplatform import ThePlatformBaseIE
from ..utils import (
determine_ext,
parse_duration,
try_get,
unified_strdate,
ExtractorError,
int_or_none,
update_url_query,
)
class MediasetIE(InfoExtractor):
class MediasetIE(ThePlatformBaseIE):
_TP_TLD = 'eu'
_VALID_URL = r'''(?x)
(?:
mediaset:|
https?://
(?:www\.)?video\.mediaset\.it/
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
(?:
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
player/index\.html\?.*?\bprogramGuid=
)
)(?P<id>[0-9]+)
)(?P<id>[0-9A-Z]{16})
'''
_TESTS = [{
# full episode
'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
'info_dict': {
'id': '661824',
'id': 'FAFU000000661824',
'ext': 'mp4',
'title': 'Quarta puntata',
'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1414,
'creator': 'mediaset',
'duration': 1414.26,
'upload_date': '20161107',
'series': 'Hello Goodbye',
'categories': ['reality'],
'timestamp': 1478532900,
'uploader': 'Rete 4',
'uploader_id': 'R4',
},
'expected_warnings': ['is not a supported codec'],
}, {
'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html',
'md5': '1276f966ac423d16ba255ce867de073e',
'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
'md5': '288532f0ad18307705b01e581304cd7b',
'info_dict': {
'id': '846685',
'id': 'F309013801000501',
'ext': 'mp4',
'title': 'Puntata del 25 maggio',
'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 6565,
'creator': 'mediaset',
'upload_date': '20180525',
'duration': 6565.007,
'upload_date': '20180526',
'series': 'Matrix',
'categories': ['infotainment'],
'timestamp': 1527326245,
'uploader': 'Canale 5',
'uploader_id': 'C5',
},
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
# clip
'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
'only_matching': True,
}, {
# iframe simple
'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
'only_matching': True,
}, {
# iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
'only_matching': True,
}, {
'url': 'mediaset:661824',
'url': 'mediaset:FAFU000000665924',
'only_matching': True,
}]
@ -84,61 +84,54 @@ class MediasetIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'https://www.video.mediaset.it/html/metainfo.sjson',
video_id, 'Downloading media info', query={
'id': video_id
})['video']
title = video['title']
media_id = video.get('guid') or video_id
video_list = self._download_json(
'http://cdnsel01.mediaset.net/GetCdn2018.aspx',
video_id, 'Downloading video CDN JSON', query={
'streamid': media_id,
'format': 'json',
})['videoList']
guid = self._match_id(url)
tp_path = 'PR1GhC/media/guid/2702976343/' + guid
info = self._extract_theplatform_metadata(tp_path, guid)
formats = []
for format_url in video_list:
ext = determine_ext(format_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
elif ext == 'ism' or '.ism' in format_url:
formats.extend(self._extract_ism_formats(
format_url, video_id, ism_id='mss', fatal=False))
else:
formats.append({
'url': format_url,
'format_id': determine_ext(format_url),
})
subtitles = {}
first_e = None
for asset_type in ('SD', 'HD'):
for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
try:
tp_formats, tp_subtitles = self._extract_theplatform_smil(
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
'mbr': 'true',
'formats': f,
'assetTypes': asset_type,
}), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
except ExtractorError as e:
if not first_e:
first_e = e
break
for tp_f in tp_formats:
tp_f['quality'] = 1 if asset_type == 'HD' else 0
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if first_e and not formats:
raise first_e
self._sort_formats(formats)
creator = try_get(
video, lambda x: x['brand-info']['publisher'], compat_str)
category = try_get(
video, lambda x: x['brand-info']['category'], compat_str)
categories = [category] if category else None
fields = []
for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
fields.extend(templ % repl for repl in repls)
feed_data = self._download_json(
'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
guid, fatal=False, query={'fields': ','.join(fields)})
if feed_data:
publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
info.update({
'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
'series': feed_data.get('mediasetprogram$brandTitle'),
'uploader': publish_info.get('description'),
'uploader_id': publish_info.get('channel'),
'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
})
return {
'id': video_id,
'title': title,
'description': video.get('short-description'),
'thumbnail': video.get('thumbnail'),
'duration': parse_duration(video.get('duration')),
'creator': creator,
'upload_date': unified_strdate(video.get('production-date')),
'webpage_url': video.get('url'),
'series': video.get('brand-value'),
'season': video.get('season'),
'categories': categories,
info.update({
'id': guid,
'formats': formats,
}
'subtitles': subtitles,
})
return info

View File

@ -15,6 +15,7 @@ from ..utils import (
mimetype2ext,
unescapeHTML,
unsmuggle_url,
url_or_none,
urljoin,
)
@ -156,8 +157,8 @@ class MediasiteIE(InfoExtractor):
stream_formats = []
for unum, VideoUrl in enumerate(video_urls):
video_url = VideoUrl.get('Location')
if not video_url or not isinstance(video_url, compat_str):
video_url = url_or_none(VideoUrl.get('Location'))
if not video_url:
continue
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS

View File

@ -1,84 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import uuid
from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
int_or_none,
extract_attributes,
determine_ext,
smuggle_url,
parse_duration,
)
class MiTeleBaseIE(InfoExtractor):
def _get_player_info(self, url, webpage):
player_data = extract_attributes(self._search_regex(
r'(?s)(<ms-video-player.+?</ms-video-player>)',
webpage, 'ms video player'))
video_id = player_data['data-media-id']
if player_data.get('data-cms-id') == 'ooyala':
return self.url_result(
'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
config = self._download_json(
config_url, video_id, 'Downloading config JSON')
mmc_url = config['services']['mmc']
duration = None
formats = []
for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')):
mmc = self._download_json(
m_url, video_id, 'Downloading mmc JSON')
if not duration:
duration = int_or_none(mmc.get('duration'))
for location in mmc['locations']:
gat = self._proto_relative_url(location.get('gat'), 'http:')
gcp = location.get('gcp')
ogn = location.get('ogn')
if None in (gat, gcp, ogn):
continue
token_data = {
'gcp': gcp,
'ogn': ogn,
'sta': 0,
}
media = self._download_json(
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8',
'Referer': url,
})
stream = media.get('stream') or media.get('file')
if not stream:
continue
ext = determine_ext(stream)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
stream, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'),
'duration': duration,
}
class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
@ -86,7 +16,7 @@ class MiTeleIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
'info_dict': {
'id': '57b0dfb9c715da65618b4afa',
'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
'ext': 'mp4',
'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
@ -104,7 +34,7 @@ class MiTeleIE(InfoExtractor):
# no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
'info_dict': {
'id': '57b0de3dc915da14058b4876',
'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
'ext': 'mp4',
'title': 'Cuarto Milenio Temporada 6 Programa 226',
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
@ -128,40 +58,21 @@ class MiTeleIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
gigya_url = self._search_regex(
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
webpage, 'gigya', default=None)
gigya_sc = self._download_webpage(
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
video_id, 'Downloading gigya script')
# Get a appKey/uuid for getting the session key
appKey = self._search_regex(
r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
gigya_sc, 'appKey')
session_json = self._download_json(
'https://appgrid-api.cloud.accedo.tv/session',
video_id, 'Downloading session keys', query={
'appKey': appKey,
'uuid': compat_str(uuid.uuid4()),
})
paths = self._download_json(
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
video_id, 'Downloading paths JSON',
query={'sessionKey': compat_str(session_json['sessionKey'])})
'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
video_id, 'Downloading paths JSON')
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
source = self._download_json(
'http://%s%s%s/docs/%s' % (
ooyala_s['base_url'], ooyala_s['full_path'],
ooyala_s['provider_id'], video_id),
'%s://%s%s%s/docs/%s' % (
ooyala_s.get('protocol', 'https'), base_url, full_path,
ooyala_s.get('provider_id', '104951'), video_id),
video_id, 'Downloading data JSON', query={
'include_titles': 'Series,Season',
'product_name': 'test',
'product_name': ooyala_s.get('product_name', 'test'),
'format': 'full',
})['hits']['hits'][0]['_source']

View File

@ -6,28 +6,90 @@ import re
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
js_to_json,
qualities,
unified_strdate,
url_or_none,
)
class NovaEmbedIE(InfoExtractor):
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
'md5': 'b3834f6de5401baabf31ed57456463f7',
'info_dict': {
'id': '8o0n0r',
'ext': 'mp4',
'title': '2180. díl',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2578,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
bitrates = self._parse_json(
self._search_regex(
r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
formats = []
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list):
continue
for format_url in format_list:
format_url = url_or_none(format_url)
if not format_url:
continue
f = {
'url': format_url,
}
f_id = format_id
for quality in QUALITIES:
if '%s.mp4' % quality in format_url:
f_id += '-%s' % quality
f.update({
'quality': quality_key(quality),
'format_note': quality.upper(),
})
break
f['format_id'] = f_id
formats.append(f)
self._sort_formats(formats)
title = self._og_search_title(
webpage, default=None) or self._search_regex(
(r'<value>(?P<title>[^<]+)',
r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'title', group='value')
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._search_regex(
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'thumbnail', fatal=False, group='value')
duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
class NovaIE(InfoExtractor):
IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
_TESTS = [{
'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
'info_dict': {
'id': '1608920',
'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
'ext': 'flv',
'title': 'Duel: Michal Hrdlička a Petr Suchoň',
'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
'thumbnail': r're:^https?://.*\.(?:jpg)',
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
'info_dict': {
@ -38,33 +100,6 @@ class NovaIE(InfoExtractor):
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
'thumbnail': r're:^https?://.*\.(?:jpg)',
}
}, {
'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
'info_dict': {
'id': '1756825',
'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
'ext': 'flv',
'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags
'thumbnail': r're:^https?://.*\.(?:jpg)',
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/',
'info_dict': {
'id': '1756858',
'ext': 'flv',
'title': 'Televizní noviny - 30. 5. 2015',
'thumbnail': r're:^https?://.*\.(?:jpg)',
'upload_date': '20150530',
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
'info_dict': {
@ -79,6 +114,20 @@ class NovaIE(InfoExtractor):
# rtmp download
'skip_download': True,
}
}, {
# media.cms.nova.cz embed
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
'info_dict': {
'id': '8o0n0r',
'ext': 'mp4',
'title': '2180. díl',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2578,
},
'params': {
'skip_download': True,
},
'add_ie': [NovaEmbedIE.ie_key()],
}, {
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
'only_matching': True,
@ -103,6 +152,15 @@ class NovaIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
# novaplus
embed_id = self._search_regex(
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
webpage, 'embed url', default=None)
if embed_id:
return self.url_result(
'https://media.cms.nova.cz/embed/%s' % embed_id,
ie=NovaEmbedIE.ie_key(), video_id=embed_id)
video_id = self._search_regex(
[r"(?:media|video_id)\s*:\s*'(\d+)'",
r'media=(\d+)',
@ -111,8 +169,21 @@ class NovaIE(InfoExtractor):
webpage, 'video id')
config_url = self._search_regex(
r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
webpage, 'config url', default=None)
config_params = {}
if not config_url:
player = self._parse_json(
self._search_regex(
r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
'player', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if player:
config_url = url_or_none(player.get('configUrl'))
params = player.get('configParams')
if isinstance(params, dict):
config_params = params
if not config_url:
DEFAULT_SITE_ID = '23000'
@ -127,14 +198,20 @@ class NovaIE(InfoExtractor):
}
site_id = self._search_regex(
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID)
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
site, DEFAULT_SITE_ID)
config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig'
% (site_id, video_id))
config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
config_params = {
'site': site_id,
'media': video_id,
'quality': 3,
'version': 1,
}
config = self._download_json(
config_url, display_id,
'Downloading config JSON',
'Downloading config JSON', query=config_params,
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
mediafile = config['mediafile']

View File

@ -15,6 +15,7 @@ from ..utils import (
strip_jsonp,
strip_or_none,
unified_strdate,
url_or_none,
US_RATINGS,
)
@ -557,6 +558,13 @@ class PBSIE(InfoExtractor):
if redirect_url and redirect_url not in redirect_urls:
redirects.append(redirect)
redirect_urls.add(redirect_url)
encodings = info.get('encodings')
if isinstance(encodings, list):
for encoding in encodings:
encoding_url = url_or_none(encoding)
if encoding_url and encoding_url not in redirect_urls:
redirects.append({'url': encoding_url})
redirect_urls.add(encoding_url)
chapters = []
# Player pages may also serve different qualities

View File

@ -10,6 +10,7 @@ from ..utils import (
parse_resolution,
try_get,
unified_timestamp,
url_or_none,
urljoin,
)
@ -200,8 +201,8 @@ class PeerTubeIE(InfoExtractor):
for file_ in video['files']:
if not isinstance(file_, dict):
continue
file_url = file_.get('fileUrl')
if not file_url or not isinstance(file_url, compat_str):
file_url = url_or_none(file_.get('fileUrl'))
if not file_url:
continue
file_size = int_or_none(file_.get('size'))
format_id = try_get(

View File

@ -18,6 +18,7 @@ from ..utils import (
orderedSet,
remove_quotes,
str_to_int,
url_or_none,
)
@ -68,6 +69,31 @@ class PornHubIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# subtitles
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
'info_dict': {
'id': 'ph5af5fef7c2aa7',
'ext': 'mp4',
'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
'uploader': 'BFFs',
'duration': 622,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'age_limit': 18,
'tags': list,
'categories': list,
'subtitles': {
'en': [{
"ext": 'srt'
}]
},
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
@ -139,12 +165,19 @@ class PornHubIE(InfoExtractor):
video_urls = []
video_urls_set = set()
subtitles = {}
flashvars = self._parse_json(
self._search_regex(
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
video_id)
if flashvars:
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
if subtitle_url:
subtitles.setdefault('en', []).append({
'url': subtitle_url,
'ext': 'srt',
})
thumbnail = flashvars.get('image_url')
duration = int_or_none(flashvars.get('video_duration'))
media_definitions = flashvars.get('mediaDefinitions')
@ -256,6 +289,7 @@ class PornHubIE(InfoExtractor):
'age_limit': 18,
'tags': tags,
'categories': categories,
'subtitles': subtitles,
}

View File

@ -0,0 +1,247 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
parse_resolution,
str_or_none,
try_get,
unified_timestamp,
url_or_none,
urljoin,
)
class PuhuTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
IE_NAME = 'puhutv'
_TESTS = [{
# film
'url': 'https://puhutv.com/sut-kardesler-izle',
'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
'info_dict': {
'id': '5085',
'display_id': 'sut-kardesler',
'ext': 'mp4',
'title': 'Süt Kardeşler',
'description': 'md5:405fd024df916ca16731114eb18e511a',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 4832.44,
'creator': 'Arzu Film',
'timestamp': 1469778212,
'upload_date': '20160729',
'release_year': 1976,
'view_count': int,
'tags': ['Aile', 'Komedi', 'Klasikler'],
},
}, {
# episode, geo restricted, bypassable with --geo-verification-proxy
'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
'only_matching': True,
}, {
# 4k, with subtitles
'url': 'https://puhutv.com/dip-1-bolum-izle',
'only_matching': True,
}]
_SUBTITLE_LANGS = {
'English': 'en',
'Deutsch': 'de',
'عربى': 'ar'
}
def _real_extract(self, url):
display_id = self._match_id(url)
info = self._download_json(
urljoin(url, '/api/slug/%s-izle' % display_id),
display_id)['data']
video_id = compat_str(info['id'])
title = info.get('name') or info['title']['name']
if info.get('display_name'):
title = '%s %s' % (title, info.get('display_name'))
try:
videos = self._download_json(
'https://puhutv.com/api/assets/%s/videos' % video_id,
display_id, 'Downloading video JSON',
headers=self.geo_verification_headers())
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
self.raise_geo_restricted()
raise
formats = []
for video in videos['data']['videos']:
media_url = url_or_none(video.get('url'))
if not media_url:
continue
playlist = video.get('is_playlist')
if video.get('stream_type') == 'hls' and playlist is True:
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
quality = int_or_none(video.get('quality'))
f = {
'url': media_url,
'ext': 'mp4',
'height': quality
}
video_format = video.get('video_format')
if video_format == 'hls' and playlist is False:
format_id = 'hls'
f['protocol'] = 'm3u8_native'
elif video_format == 'mp4':
format_id = 'http'
else:
continue
if quality:
format_id += '-%sp' % quality
f['format_id'] = format_id
formats.append(f)
self._sort_formats(formats)
description = try_get(
info, lambda x: x['title']['description'],
compat_str) or info.get('description')
timestamp = unified_timestamp(info.get('created_at'))
creator = try_get(
info, lambda x: x['title']['producer']['name'], compat_str)
duration = float_or_none(
try_get(info, lambda x: x['content']['duration_in_ms'], int),
scale=1000)
view_count = try_get(info, lambda x: x['content']['watch_count'], int)
images = try_get(
info, lambda x: x['content']['images']['wide'], dict) or {}
thumbnails = []
for image_id, image_url in images.items():
if not isinstance(image_url, compat_str):
continue
if not image_url.startswith(('http', '//')):
image_url = 'https://%s' % image_url
t = parse_resolution(image_id)
t.update({
'id': image_id,
'url': image_url
})
thumbnails.append(t)
release_year = try_get(info, lambda x: x['title']['released_at'], int)
season_number = int_or_none(info.get('season_number'))
season_id = str_or_none(info.get('season_id'))
episode_number = int_or_none(info.get('episode_number'))
tags = []
for genre in try_get(info, lambda x: x['title']['genres'], list) or []:
if not isinstance(genre, dict):
continue
genre_name = genre.get('name')
if genre_name and isinstance(genre_name, compat_str):
tags.append(genre_name)
subtitles = {}
for subtitle in try_get(
info, lambda x: x['content']['subtitles'], list) or []:
if not isinstance(subtitle, dict):
continue
lang = subtitle.get('language')
sub_url = url_or_none(subtitle.get('url'))
if not lang or not isinstance(lang, compat_str) or not sub_url:
continue
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
'url': sub_url
}]
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'season_id': season_id,
'season_number': season_number,
'episode_number': episode_number,
'release_year': release_year,
'timestamp': timestamp,
'creator': creator,
'view_count': view_count,
'duration': duration,
'tags': tags,
'subtitles': subtitles,
'thumbnails': thumbnails,
'formats': formats
}
class PuhuTVSerieIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
IE_NAME = 'puhutv:serie'
_TESTS = [{
'url': 'https://puhutv.com/deniz-yildizi-detay',
'info_dict': {
'title': 'Deniz Yıldızı',
'id': 'deniz-yildizi',
},
'playlist_mincount': 205,
}, {
# a film detail page which is using same url with serie page
'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
'only_matching': True,
}]
def _extract_entries(self, seasons):
for season in seasons:
season_id = season.get('id')
if not season_id:
continue
page = 1
has_more = True
while has_more is True:
season = self._download_json(
'https://galadriel.puhutv.com/seasons/%s' % season_id,
season_id, 'Downloading page %s' % page, query={
'page': page,
'per': 40,
})
episodes = season.get('episodes')
if isinstance(episodes, list):
for ep in episodes:
slug_path = str_or_none(ep.get('slugPath'))
if not slug_path:
continue
video_id = str_or_none(int_or_none(ep.get('id')))
yield self.url_result(
'https://puhutv.com/%s' % slug_path,
ie=PuhuTVIE.ie_key(), video_id=video_id,
video_title=ep.get('name') or ep.get('eventLabel'))
page += 1
has_more = season.get('hasMore')
def _real_extract(self, url):
playlist_id = self._match_id(url)
info = self._download_json(
urljoin(url, '/api/slug/%s-detay' % playlist_id),
playlist_id)['data']
seasons = info.get('seasons')
if seasons:
return self.playlist_result(
self._extract_entries(seasons), playlist_id, info.get('name'))
# For films, these are using same url with series
video_id = info.get('slug') or info['assets'][0]['slug']
return self.url_result(
'https://puhutv.com/%s-izle' % video_id,
PuhuTVIE.ie_key(), video_id)

View File

@ -32,6 +32,9 @@ class RaiBaseIE(InfoExtractor):
_GEO_BYPASS = False
def _extract_relinker_info(self, relinker_url, video_id):
if not re.match(r'https?://', relinker_url):
return {'formats': [{'url': relinker_url}]}
formats = []
geoprotection = None
is_live = None
@ -369,6 +372,10 @@ class RaiIE(RaiBaseIE):
'params': {
'skip_download': True,
},
}, {
# Direct MMS URL
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
'only_matching': True,
}]
def _extract_from_content_id(self, content_id, url):

View File

@ -4,24 +4,37 @@ import re
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..compat import compat_str
from ..utils import (
extract_attributes,
ExtractorError,
smuggle_url,
unsmuggle_url,
int_or_none,
merge_dicts,
try_get,
unescapeHTML,
unified_timestamp,
urljoin,
)
class RayWenderlichIE(InfoExtractor):
_VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'
_VALID_URL = r'''(?x)
https?://
(?:
videos\.raywenderlich\.com/courses|
(?:www\.)?raywenderlich\.com
)/
(?P<course_id>[^/]+)/lessons/(?P<id>\d+)
'''
_TESTS = [{
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
'info_dict': {
'id': '248377018',
'ext': 'mp4',
'title': 'Testing In iOS Episode 1: Introduction',
'title': 'Introduction',
'description': 'md5:804d031b3efa9fcb49777d512d74f722',
'timestamp': 1513906277,
'upload_date': '20171222',
'duration': 133,
'uploader': 'Ray Wenderlich',
'uploader_id': 'user3304672',
@ -34,69 +47,133 @@ class RayWenderlichIE(InfoExtractor):
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
'only_matching': True,
}]
@staticmethod
def _extract_video_id(data, lesson_id):
if not data:
return
groups = try_get(data, lambda x: x['groups'], list) or []
if not groups:
return
for group in groups:
if not isinstance(group, dict):
continue
contents = try_get(data, lambda x: x['contents'], list) or []
for content in contents:
if not isinstance(content, dict):
continue
ordinal = int_or_none(content.get('ordinal'))
if ordinal != lesson_id:
continue
video_id = content.get('identifier')
if video_id:
return compat_str(video_id)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
course_id, lesson_id = mobj.group('course_id', 'id')
display_id = '%s/%s' % (course_id, lesson_id)
webpage = self._download_webpage(url, display_id)
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_meta(
'twitter:image', webpage, 'thumbnail')
if '>Subscribe to unlock' in webpage:
raise ExtractorError(
'This content is only available for subscribers',
expected=True)
info = {
'thumbnail': thumbnail,
}
vimeo_id = self._search_regex(
r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
if not vimeo_id:
data = self._parse_json(
self._search_regex(
r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
'data collection', default='{}', group='data'),
display_id, transform_source=unescapeHTML, fatal=False)
video_id = self._extract_video_id(
data, lesson_id) or self._search_regex(
r'/videos/(\d+)/', thumbnail, 'video id')
headers = {
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
}
csrf_token = self._html_search_meta(
'csrf-token', webpage, 'csrf token', default=None)
if csrf_token:
headers['X-CSRF-Token'] = csrf_token
video = self._download_json(
'https://videos.raywenderlich.com/api/v1/videos/%s.json'
% video_id, display_id, headers=headers)['video']
vimeo_id = video['clips'][0]['provider_id']
info.update({
'_type': 'url_transparent',
'title': video.get('name'),
'description': video.get('description') or video.get(
'meta_description'),
'duration': int_or_none(video.get('duration')),
'timestamp': unified_timestamp(video.get('created_at')),
})
return merge_dicts(info, self.url_result(
VimeoIE._smuggle_referrer(
'https://player.vimeo.com/video/%s' % vimeo_id, url),
ie=VimeoIE.ie_key(), video_id=vimeo_id))
class RayWenderlichCourseIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
videos\.raywenderlich\.com/courses|
(?:www\.)?raywenderlich\.com
)/
(?P<id>[^/]+)
'''
_TEST = {
'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
'info_dict': {
'title': 'Testing in iOS',
'id': '105-testing-in-ios',
'id': '3530-testing-in-ios',
},
'params': {
'noplaylist': False,
},
'playlist_count': 29,
}]
}
@classmethod
def suitable(cls, url):
return False if RayWenderlichIE.suitable(url) else super(
RayWenderlichCourseIE, cls).suitable(url)
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
course_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
course_id, lesson_id = mobj.group('course_id', 'id')
video_id = '%s/%s' % (course_id, lesson_id)
webpage = self._download_webpage(url, video_id)
no_playlist = self._downloader.params.get('noplaylist')
if no_playlist or smuggled_data.get('force_video', False):
if no_playlist:
self.to_screen(
'Downloading just video %s because of --no-playlist'
% video_id)
if '>Subscribe to unlock' in webpage:
raise ExtractorError(
'This content is only available for subscribers',
expected=True)
vimeo_id = self._search_regex(
r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
return self.url_result(
VimeoIE._smuggle_referrer(
'https://player.vimeo.com/video/%s' % vimeo_id, url),
ie=VimeoIE.ie_key(), video_id=vimeo_id)
self.to_screen(
'Downloading playlist %s - add --no-playlist to just download video'
% course_id)
lesson_ids = set((lesson_id, ))
for lesson in re.findall(
r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
attrs = extract_attributes(lesson)
if not attrs:
continue
lesson_url = attrs.get('href')
if not lesson_url:
continue
lesson_id = self._search_regex(
r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
if not lesson_id:
continue
lesson_ids.add(lesson_id)
webpage = self._download_webpage(url, course_id)
entries = []
for lesson_id in sorted(lesson_ids):
lesson_urls = set()
for lesson_url in re.findall(
r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
if lesson_url in lesson_urls:
continue
lesson_urls.add(lesson_url)
entries.append(self.url_result(
smuggle_url(urljoin(url, lesson_id), {'force_video': True}),
ie=RayWenderlichIE.ie_key()))
urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
title = self._search_regex(
r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
default=None)
title = self._og_search_title(
webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title', default=None)
return self.playlist_result(entries, course_id, title)

View File

@ -10,7 +10,7 @@ from ..utils import (
class RedBullTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)'
_VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com/(?:[^/]+/)?tv)/video/(?P<id>AP-\w+)'
_TESTS = [{
# film
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
@ -35,6 +35,9 @@ class RedBullTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -3,12 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
str_to_int,
unified_strdate,
url_or_none,
)
@ -71,8 +71,8 @@ class RedTubeIE(InfoExtractor):
video_id, fatal=False)
if medias and isinstance(medias, list):
for media in medias:
format_url = media.get('videoUrl')
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(media.get('videoUrl'))
if not format_url:
continue
format_id = media.get('quality')
formats.append({

View File

@ -6,6 +6,7 @@ from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
url_or_none,
)
@ -37,8 +38,8 @@ class RENTVIE(InfoExtractor):
title = config['title']
formats = []
for video in config['src']:
src = video.get('src')
if not src or not isinstance(src, compat_str):
src = url_or_none(video.get('src'))
if not src:
continue
ext = determine_ext(src)
if ext == 'm3u8':

View File

@ -16,6 +16,7 @@ from ..utils import (
int_or_none,
try_get,
unified_timestamp,
url_or_none,
)
@ -176,8 +177,8 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
break
for result in results:
video_url = result.get('video_url')
if not video_url or not isinstance(video_url, compat_str):
video_url = url_or_none(result.get('video_url'))
if not video_url:
continue
entry = self._extract_video(result, require_title=False)
entry.update({

View File

@ -19,7 +19,7 @@ from ..utils import (
class SixPlayIE(InfoExtractor):
IE_NAME = '6play'
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay.be)/.+?-c_)(?P<id>[0-9]+)'
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
@ -32,6 +32,9 @@ class SixPlayIE(InfoExtractor):
}, {
'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
'only_matching': True,
}, {
'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
'only_matching': True,
}]
def _real_extract(self, url):
@ -39,6 +42,7 @@ class SixPlayIE(InfoExtractor):
service, consumer_name = {
'6play.fr': ('6play', 'm6web'),
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
}.get(domain, ('6play', 'm6web'))
data = self._download_json(

View File

@ -1,12 +1,10 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class SlutloadIE(InfoExtractor):
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
_VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
'md5': '868309628ba00fd488cf516a113fd717',
@ -16,33 +14,52 @@ class SlutloadIE(InfoExtractor):
'title': 'virginie baisee en cam',
'age_limit': 18,
'thumbnail': r're:https?://.*?\.jpg'
}
},
}, {
# mobile site
'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
'only_matching': True,
}, {
'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
'only_matching': True,
}, {
'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
webpage = self._download_webpage(desktop_url, video_id)
embed_page = self._download_webpage(
'http://www.slutload.com/embed_player/%s' % video_id, video_id,
'Downloading embed page', fatal=False)
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
webpage, 'title').strip()
if embed_page:
def extract(what):
return self._html_search_regex(
r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
embed_page, 'video %s' % what, default=None, group='url')
video_url = self._html_search_regex(
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
webpage, 'video URL')
thumbnail = self._html_search_regex(
r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
webpage, 'thumbnail', fatal=False)
video_url = extract('url')
if video_url:
title = self._html_search_regex(
r'<title>([^<]+)', embed_page, 'title', default=video_id)
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': extract('preview'),
'age_limit': 18
}
return {
webpage = self._download_webpage(
'http://www.slutload.com/video/_/%s/' % video_id, video_id)
title = self._html_search_regex(
r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
info.update({
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': thumbnail,
'age_limit': 18
}
'title': title,
'age_limit': 18,
})
return info

View File

@ -72,4 +72,7 @@ class StreamcloudIE(InfoExtractor):
'title': title,
'url': video_url,
'thumbnail': thumbnail,
'http_headers': {
'Referer': url,
},
}

View File

@ -7,8 +7,10 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
int_or_none,
try_get,
url_or_none,
)
@ -30,7 +32,7 @@ class TEDIE(InfoExtractor):
'''
_TESTS = [{
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
'md5': 'b0ce2b05ca215042124fbc9e3886493a',
'info_dict': {
'id': '102',
'ext': 'mp4',
@ -42,24 +44,30 @@ class TEDIE(InfoExtractor):
'uploader': 'Dan Dennett',
'width': 853,
'duration': 1308,
}
}, {
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
'md5': 'b899ac15e345fb39534d913f7606082b',
'info_dict': {
'id': 'tSVI8ta_P4w',
'ext': 'mp4',
'title': 'Vishal Sikka: The beauty and power of algorithms',
'thumbnail': r're:^https?://.+\.jpg',
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
'upload_date': '20140122',
'uploader_id': 'TEDInstitute',
'uploader': 'TED Institute',
'view_count': int,
'comment_count': int,
'tags': list,
},
'params': {
'skip_download': True,
},
}, {
# missing HTTP bitrates
'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms',
'info_dict': {
'id': '6069',
'ext': 'mp4',
'title': 'The beauty and power of algorithms',
'thumbnail': r're:^https?://.+\.jpg',
'description': 'md5:734e352710fb00d840ab87ae31aaf688',
'uploader': 'Vishal Sikka',
},
'params': {
'skip_download': True,
},
'add_ie': ['Youtube'],
}, {
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
'md5': '71b3ab2f4233012dce09d515c9c39ce2',
'md5': 'e6b9617c01a7970ceac8bb2c92c346c0',
'info_dict': {
'id': '1972',
'ext': 'mp4',
@ -68,6 +76,9 @@ class TEDIE(InfoExtractor):
'description': 'md5:5174aed4d0f16021b704120360f72b92',
'duration': 1128,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
'info_dict': {
@ -92,17 +103,17 @@ class TEDIE(InfoExtractor):
'skip_download': True,
},
}, {
# YouTube video
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
'add_ie': ['Youtube'],
# no nativeDownloads
'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth',
'info_dict': {
'id': 'aFBIPO-P7LM',
'id': '1792',
'ext': 'mp4',
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
'uploader': 'TEDx Talks',
'uploader_id': 'TEDxTalks',
'upload_date': '20111216',
'title': 'The orchestra in my mouth',
'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
'uploader': 'Tom Thum',
'view_count': int,
'comment_count': int,
'tags': list,
},
'params': {
'skip_download': True,
@ -161,27 +172,16 @@ class TEDIE(InfoExtractor):
info = self._extract_info(webpage)
talk_info = try_get(
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
dict) or info['talks'][0]
data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info
talk_info = data['talks'][0]
title = talk_info['title'].strip()
external = talk_info.get('external')
if external:
service = external['service']
self.to_screen('Found video from %s' % service)
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return {
'_type': 'url',
'url': ext_url or external['uri'],
}
native_downloads = try_get(
talk_info, lambda x: x['downloads']['nativeDownloads'],
dict) or talk_info['nativeDownloads']
talk_info,
(lambda x: x['downloads']['nativeDownloads'],
lambda x: x['nativeDownloads']),
dict) or {}
formats = [{
'url': format_url,
@ -196,10 +196,24 @@ class TEDIE(InfoExtractor):
player_talk = talk_info['player_talks'][0]
external = player_talk.get('external')
if isinstance(external, dict):
service = external.get('service')
if isinstance(service, compat_str):
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return {
'_type': 'url',
'url': ext_url or external['uri'],
}
resources_ = player_talk.get('resources') or talk_info.get('resources')
http_url = None
for format_id, resources in resources_.items():
if not isinstance(resources, dict):
continue
if format_id == 'h264':
for resource in resources:
h264_url = resource.get('file')
@ -228,8 +242,12 @@ class TEDIE(InfoExtractor):
'tbr': int_or_none(resource.get('bitrate')),
})
elif format_id == 'hls':
stream_url = url_or_none(resources.get('stream'))
if not stream_url:
continue
formats.extend(self._extract_m3u8_formats(
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
stream_url, video_name, 'mp4', m3u8_id=format_id,
fatal=False))
m3u8_formats = list(filter(
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
@ -239,9 +257,13 @@ class TEDIE(InfoExtractor):
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
if not bitrate:
continue
bitrate_url = re.sub(r'\d+k', bitrate, http_url)
if not self._is_valid_url(
bitrate_url, video_name, '%s bitrate' % bitrate):
continue
f = m3u8_format.copy()
f.update({
'url': re.sub(r'\d+k', bitrate, http_url),
'url': bitrate_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
@ -267,7 +289,11 @@ class TEDIE(InfoExtractor):
'description': self._og_search_description(webpage),
'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats,
'duration': talk_info.get('duration'),
'duration': float_or_none(talk_info.get('duration')),
'view_count': int_or_none(data.get('viewed_count')),
'comment_count': int_or_none(
try_get(data, lambda x: x['comments']['count'])),
'tags': try_get(talk_info, lambda x: x['tags'], list),
}
def _get_subtitles(self, video_id, talk_info):

View File

@ -1,26 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .mitele import MiTeleBaseIE
import json
import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..utils import (
clean_html,
determine_ext,
int_or_none,
str_or_none,
urljoin,
)
class TelecincoIE(MiTeleBaseIE):
class TelecincoIE(InfoExtractor):
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
_TESTS = [{
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
'md5': '8d7b2d5f699ee2709d992a63d5cd1712',
'info_dict': {
'id': 'JEA5ijCnF6p5W08A1rNKn7',
'ext': 'mp4',
'id': '1876350223',
'title': 'Bacalao con kokotxas al pil-pil',
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
'duration': 662,
},
'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7',
'info_dict': {
'id': 'JEA5ijCnF6p5W08A1rNKn7',
'ext': 'mp4',
'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
'duration': 662,
},
}]
}, {
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
'md5': '284393e5387b3b947b77c613ef04749a',
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
'info_dict': {
'id': 'jn24Od1zGLG4XUZcnUnZB6',
'ext': 'mp4',
@ -30,7 +47,7 @@ class TelecincoIE(MiTeleBaseIE):
},
}, {
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
'md5': '749afab6ea5a136a8806855166ae46a2',
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
'info_dict': {
'id': 'aywerkD2Sv1vGNqq9b85Q2',
'ext': 'mp4',
@ -50,17 +67,90 @@ class TelecincoIE(MiTeleBaseIE):
'only_matching': True,
}]
def _parse_content(self, content, url):
video_id = content['dataMediaId']
if content.get('dataCmsId') == 'ooyala':
return self.url_result(
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
config_url = urljoin(url, content['dataConfig'])
config = self._download_json(
config_url, video_id, 'Downloading config JSON')
title = config['info']['title']
def mmc_url(mmc_type):
return re.sub(
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
config['services']['mmc'])
duration = None
formats = []
for mmc_type in ('flash', 'html5'):
mmc = self._download_json(
mmc_url(mmc_type), video_id,
'Downloading %s mmc JSON' % mmc_type, fatal=False)
if not mmc:
continue
if not duration:
duration = int_or_none(mmc.get('duration'))
for location in mmc['locations']:
gat = self._proto_relative_url(location.get('gat'), 'http:')
gcp = location.get('gcp')
ogn = location.get('ogn')
if None in (gat, gcp, ogn):
continue
token_data = {
'gcp': gcp,
'ogn': ogn,
'sta': 0,
}
media = self._download_json(
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8',
'Referer': url,
}, fatal=False) or {}
stream = media.get('stream') or media.get('file')
if not stream:
continue
ext = determine_ext(stream)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
stream, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
'duration': duration,
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._html_search_meta(
['og:title', 'twitter:title'], webpage, 'title')
info = self._get_player_info(url, webpage)
article = self._parse_json(self._search_regex(
r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
webpage, 'article'), display_id)['article']
title = article.get('title')
description = clean_html(article.get('leadParagraph'))
if article.get('editorialType') != 'VID':
entries = []
for p in article.get('body', []):
content = p.get('content')
if p.get('type') != 'video' or not content:
continue
entries.append(self._parse_content(content, url))
return self.playlist_result(
entries, str_or_none(article.get('id')), title, description)
content = article['opening']['content']
info = self._parse_content(content, url)
info.update({
'display_id': display_id,
'title': title,
'description': self._html_search_meta(
['og:description', 'twitter:description'],
webpage, 'title', fatal=False),
'description': description,
})
return info

View File

@ -32,13 +32,15 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformBaseIE(OnceIE):
_TP_TLD = 'com'
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
meta = self._download_xml(
smil_url, video_id, note=note, query={'format': 'SMIL'},
headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
if error_element is not None and error_element.attrib['src'].startswith(
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
raise ExtractorError(error_element.attrib['abstract'], expected=True)
smil_formats = self._parse_smil_formats(
@ -66,7 +68,7 @@ class ThePlatformBaseIE(OnceIE):
return formats, subtitles
def _download_theplatform_metadata(self, path, video_id):
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
return self._download_json(info_url, video_id)
def _parse_theplatform_metadata(self, info):
@ -308,7 +310,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
class ThePlatformFeedIE(ThePlatformBaseIE):
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
_TESTS = [{
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
@ -325,6 +327,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
'uploader': 'NBCU-NEWS',
},
}, {
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
'only_matching': True,
}]
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):

View File

@ -15,6 +15,7 @@ from ..utils import (
update_url_query,
ExtractorError,
strip_or_none,
url_or_none,
)
@ -154,8 +155,8 @@ class TurnerBaseIE(AdobePassIE):
subtitles = {}
for source in video_data.findall('closedCaptions/source'):
for track in source.findall('track'):
track_url = track.get('url')
if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
track_url = url_or_none(track.get('url'))
if not track_url or track_url.endswith('/big'):
continue
lang = track.get('lang') or track.get('label') or 'en'
subtitles.setdefault(lang, []).append({

View File

@ -4,10 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
unescapeHTML,
url_or_none,
)
@ -106,9 +106,8 @@ class TVNetIE(InfoExtractor):
for stream in self._download_json(data_file, video_id):
if not isinstance(stream, dict):
continue
stream_url = stream.get('url')
if (stream_url in stream_urls or not stream_url or
not isinstance(stream_url, compat_str)):
stream_url = url_or_none(stream.get('url'))
if stream_url in stream_urls or not stream_url:
continue
stream_urls.add(stream_url)
formats.extend(self._extract_m3u8_formats(

View File

@ -19,6 +19,7 @@ from ..utils import (
try_get,
unsmuggle_url,
update_url_query,
url_or_none,
)
@ -31,12 +32,12 @@ class TVPlayIE(InfoExtractor):
https?://
(?:www\.)?
(?:
tvplay(?:\.skaties)?\.lv/parraides|
(?:tv3play|play\.tv3)\.lt/programos|
tvplay(?:\.skaties)?\.lv(?:/parraides)?|
(?:tv3play|play\.tv3)\.lt(?:/programos)?|
tv3play(?:\.tv3)?\.ee/sisu|
(?:tv(?:3|6|8|10)play|viafree)\.se/program|
(?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
play\.novatv\.bg/programi
play\.nova(?:tv)?\.bg/programi
)
/(?:[^/]+/)+
)
@ -202,10 +203,18 @@ class TVPlayIE(InfoExtractor):
'skip_download': True,
},
},
{
'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
'only_matching': True,
},
{
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
'only_matching': True,
},
{
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
'only_matching': True,
},
{
# views is null
'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
@ -255,7 +264,8 @@ class TVPlayIE(InfoExtractor):
quality = qualities(['hls', 'medium', 'high'])
formats = []
for format_id, video_url in streams.get('streams', {}).items():
if not video_url or not isinstance(video_url, compat_str):
video_url = url_or_none(video_url)
if not video_url:
continue
ext = determine_ext(video_url)
if ext == 'f4m':
@ -286,6 +296,7 @@ class TVPlayIE(InfoExtractor):
'url': m.group('url'),
'app': m.group('app'),
'play_path': m.group('playpath'),
'preference': -1,
})
else:
fmt.update({
@ -445,3 +456,102 @@ class ViafreeIE(InfoExtractor):
'skip_rtmp': True,
}),
ie=TVPlayIE.ie_key(), video_id=video_id)
class TVPlayHomeIE(InfoExtractor):
_VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
'info_dict': {
'id': '366367',
'ext': 'mp4',
'title': 'Aferistai',
'description': 'Aferistai. Kalėdinė pasaka.',
'series': 'Aferistai [N-7]',
'season': '1 sezonas',
'season_number': 1,
'duration': 464,
'timestamp': 1394209658,
'upload_date': '20140307',
'age_limit': 18,
},
'params': {
'skip_download': True,
},
'add_ie': [TVPlayIE.ie_key()],
}, {
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
'only_matching': True,
}, {
'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_id = self._search_regex(
r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
default=None)
if video_id:
return self.url_result(
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
m3u8_url = self._search_regex(
r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'm3u8 url', group='url')
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)
title = self._search_regex(
r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'title', default=None, group='value') or self._html_search_meta(
'title', webpage, default=None) or self._og_search_title(
webpage)
description = self._html_search_meta(
'description', webpage,
default=None) or self._og_search_description(webpage)
thumbnail = self._search_regex(
r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'thumbnail', default=None, group='url') or self._html_search_meta(
'thumbnail', webpage, default=None) or self._og_search_thumbnail(
webpage)
duration = int_or_none(self._search_regex(
r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
fatal=False))
season = self._search_regex(
(r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'season', default=None, group='value')
season_number = int_or_none(self._search_regex(
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
default=None))
episode = self._search_regex(
r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
default=None, group='value')
episode_number = int_or_none(self._search_regex(
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
default=None))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'season': season,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'formats': formats,
}

View File

@ -4,10 +4,10 @@ from __future__ import unicode_literals
import itertools
import re
import random
import json
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_kwargs,
compat_parse_qs,
compat_str,
@ -26,7 +26,7 @@ from ..utils import (
try_get,
unified_timestamp,
update_url_query,
urlencode_postdata,
url_or_none,
urljoin,
)
@ -36,8 +36,9 @@ class TwitchBaseIE(InfoExtractor):
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'https://usher.ttvnw.net'
_LOGIN_URL = 'https://www.twitch.tv/login'
_CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
_LOGIN_FORM_URL = 'https://www.twitch.tv/login'
_LOGIN_POST_URL = 'https://passport.twitch.tv/login'
_CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
_NETRC_MACHINE = 'twitch'
def _handle_error(self, response):
@ -76,22 +77,21 @@ class TwitchBaseIE(InfoExtractor):
page_url = urlh.geturl()
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
'post url', default=page_url, group='url')
'post url', default=self._LOGIN_POST_URL, group='url')
post_url = urljoin(page_url, post_url)
headers = {'Referer': page_url}
headers = {
'Referer': page_url,
'Origin': page_url,
'Content-Type': 'text/plain;charset=UTF-8',
}
try:
response = self._download_json(
post_url, None, note,
data=urlencode_postdata(form),
headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
response = self._parse_json(
e.cause.read().decode('utf-8'), None)
fail(response.get('message') or response['errors'][0])
raise
response = self._download_json(
post_url, None, note, data=json.dumps(form).encode(),
headers=headers, expected_status=400)
error = response.get('error_description') or response.get('error_code')
if error:
fail(error)
if 'Authenticated successfully' in response.get('message', ''):
return None, None
@ -104,7 +104,7 @@ class TwitchBaseIE(InfoExtractor):
headers=headers)
login_page, handle = self._download_webpage_handle(
self._LOGIN_URL, None, 'Downloading login page')
self._LOGIN_FORM_URL, None, 'Downloading login page')
# Some TOR nodes and public proxies are blocked completely
if 'blacklist_message' in login_page:
@ -114,6 +114,7 @@ class TwitchBaseIE(InfoExtractor):
login_page, handle, 'Logging in', {
'username': username,
'password': password,
'client_id': self._CLIENT_ID,
})
# Successful login
@ -239,7 +240,7 @@ class TwitchVodIE(TwitchItemBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
player\.twitch\.tv/\?.*?\bvideo=v
)
(?P<id>\d+)
@ -295,6 +296,9 @@ class TwitchVodIE(TwitchItemBaseIE):
}, {
'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
'only_matching': True,
}, {
'url': 'https://www.twitch.tv/northernlion/video/291940395',
'only_matching': True,
}]
def _real_extract(self, url):
@ -663,8 +667,8 @@ class TwitchClipsIE(TwitchBaseIE):
for option in status['quality_options']:
if not isinstance(option, dict):
continue
source = option.get('source')
if not source or not isinstance(source, compat_str):
source = url_or_none(option.get('source'))
if not source:
continue
formats.append({
'url': source,

View File

@ -20,6 +20,7 @@ from ..utils import (
sanitized_Request,
try_get,
unescapeHTML,
url_or_none,
urlencode_postdata,
)
@ -265,8 +266,8 @@ class UdemyIE(InfoExtractor):
if not isinstance(source_list, list):
return
for source in source_list:
video_url = source.get('file') or source.get('src')
if not video_url or not isinstance(video_url, compat_str):
video_url = url_or_none(source.get('file') or source.get('src'))
if not video_url:
continue
if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
@ -293,8 +294,8 @@ class UdemyIE(InfoExtractor):
continue
if track.get('kind') != 'captions':
continue
src = track.get('src')
if not src or not isinstance(src, compat_str):
src = url_or_none(track.get('src'))
if not src:
continue
lang = track.get('language') or track.get(
'srclang') or track.get('label')
@ -314,8 +315,8 @@ class UdemyIE(InfoExtractor):
for cc in captions:
if not isinstance(cc, dict):
continue
cc_url = cc.get('url')
if not cc_url or not isinstance(cc_url, compat_str):
cc_url = url_or_none(cc.get('url'))
if not cc_url:
continue
lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
sub_dict = (automatic_captions if cc.get('source') == 'auto'

View File

@ -3,15 +3,13 @@ from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
parse_iso8601,
url_or_none,
)
@ -166,8 +164,8 @@ class VidmeIE(InfoExtractor):
formats = []
for f in video.get('formats', []):
format_url = f.get('uri')
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(f.get('uri'))
if not format_url:
continue
format_type = f.get('type')
if format_type == 'dash':

View File

@ -13,7 +13,7 @@ from ..utils import (
class VidziIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://vidzi.tv/cghql9yq6emu.html',
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
@ -35,6 +35,9 @@ class VidziIE(InfoExtractor):
}, {
'url': 'https://vidzi.si/rph9gztxj1et.html',
'only_matching': True,
}, {
'url': 'http://vidzi.nu/cghql9yq6emu.html',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -539,9 +539,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
# We try to find out to which variable is assigned the config dic
m_variable_name = re.search(r'(\w)\.video\.id', webpage)
if m_variable_name is not None:
config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
config = self._search_regex(config_re, webpage, 'info section',
flags=re.DOTALL)
config = json.loads(config)

View File

@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
str_or_none,
url_or_none,
)
class ViqeoIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
viqeo:|
https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
https?://api\.viqeo\.tv/v\d+/data/startup?.*?\bvideo(?:%5B%5D|\[\])=
)
(?P<id>[\da-f]+)
'''
_TESTS = [{
'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
'md5': 'a169dd1a6426b350dca4296226f21e76',
'info_dict': {
'id': 'cde96f09d25f39bee837',
'ext': 'mp4',
'title': 'cde96f09d25f39bee837',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 76,
},
}, {
'url': 'viqeo:cde96f09d25f39bee837',
'only_matching': True,
}, {
'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)
data = self._parse_json(
self._search_regex(
r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
video_id)
formats = []
thumbnails = []
for media_file in data['mediaFiles']:
if not isinstance(media_file, dict):
continue
media_url = url_or_none(media_file.get('url'))
if not media_url or not media_url.startswith(('http', '//')):
continue
media_type = str_or_none(media_file.get('type'))
if not media_type:
continue
media_kind = media_type.split('/')[0].lower()
f = {
'url': media_url,
'width': int_or_none(media_file.get('width')),
'height': int_or_none(media_file.get('height')),
}
format_id = str_or_none(media_file.get('quality'))
if media_kind == 'image':
f['id'] = format_id
thumbnails.append(f)
elif media_kind in ('video', 'audio'):
is_audio = media_kind == 'audio'
f.update({
'format_id': 'audio' if is_audio else format_id,
'fps': int_or_none(media_file.get('fps')),
'vcodec': 'none' if is_audio else None,
})
formats.append(f)
self._sort_formats(formats)
duration = int_or_none(data.get('duration'))
return {
'id': video_id,
'title': video_id,
'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
}

View File

@ -195,16 +195,29 @@ class ViuOTTIE(InfoExtractor):
'skip': 'Geo-restricted to Hong Kong',
}]
_AREA_ID = {
'HK': 1,
'SG': 2,
'TH': 4,
'PH': 5,
}
def _real_extract(self, url):
country_code, video_id = re.match(self._VALID_URL, url).groups()
query = {
'r': 'vod/ajax-detail',
'platform_flag_label': 'web',
'product_id': video_id,
}
area_id = self._AREA_ID.get(country_code.upper())
if area_id:
query['area_id'] = area_id
product_data = self._download_json(
'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
'Downloading video info', query={
'r': 'vod/ajax-detail',
'platform_flag_label': 'web',
'product_id': video_id,
})['data']
'Downloading video info', query=query)['data']
video_data = product_data.get('current_product')
if not video_data:
@ -214,6 +227,9 @@ class ViuOTTIE(InfoExtractor):
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
video_id, 'Downloading stream info', query={
'ccs_product_id': video_data['ccs_product_id'],
}, headers={
'Referer': url,
'Origin': re.search(r'https?://[^/]+', url).group(0),
})['data']['stream']
stream_sizes = stream_data.get('size', {})

View File

@ -17,9 +17,11 @@ from ..utils import (
int_or_none,
orderedSet,
remove_start,
str_or_none,
str_to_int,
unescapeHTML,
unified_timestamp,
url_or_none,
urlencode_postdata,
)
from .dailymotion import DailymotionIE
@ -105,10 +107,10 @@ class VKIE(VKBaseIE):
'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'uploader_id': '-77521',
'duration': 195,
'timestamp': 1329060660,
'timestamp': 1329049880,
'upload_date': '20120212',
'view_count': int,
},
},
{
@ -117,12 +119,12 @@ class VKIE(VKBaseIE):
'info_dict': {
'id': '165548505',
'ext': 'mp4',
'uploader': 'Tom Cruise',
'title': 'No name',
'uploader': 'Tom Cruise',
'uploader_id': '205387401',
'duration': 9,
'timestamp': 1374374880,
'upload_date': '20130721',
'view_count': int,
'timestamp': 1374364108,
'upload_date': '20130720',
}
},
{
@ -206,10 +208,10 @@ class VKIE(VKBaseIE):
'id': 'V3K4mi0SYkc',
'ext': 'webm',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:d9903938abdc74c738af77f527ca0596',
'duration': 178,
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'duration': 179,
'upload_date': '20130116',
'uploader': "Children's Joy Foundation",
'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf',
'view_count': int,
},
@ -221,6 +223,7 @@ class VKIE(VKBaseIE):
'id': 'k3lz2cmXyRuJQSjGHUv',
'ext': 'mp4',
'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
# TODO: fix test by fixing dailymotion description extraction
'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
'uploader': 'AniLibria.Tv',
'upload_date': '20160914',
@ -240,9 +243,12 @@ class VKIE(VKBaseIE):
'ext': 'mp4',
'title': 'S-Dance, репетиции к The way show',
'uploader': 'THE WAY SHOW | 17 апреля',
'timestamp': 1454870100,
'uploader_id': '-110305615',
'timestamp': 1454859345,
'upload_date': '20160207',
'view_count': int,
},
'params': {
'skip_download': True,
},
},
{
@ -295,7 +301,7 @@ class VKIE(VKBaseIE):
video_id = mobj.group('videoid')
if video_id:
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id
# Some videos (removed?) can only be downloaded with list id specified
list_id = mobj.group('list_id')
if list_id:
@ -345,6 +351,9 @@ class VKIE(VKBaseIE):
r'<!>This video is no longer available, because its author has been blocked.':
'Video %s is no longer available, because its author has been blocked.',
r'<!>This video is no longer available, because it has been deleted.':
'Video %s is no longer available, because it has been deleted.',
}
for error_re, error_msg in ERRORS.items():
@ -393,7 +402,8 @@ class VKIE(VKBaseIE):
if not data:
data = self._parse_json(
self._search_regex(
r'<!json>\s*({.+?})\s*<!>', info_page, 'json', default='{}'),
[r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
info_page, 'json', default='{}'),
video_id)
if data:
data = data['player']['params'][0]
@ -415,7 +425,7 @@ class VKIE(VKBaseIE):
timestamp = unified_timestamp(self._html_search_regex(
r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
'upload date', fatal=False))
'upload date', default=None)) or int_or_none(data.get('date'))
view_count = str_to_int(self._search_regex(
r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
@ -423,7 +433,8 @@ class VKIE(VKBaseIE):
formats = []
for format_id, format_url in data.items():
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
continue
if (format_id.startswith(('url', 'cache')) or
format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
@ -452,9 +463,12 @@ class VKIE(VKBaseIE):
'title': title,
'thumbnail': data.get('jpg'),
'uploader': data.get('md_author'),
'uploader_id': str_or_none(data.get('author_id')),
'duration': data.get('duration'),
'timestamp': timestamp,
'view_count': view_count,
'like_count': int_or_none(data.get('liked')),
'dislike_count': int_or_none(data.get('nolikes')),
'is_live': is_live,
}

View File

@ -10,6 +10,7 @@ from ..utils import (
js_to_json,
strip_or_none,
try_get,
unescapeHTML,
unified_timestamp,
)
@ -67,12 +68,20 @@ class WatchBoxIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
source = (self._parse_json(
player_config = self._parse_json(
self._search_regex(
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
default='{}'),
video_id, transform_source=js_to_json,
fatal=False) or {}).get('source') or {}
r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
'player config', default='{}', group='data'),
video_id, transform_source=unescapeHTML, fatal=False)
if not player_config:
player_config = self._parse_json(
self._search_regex(
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
default='{}'),
video_id, transform_source=js_to_json, fatal=False) or {}
source = player_config.get('source') or {}
video_id = compat_str(source.get('videoId') or video_id)

View File

@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import (
int_or_none,
orderedSet,
)
class WebOfStoriesIE(InfoExtractor):
@ -133,8 +136,10 @@ class WebOfStoriesPlaylistIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
entries = [
self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
for video_number in set(re.findall(r'href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
self.url_result(
'http://www.webofstories.com/play/%s' % video_id,
'WebOfStories', video_id=video_id)
for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
]
title = self._search_regex(

View File

@ -23,7 +23,7 @@ class XFileShareIE(InfoExtractor):
(r'powerwatch\.pw', 'PowerWatch'),
(r'rapidvideo\.ws', 'Rapidvideo.ws'),
(r'thevideobee\.to', 'TheVideoBee'),
(r'vidto\.me', 'Vidto'),
(r'vidto\.(?:me|se)', 'Vidto'),
(r'streamin\.to', 'Streamin.To'),
(r'xvidstage\.com', 'XVIDSTAGE'),
(r'vidabc\.com', 'Vid ABC'),
@ -115,7 +115,10 @@ class XFileShareIE(InfoExtractor):
'only_matching': True,
}, {
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://vidto.se/1tx1pf6t12cg.html',
'only_matching': True,
}]
@staticmethod

View File

@ -13,6 +13,7 @@ from ..utils import (
parse_duration,
try_get,
unified_strdate,
url_or_none,
)
@ -137,7 +138,8 @@ class XHamsterIE(InfoExtractor):
else:
format_url = format_item
filesize = None
if not isinstance(format_url, compat_str):
format_url = url_or_none(format_url)
if not format_url:
continue
formats.append({
'format_id': '%s-%s' % (format_id, quality),
@ -198,7 +200,8 @@ class XHamsterIE(InfoExtractor):
default='{}'),
video_id, fatal=False)
for format_id, format_url in sources.items():
if not isinstance(format_url, compat_str):
format_url = url_or_none(format_url)
if not format_url:
continue
if format_url in format_urls:
continue

View File

@ -4,12 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
qualities,
unescapeHTML,
url_or_none,
)
@ -80,9 +80,9 @@ class YapFilesIE(InfoExtractor):
formats = []
for format_id in QUALITIES:
is_hd = format_id == 'hd'
format_url = playlist.get(
'file%s' % ('_hd' if is_hd else ''))
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(playlist.get(
'file%s' % ('_hd' if is_hd else '')))
if not format_url:
continue
formats.append({
'url': format_url,

View File

@ -3,11 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
parse_duration,
url_or_none,
)
@ -50,8 +50,8 @@ class YouJizzIE(InfoExtractor):
for encoding in encodings:
if not isinstance(encoding, dict):
continue
format_url = encoding.get('filename')
if not isinstance(format_url, compat_str):
format_url = url_or_none(encoding.get('filename'))
if not format_url:
continue
if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(

View File

@ -3,13 +3,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
sanitized_Request,
str_to_int,
unescapeHTML,
unified_strdate,
url_or_none,
)
from ..aes import aes_decrypt_text
@ -88,8 +88,8 @@ class YouPornIE(InfoExtractor):
for definition in definitions:
if not isinstance(definition, dict):
continue
video_url = definition.get('videoUrl')
if isinstance(video_url, compat_str) and video_url:
video_url = url_or_none(definition.get('videoUrl'))
if video_url:
links.append(video_url)
# Fallback #1, this also contains extra low quality 180p format

View File

@ -0,0 +1,41 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import urljoin
class YourPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)'
_TEST = {
'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
'md5': '6f8682b6464033d87acaa7a8ff0c092e',
'info_dict': {
'id': '57ffcb2e1179b',
'ext': 'mp4',
'title': 'md5:c9f43630bd968267672651ba905a7d35',
'thumbnail': r're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = urljoin(url, self._parse_json(
self._search_regex(
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
group='data'),
video_id)[video_id])
title = (self._search_regex(
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
default=None) or self._og_search_description(webpage)).strip()
thumbnail = self._og_search_thumbnail(webpage)
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
}

View File

@ -64,7 +64,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
def _set_language(self):
self._set_cookie(
@ -2123,7 +2123,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
)
(
(?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
@ -2261,6 +2261,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
}, {
'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
'only_matching': True,
}, {
# music album playlist
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
'only_matching': True,
}]
def _real_initialize(self):

View File

@ -13,6 +13,7 @@ from ..utils import (
ExtractorError,
int_or_none,
try_get,
url_or_none,
urlencode_postdata,
)
@ -150,8 +151,8 @@ class ZattooBaseIE(InfoExtractor):
for watch in watch_urls:
if not isinstance(watch, dict):
continue
watch_url = watch.get('url')
if not watch_url or not isinstance(watch_url, compat_str):
watch_url = url_or_none(watch.get('url'))
if not watch_url:
continue
format_id_list = [stream_type]
maxrate = watch.get('maxrate')

View File

@ -15,6 +15,7 @@ from ..utils import (
try_get,
unified_timestamp,
update_url_query,
url_or_none,
urljoin,
)
@ -67,8 +68,8 @@ class ZDFIE(ZDFBaseIE):
def _extract_subtitles(src):
subtitles = {}
for caption in try_get(src, lambda x: x['captions'], list) or []:
subtitle_url = caption.get('uri')
if subtitle_url and isinstance(subtitle_url, compat_str):
subtitle_url = url_or_none(caption.get('uri'))
if subtitle_url:
lang = caption.get('language', 'deu')
subtitles.setdefault(lang, []).append({
'url': subtitle_url,
@ -76,8 +77,8 @@ class ZDFIE(ZDFBaseIE):
return subtitles
def _extract_format(self, video_id, formats, format_urls, meta):
format_url = meta.get('url')
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(meta.get('url'))
if not format_url:
return
if format_url in format_urls:
return
@ -152,7 +153,8 @@ class ZDFIE(ZDFBaseIE):
content, lambda x: x['teaserImageRef']['layouts'], dict)
if layouts:
for layout_key, layout_url in layouts.items():
if not isinstance(layout_url, compat_str):
layout_url = url_or_none(layout_url)
if not layout_url:
continue
thumbnail = {
'url': layout_url,

View File

@ -49,7 +49,6 @@ from .compat import (
compat_os_name,
compat_parse_qs,
compat_shlex_quote,
compat_socket_create_connection,
compat_str,
compat_struct_pack,
compat_struct_unpack,
@ -82,7 +81,7 @@ def register_socks_protocols():
compiled_regex_type = type(re.compile(''))
std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
@ -882,13 +881,51 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
kwargs['strict'] = True
hc = http_class(*args, **compat_kwargs(kwargs))
source_address = ydl_handler._params.get('source_address')
if source_address is not None:
# This is to workaround _create_connection() from socket where it will try all
# address data from getaddrinfo() including IPv6. This filters the result from
# getaddrinfo() based on the source_address value.
# This is based on the cpython socket.create_connection() function.
# https://github.com/python/cpython/blob/master/Lib/socket.py#L691
def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
host, port = address
err = None
addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
ip_addrs = [addr for addr in addrs if addr[0] == af]
if addrs and not ip_addrs:
ip_version = 'v4' if af == socket.AF_INET else 'v6'
raise socket.error(
"No remote IP%s addresses available for connect, can't use '%s' as source address"
% (ip_version, source_address[0]))
for res in ip_addrs:
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
sock.bind(source_address)
sock.connect(sa)
err = None # Explicitly break reference cycle
return sock
except socket.error as _:
err = _
if sock is not None:
sock.close()
if err is not None:
raise err
else:
raise socket.error('getaddrinfo returns an empty list')
if hasattr(hc, '_create_connection'):
hc._create_connection = _create_connection
sa = (source_address, 0)
if hasattr(hc, 'source_address'): # Python 2.7+
hc.source_address = sa
else: # Python 2.6
def _hc_connect(self, *args, **kwargs):
sock = compat_socket_create_connection(
sock = _create_connection(
(self.host, self.port), self.timeout, sa)
if is_https:
self.sock = ssl.wrap_socket(
@ -1866,6 +1903,13 @@ def strip_or_none(v):
return None if v is None else v.strip()
def url_or_none(url):
if not url or not isinstance(url, compat_str):
return None
url = url.strip()
return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
def parse_duration(s):
if not isinstance(s, compat_basestring):
return None
@ -2282,7 +2326,7 @@ def parse_age_limit(s):
def strip_jsonp(code):
return re.sub(
r'''(?sx)^
(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
(?:\s*&&\s*(?P=func_name))?
\s*\(\s*(?P<callback_data>.*)\);?
\s*?(?://[^\n]*)*$''',
@ -3562,7 +3606,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
setattr(self, '%s_open' % type,
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
meth(r, proxy, type))
return compat_urllib_request.ProxyHandler.__init__(self, proxies)
compat_urllib_request.ProxyHandler.__init__(self, proxies)
def proxy_open(self, req, proxy, type):
req_proxy = req.headers.get('Ytdl-request-proxy')

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.07.10'
__version__ = '2018.08.28'