1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-27 10:54:34 +01:00

Merge pull request #11 from ytdl-org/master

update
This commit is contained in:
tsia 2019-12-20 15:41:59 +01:00 committed by GitHub
commit d05db22b74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 459 additions and 349 deletions

View File

@ -1,3 +1,55 @@
version <unreleased>
Core
* [utils] Improve str_to_int
+ [downloader/hls] Add ability to override AES decryption key URL (#17521)
Extractors
+ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291)
+ [slideslive] Add support for url and vimeo service names (#23414)
* [slideslive] Fix extraction (#23413)
* [twitch:clips] Fix extraction (#23375)
+ [soundcloud] Add support for token protected embeds (#18954)
* [vk] Improve extraction
* Fix User Videos extraction (#23356)
* Extract all videos for lists with more than 1000 videos (#23356)
+ Add support for video albums (#14327, #14492)
- [kontrtube] Remove extractor
- [videopremium] Remove extractor
- [musicplayon] Remove extractor (#9225)
+ [ufctv] Add support for ufcfightpass.imgdge.com and
ufcfightpass.imggaming.com (#23343)
+ [twitch] Extract m3u8 formats frame rate (#23333)
+ [imggaming] Add support for playlists and extract subtitles
+ [ufcarabia] Add support for UFC Arabia (#23312)
* [ufctv] Fix extraction
* [yahoo] Fix gyao brightcove player id (#23303)
* [vzaar] Override AES decryption key URL (#17521)
+ [vzaar] Add support for AES HLS manifests (#17521, #23299)
* [nrl] Fix extraction
* [teachingchannel] Fix extraction
* [nintendo] Fix extraction and partially add support for Nintendo Direct
videos (#4592)
+ [ooyala] Add better fallback values for domain and streams variables
+ [youtube] Add support for youtubekids.com (#23272)
* [tv2] Detect DRM protection
+ [tv2] Add support for katsomo.fi and mtv.fi (#10543)
* [tv2] Fix tv2.no article extraction
* [msn] Improve extraction
+ Add support for YouTube and NBCSports embeds
+ Add support for articles with multiple videos
* Improve AOL embed support
* Improve format extraction
* [abcotvs] Relax URL regular expression and improve metadata extraction
(#18014)
* [channel9] Reduce response size
* [adobetv] Improve extraction
* Use OnDemandPagedList for list extractors
* Reduce show extraction requests
* Extract original video format and subtitles
+ Add support for adobe tv embeds
version 2019.11.28 version 2019.11.28
Core Core
@ -583,7 +635,7 @@ Extractors
version 2019.04.17 version 2019.04.17
Extractors Extractors
* [openload] Randomize User-Agent (closes #20688) * [openload] Randomize User-Agent (#20688)
+ [openload] Add support for oladblock domains (#20471) + [openload] Add support for oladblock domains (#20471)
* [adn] Fix subtitle extraction (#12724) * [adn] Fix subtitle extraction (#12724)
+ [aol] Add support for localized websites + [aol] Add support for localized websites
@ -1148,7 +1200,7 @@ Extractors
+ [youtube] Extract channel meta fields (#9676, #12939) + [youtube] Extract channel meta fields (#9676, #12939)
* [porntube] Fix extraction (#17541) * [porntube] Fix extraction (#17541)
* [asiancrush] Fix extraction (#15630) * [asiancrush] Fix extraction (#15630)
+ [twitch:clips] Extend URL regular expression (closes #17559) + [twitch:clips] Extend URL regular expression (#17559)
+ [vzaar] Add support for HLS + [vzaar] Add support for HLS
* [tube8] Fix metadata extraction (#17520) * [tube8] Fix metadata extraction (#17520)
* [eporner] Extract JSON-LD (#17519) * [eporner] Extract JSON-LD (#17519)

View File

@ -500,6 +500,11 @@ class TestUtil(unittest.TestCase):
self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int('123.456'), 123456)
self.assertEqual(str_to_int(523), 523) self.assertEqual(str_to_int(523), 523)
# Python 3 has no long
if sys.version_info < (3, 0):
eval('self.assertEqual(str_to_int(123456L), 123456)')
self.assertEqual(str_to_int('noninteger'), None)
self.assertEqual(str_to_int([]), None)
def test_url_basename(self): def test_url_basename(self):
self.assertEqual(url_basename('http://foo.de/'), '') self.assertEqual(url_basename('http://foo.de/'), '')

View File

@ -64,7 +64,7 @@ class HlsFD(FragmentFD):
s = urlh.read().decode('utf-8', 'ignore') s = urlh.read().decode('utf-8', 'ignore')
if not self.can_download(s, info_dict): if not self.can_download(s, info_dict):
if info_dict.get('extra_param_to_segment_url'): if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
self.report_error('pycrypto not found. Please install it.') self.report_error('pycrypto not found. Please install it.')
return False return False
self.report_warning( self.report_warning(
@ -169,7 +169,7 @@ class HlsFD(FragmentFD):
if decrypt_info['METHOD'] == 'AES-128': if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
self._prepare_url(info_dict, decrypt_info['URI'])).read() self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
frag_content = AES.new( frag_content = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
self._append_fragment(ctx, frag_content) self._append_fragment(ctx, frag_content)

View File

@ -515,7 +515,6 @@ from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE from .krasview import KrasViewIE
from .ku6 import Ku6IE from .ku6 import Ku6IE
from .kusi import KUSIIE from .kusi import KUSIIE
@ -661,7 +660,6 @@ from .mtv import (
MTVJapanIE, MTVJapanIE,
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
from .mwave import MwaveIE, MwaveMeetGreetIE from .mwave import MwaveIE, MwaveMeetGreetIE
from .mychannels import MyChannelsIE from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE from .myspace import MySpaceIE, MySpaceAlbumIE
@ -1174,7 +1172,10 @@ from .tv2 import (
TV2ArticleIE, TV2ArticleIE,
KatsomoIE, KatsomoIE,
) )
from .tv2dk import TV2DKIE from .tv2dk import (
TV2DKIE,
TV2DKBornholmPlayIE,
)
from .tv2hu import TV2HuIE from .tv2hu import TV2HuIE
from .tv4 import TV4IE from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE from .tv5mondeplus import TV5MondePlusIE
@ -1238,7 +1239,10 @@ from .udemy import (
UdemyCourseIE UdemyCourseIE
) )
from .udn import UDNEmbedIE from .udn import UDNEmbedIE
from .ufctv import UFCTVIE from .ufctv import (
UFCTVIE,
UFCArabiaIE,
)
from .uktvplay import UKTVPlayIE from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE from .digiteka import DigitekaIE
from .dlive import ( from .dlive import (
@ -1292,7 +1296,6 @@ from .videomore import (
VideomoreVideoIE, VideomoreVideoIE,
VideomoreSeasonIE, VideomoreSeasonIE,
) )
from .videopremium import VideoPremiumIE
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .vidio import VidioIE from .vidio import VidioIE
from .vidlii import VidLiiIE from .vidlii import VidLiiIE

View File

@ -0,0 +1,133 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
str_or_none,
try_get,
)
class ImgGamingBaseIE(InfoExtractor):
    """Shared extraction logic for sites backed by the API at ``_API_BASE``.

    This base class defines neither ``_VALID_URL`` nor a usable ``_REALM``
    (it is ``None`` here); concrete extractors must provide both, typically
    by filling the domain placeholder of ``_VALID_URL_TEMPL``.
    """
    _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/'
    _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf'
    # Populated per instance by _real_initialize()
    _HEADERS = None
    _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'}
    _REALM = None
    _VALID_URL_TEMPL = r'https?://(?P<domain>%s)/(?P<type>live|playlist|video)/(?P<id>\d+)(?:\?.*?\bplaylistId=(?P<playlist_id>\d+))?'

    def _real_initialize(self):
        """Log in and store the API headers (including the bearer token).

        Credentials are mandatory: ``raise_login_required()`` is called when
        no e-mail is configured.
        """
        self._HEADERS = {
            'Realm': 'dce.' + self._REALM,
            'x-api-key': self._API_KEY,
        }

        email, password = self._get_login_info()
        if email is None:
            self.raise_login_required()

        # The login request carries a JSON body, so it needs a Content-Type
        # that the regular API headers do not include.
        p_headers = self._HEADERS.copy()
        p_headers['Content-Type'] = 'application/json'
        self._HEADERS['Authorization'] = 'Bearer ' + self._download_json(
            self._API_BASE + 'login',
            None, 'Logging in', data=json.dumps({
                'id': email,
                'secret': password,
            }).encode(), headers=p_headers)['authorisationToken']

    def _call_api(self, path, media_id):
        """Download JSON from ``_API_BASE + path + media_id`` with the API headers."""
        return self._download_json(
            self._API_BASE + path + media_id, media_id, headers=self._HEADERS)

    def _extract_dve_api_url(self, media_id, media_type):
        """Return the ``playerUrlCallback`` URL for a video or event.

        An HTTP 403 response is converted into an expected ``ExtractorError``
        carrying the first entry of the response's ``messages`` list; any
        other ``ExtractorError`` is re-raised unchanged.
        """
        stream_path = 'stream'
        if media_type == 'video':
            stream_path += '/vod/'
        else:
            stream_path += '?eventId='
        try:
            return self._call_api(
                stream_path, media_id)['playerUrlCallback']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                raise ExtractorError(
                    self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
                    expected=True)
            raise

    def _real_extract(self, url):
        """Extract a single video/live event, or a playlist of video URLs."""
        domain, media_type, media_id, playlist_id = re.match(self._VALID_URL, url).groups()

        if playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
                media_type, media_id = 'playlist', playlist_id

        if media_type == 'playlist':
            playlist = self._call_api('vod/playlist/', media_id)
            entries = []
            for video in try_get(playlist, lambda x: x['videos']['vods']) or []:
                video_id = str_or_none(video.get('id'))
                if not video_id:
                    continue
                entries.append(self.url_result(
                    'https://%s/video/%s' % (domain, video_id),
                    self.ie_key(), video_id))
            return self.playlist_result(
                entries, media_id, playlist.get('title'),
                playlist.get('description'))

        dve_api_url = self._extract_dve_api_url(media_id, media_type)
        video_data = self._download_json(dve_api_url, media_id)
        is_live = media_type == 'live'
        if is_live:
            title = self._live_title(self._call_api('event/', media_id)['title'])
        else:
            title = video_data['name']

        formats = []
        for proto in ('hls', 'dash'):
            # The manifest URL is read from either '<proto>Url' or
            # '<proto>': {'url': ...}
            media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url'])
            if not media_url:
                continue
            if proto == 'hls':
                m3u8_formats = self._extract_m3u8_formats(
                    media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
                    m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
                for f in m3u8_formats:
                    # Attach the manifest headers to each HLS format as well
                    f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)
                    formats.append(f)
            else:
                formats.extend(self._extract_mpd_formats(
                    media_url, media_id, mpd_id='dash', fatal=False,
                    headers=self._MANIFEST_HEADERS))
        self._sort_formats(formats)

        subtitles = {}
        # ``or []`` also covers an explicit JSON null, which
        # ``.get('subtitles', [])`` would return as None and crash the loop.
        for subtitle in video_data.get('subtitles') or []:
            subtitle_url = subtitle.get('url')
            if not subtitle_url:
                continue
            # Fall back to 'en_US' for a missing *or* null language code
            lang = subtitle.get('lang') or 'en_US'
            subtitles.setdefault(lang, []).append({
                'url': subtitle_url,
            })

        return {
            'id': media_id,
            'title': title,
            'formats': formats,
            'thumbnail': video_data.get('thumbnailUrl'),
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'tags': video_data.get('tags'),
            'is_live': is_live,
            'subtitles': subtitles,
        }

View File

@ -1,73 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
)
class KontrTubeIE(InfoExtractor):
    """Extractor for video pages matching ``_VALID_URL`` on kontrtube.ru."""
    IE_NAME = 'kontrtube'
    IE_DESC = 'KontrTube.ru - Труба зовёт'
    _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'

    _TEST = {
        'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
        'md5': '975a991a4926c9a85f383a736a2e6b80',
        'info_dict': {
            'id': '2678',
            'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
            'ext': 'mp4',
            'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
            'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
            'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
            'duration': 270,
        }
    }

    def _real_extract(self, url):
        """Scrape the video page and return its info dict.

        The video URL and title are mandatory; thumbnail, duration, view
        count and comment count are looked up non-fatally.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(
            url, display_id, 'Downloading page')

        video_url = self._search_regex(
            r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
        thumbnail = self._search_regex(
            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
        title = self._html_search_regex(
            r'(?s)<h2>(.+?)</h2>', webpage, 'title')
        description = self._html_search_meta(
            'description', webpage, 'description')

        duration = self._search_regex(
            r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
        if duration:
            # Map the Russian unit abbreviations onto the English ones
            # before handing the text to parse_duration
            duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))

        view_count = self._search_regex(
            r'Просмотров: <em>([^<]+)</em>',
            webpage, 'view count', fatal=False)
        if view_count:
            # Spaces are stripped before conversion (presumably digit
            # grouping on the page -- confirm against a live page)
            view_count = int_or_none(view_count.replace(' ', ''))

        # Converted exactly once here; the label no longer carries a stray
        # leading space that leaked into extraction messages.
        comment_count = int_or_none(self._search_regex(
            r'Комментарии \((\d+)\)<', webpage, 'comment count', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
        }

View File

@ -1,66 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
)
class MusicPlayOnIE(InfoExtractor):
    """Extractor for musicplayon.com ``play`` / ``play-touch`` pages."""
    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://en.musicplayon.com/play?v=433377',
        'md5': '00cdcdea1726abdf500d1e7fd6dd59bb',
        'info_dict': {
            'id': '433377',
            'ext': 'mp4',
            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
            'description': 'Rick Ross Interview On Chelsea Lately',
            'duration': 342,
            'uploader': 'ultrafish',
        },
    }, {
        'url': 'http://en.musicplayon.com/play?pl=102&play=442629',
        'only_matching': True,
    }]

    _URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s'

    def _real_extract(self, url):
        """Download the canonical page for the video id and build its info dict."""
        video_id = self._match_id(url)
        # Whatever URL form matched, always fetch the templated page URL
        page_url = self._URL_TEMPLATE % video_id
        webpage = self._download_webpage(page_url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        raw_duration = self._html_search_meta(
            'video:duration', webpage, 'duration', fatal=False)
        raw_view_count = self._og_search_property(
            'count', webpage, fatal=False)
        uploader = self._html_search_regex(
            r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', webpage, 'uploader', fatal=False)

        # The sources are embedded as a JavaScript literal assigned to
        # setup['_sources']; js_to_json converts it for the JSON parser.
        sources_js = self._search_regex(
            r'setup\[\'_sources\'\]\s*=\s*([^;]+);', webpage, 'video sources')
        sources = self._parse_json(
            sources_js, video_id, transform_source=js_to_json)

        formats = []
        for source in sources:
            formats.append({
                # 'src' is resolved against the page URL
                'url': compat_urlparse.urljoin(page_url, source['src']),
                'ext': mimetype2ext(source.get('type')),
                'format_note': source.get('data-res'),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': int_or_none(raw_duration),
            'view_count': int_or_none(raw_view_count),
            'formats': formats,
        }

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import smuggle_url
class SlidesLiveIE(InfoExtractor): class SlidesLiveIE(InfoExtractor):
@ -14,9 +14,9 @@ class SlidesLiveIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'LMtgR8ba0b0', 'id': 'LMtgR8ba0b0',
'ext': 'mp4', 'ext': 'mp4',
'title': '38902413: external video', 'title': 'GCC IA16 backend',
'description': '3890241320170925-9-1yd6ech.mp4', 'description': 'Watch full version of this video at https://slideslive.com/38902413.',
'uploader': 'SlidesLive Administrator', 'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'upload_date': '20170925', 'upload_date': '20170925',
} }
@ -24,16 +24,38 @@ class SlidesLiveIE(InfoExtractor):
# video_service_name = youtube # video_service_name = youtube
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
'only_matching': True, 'only_matching': True,
}, {
# video_service_name = url
'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
'only_matching': True,
}, {
# video_service_name = vimeo
'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(
url, video_id, headers={'Accept': 'application/json'}) 'https://ben.slideslive.com/player/' + video_id, video_id)
service_name = video_data['video_service_name'].lower() service_name = video_data['video_service_name'].lower()
if service_name == 'youtube': assert service_name in ('url', 'vimeo', 'youtube')
yt_video_id = video_data['video_service_id'] service_id = video_data['video_service_id']
return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) info = {
'id': video_id,
'thumbnail': video_data.get('thumbnail'),
'url': service_id,
}
if service_name == 'url':
info['title'] = video_data['title']
else: else:
raise ExtractorError( info.update({
'Unsupported service name: {0}'.format(service_name), expected=True) '_type': 'url_transparent',
'ie_key': service_name.capitalize(),
'title': video_data.get('title'),
})
if service_name == 'vimeo':
info['url'] = smuggle_url(
'https://player.vimeo.com/video/' + service_id,
{'http_headers': {'Referer': url}})
return info

View File

@ -28,7 +28,12 @@ from ..utils import (
class SoundcloudEmbedIE(InfoExtractor): class SoundcloudEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?url=(?P<id>.*)' _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
_TEST = {
# from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
'only_matching': True,
}
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
@ -37,8 +42,13 @@ class SoundcloudEmbedIE(InfoExtractor):
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
return self.url_result(compat_urlparse.parse_qs( query = compat_urlparse.parse_qs(
compat_urlparse.urlparse(url).query)['url'][0]) compat_urlparse.urlparse(url).query)
api_url = query['url'][0]
secret_token = query.get('secret_token')
if secret_token:
api_url = update_url_query(api_url, {'secret_token': secret_token[0]})
return self.url_result(api_url)
class SoundcloudIE(InfoExtractor): class SoundcloudIE(InfoExtractor):

View File

@ -1,10 +1,16 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import extract_attributes from ..utils import (
determine_ext,
extract_attributes,
js_to_json,
url_or_none,
)
class TV2DKIE(InfoExtractor): class TV2DKIE(InfoExtractor):
@ -80,3 +86,69 @@ class TV2DKIE(InfoExtractor):
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
video_id=kaltura_id)) video_id=kaltura_id))
return self.playlist_result(entries) return self.playlist_result(entries)
class TV2DKBornholmPlayIE(InfoExtractor):
    """Extractor for ``play.tv2bornholm.dk`` URLs carrying an ``id`` query parameter."""
    _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021',
        'info_dict': {
            'id': '781021',
            'ext': 'mp4',
            'title': '12Nyheder-27.11.19',
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Request the player snippet for the id and parse title and sources from it."""
        video_id = self._match_id(url)

        request_body = json.dumps({
            'playlist_id': video_id,
            'serienavn': '',
        }).encode()
        request_headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'Content-Type': 'application/json; charset=UTF-8',
        }
        response = self._download_json(
            'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
            data=request_body, headers=request_headers)
        # The text searched below is delivered under the 'd' key
        player_code = response['d']

        # TODO: generalize flowplayer
        title = self._search_regex(
            r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', player_code, 'title',
            group='value')
        sources_js = self._search_regex(
            r'(?s)sources:\s*(\[.+?\]),', player_code, 'sources')
        sources = self._parse_json(sources_js, video_id, js_to_json)

        formats = []
        seen_srcs = set()
        for source in sources:
            src = url_or_none(source.get('src'))
            # Skip entries without a usable URL and URLs already handled
            if not src or src in seen_srcs:
                continue
            seen_srcs.add(src)
            ext = determine_ext(src)
            src_type = source.get('type')
            if src_type == 'application/x-mpegurl' or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif src_type == 'application/dash+xml' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'url': src,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }

View File

@ -17,12 +17,10 @@ from ..compat import (
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
float_or_none,
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
qualities,
try_get, try_get,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -327,6 +325,7 @@ class TwitchVodIE(TwitchItemBaseIE):
'allow_audio_only': 'true', 'allow_audio_only': 'true',
'allow_spectre': 'true', 'allow_spectre': 'true',
'player': 'twitchweb', 'player': 'twitchweb',
'playlist_include_framerate': 'true',
'nauth': access_token['token'], 'nauth': access_token['token'],
'nauthsig': access_token['sig'], 'nauthsig': access_token['sig'],
})), })),
@ -598,6 +597,7 @@ class TwitchStreamIE(TwitchBaseIE):
'allow_spectre': 'true', 'allow_spectre': 'true',
'p': random.randint(1000000, 10000000), 'p': random.randint(1000000, 10000000),
'player': 'twitchweb', 'player': 'twitchweb',
'playlist_include_framerate': 'true',
'segment_preference': '4', 'segment_preference': '4',
'sig': access_token['sig'].encode('utf-8'), 'sig': access_token['sig'].encode('utf-8'),
'token': access_token['token'].encode('utf-8'), 'token': access_token['token'].encode('utf-8'),
@ -674,63 +674,81 @@ class TwitchClipsIE(TwitchBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
status = self._download_json( clip = self._download_json(
'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id, 'https://gql.twitch.tv/gql', video_id, data=json.dumps({
video_id) 'query': '''{
clip(slug: "%s") {
broadcaster {
displayName
}
createdAt
curator {
displayName
id
}
durationSeconds
id
tiny: thumbnailURL(width: 86, height: 45)
small: thumbnailURL(width: 260, height: 147)
medium: thumbnailURL(width: 480, height: 272)
title
videoQualities {
frameRate
quality
sourceURL
}
viewCount
}
}''' % video_id,
}).encode(), headers={
'Client-ID': self._CLIENT_ID,
})['data']['clip']
if not clip:
raise ExtractorError(
'This clip is no longer available', expected=True)
formats = [] formats = []
for option in clip.get('videoQualities', []):
for option in status['quality_options']:
if not isinstance(option, dict): if not isinstance(option, dict):
continue continue
source = url_or_none(option.get('source')) source = url_or_none(option.get('sourceURL'))
if not source: if not source:
continue continue
formats.append({ formats.append({
'url': source, 'url': source,
'format_id': option.get('quality'), 'format_id': option.get('quality'),
'height': int_or_none(option.get('quality')), 'height': int_or_none(option.get('quality')),
'fps': int_or_none(option.get('frame_rate')), 'fps': int_or_none(option.get('frameRate')),
}) })
self._sort_formats(formats) self._sort_formats(formats)
info = { thumbnails = []
for thumbnail_id in ('tiny', 'small', 'medium'):
thumbnail_url = clip.get(thumbnail_id)
if not thumbnail_url:
continue
thumb = {
'id': thumbnail_id,
'url': thumbnail_url,
}
mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
if mobj:
thumb.update({
'height': int(mobj.group(2)),
'width': int(mobj.group(1)),
})
thumbnails.append(thumb)
return {
'id': clip.get('id') or video_id,
'title': clip.get('title') or video_id,
'formats': formats, 'formats': formats,
'duration': int_or_none(clip.get('durationSeconds')),
'views': int_or_none(clip.get('viewCount')),
'timestamp': unified_timestamp(clip.get('createdAt')),
'thumbnails': thumbnails,
'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
} }
clip = self._call_api(
'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
'Accept': 'application/vnd.twitchtv.v5+json',
})
if clip:
quality_key = qualities(('tiny', 'small', 'medium'))
thumbnails = []
thumbnails_dict = clip.get('thumbnails')
if isinstance(thumbnails_dict, dict):
for thumbnail_id, thumbnail_url in thumbnails_dict.items():
thumbnails.append({
'id': thumbnail_id,
'url': thumbnail_url,
'preference': quality_key(thumbnail_id),
})
info.update({
'id': clip.get('tracking_id') or video_id,
'title': clip.get('title') or video_id,
'duration': float_or_none(clip.get('duration')),
'views': int_or_none(clip.get('views')),
'timestamp': unified_timestamp(clip.get('created_at')),
'thumbnails': thumbnails,
'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
})
else:
info.update({
'title': video_id,
'id': video_id,
})
return info

View File

@ -1,73 +1,16 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .imggaming import ImgGamingBaseIE
from ..utils import (
ExtractorError,
parse_duration,
parse_iso8601,
urlencode_postdata,
)
class UFCTVIE(InfoExtractor): class UFCTVIE(ImgGamingBaseIE):
_VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)' _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com'
_NETRC_MACHINE = 'ufctv' _NETRC_MACHINE = 'ufctv'
_TEST = { _REALM = 'ufc'
'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
'info_dict': {
'id': '34167',
'ext': 'mp4',
'title': 'UFC 219 Countdown: Full Episode',
'description': 'md5:26d4e8bf4665ae5878842d7050c3c646',
'timestamp': 1513962360,
'upload_date': '20171222',
},
'params': {
# m3u8 download
'skip_download': True,
}
}
def _real_initialize(self):
username, password = self._get_login_info()
if username is None:
return
code = self._download_json( class UFCArabiaIE(ImgGamingBaseIE):
'https://www.ufc.tv/secure/authenticate', _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)'
None, 'Logging in', data=urlencode_postdata({ _NETRC_MACHINE = 'ufcarabia'
'username': username, _REALM = 'admufc'
'password': password,
'format': 'json',
})).get('code')
if code and code != 'loginsuccess':
raise ExtractorError(code, expected=True)
def _real_extract(self, url):
display_id = self._match_id(url)
video_data = self._download_json(url, display_id, query={
'format': 'json',
})
video_id = str(video_data['id'])
title = video_data['name']
m3u8_url = self._download_json(
'https://www.ufc.tv/service/publishpoint', video_id, query={
'type': 'video',
'format': 'json',
'id': video_id,
}, headers={
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
})['path']
m3u8_url = m3u8_url.replace('_iphone.', '.')
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
'duration': parse_duration(video_data.get('runtime')),
'timestamp': parse_iso8601(video_data.get('releaseDate')),
'formats': formats,
}

View File

@ -1,46 +0,0 @@
from __future__ import unicode_literals
import re
import random
from .common import InfoExtractor
class VideoPremiumIE(InfoExtractor):
    """Extractor for videopremium.tv / videopremium.me video pages."""
    _VALID_URL = r'https?://(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
    _TEST = {
        'url': 'http://videopremium.tv/4w7oadjsf156',
        'info_dict': {
            'id': '4w7oadjsf156',
            'ext': 'f4v',
            'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4'
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Test file has been deleted.',
    }

    def _real_extract(self, url):
        """Fetch the video page (twice if needed) and return RTMP stream info."""
        video_id = self._match_id(url)
        page_url = 'http://videopremium.tv/' + video_id

        webpage = self._download_webpage(page_url, video_id)
        # A page that opens with a script assigning window.location is
        # fetched a second time, because a cookie is needed.
        got_redirect_stub = re.match(
            r'^<html><head><script[^>]*>window\.location\s*=', webpage)
        if got_redirect_stub:
            webpage = self._download_webpage(
                page_url, video_id,
                note='Downloading webpage again (with cookie)')

        video_title = self._html_search_regex(
            r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, 'video title')

        # The RTMP host number is picked at random from 1..16
        server_number = random.randint(1, 16)

        return {
            'id': video_id,
            'url': 'rtmp://e%d.md.iplay.md/play' % server_number,
            'play_path': 'mp4:%s.f4v' % video_id,
            'page_url': page_url,
            'player_url': 'http://videopremium.tv/uplayer/uppod.swf',
            'ext': 'f4v',
            'title': video_title,
        }

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import collections import collections
import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -11,6 +12,7 @@ from ..utils import (
ExtractorError, ExtractorError,
get_element_by_class, get_element_by_class,
int_or_none, int_or_none,
OnDemandPagedList,
orderedSet, orderedSet,
str_or_none, str_or_none,
str_to_int, str_to_int,
@ -477,14 +479,23 @@ class VKIE(VKBaseIE):
class VKUserVideosIE(VKBaseIE): class VKUserVideosIE(VKBaseIE):
IE_NAME = 'vk:uservideos' IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos" IE_DESC = "VK - User's Videos"
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
_TEMPLATE_URL = 'https://vk.com/videos' _TEMPLATE_URL = 'https://vk.com/videos'
_TESTS = [{ _TESTS = [{
'url': 'http://vk.com/videos205387401', 'url': 'https://vk.com/videos-767561',
'info_dict': { 'info_dict': {
'id': '205387401', 'id': '-767561_all',
}, },
'playlist_mincount': 4, 'playlist_mincount': 1150,
}, {
'url': 'https://vk.com/videos-767561?section=uploaded',
'info_dict': {
'id': '-767561_uploaded',
},
'playlist_mincount': 425,
}, {
'url': 'http://vk.com/videos205387401',
'only_matching': True,
}, { }, {
'url': 'http://vk.com/videos-77521', 'url': 'http://vk.com/videos-77521',
'only_matching': True, 'only_matching': True,
@ -498,25 +509,33 @@ class VKUserVideosIE(VKBaseIE):
'url': 'http://new.vk.com/videos205387401', 'url': 'http://new.vk.com/videos205387401',
'only_matching': True, 'only_matching': True,
}] }]
_VIDEO = collections.namedtuple( _PAGE_SIZE = 1000
'Video', ['owner_id', 'id', 'thumb', 'title', 'flags', 'duration', 'hash', 'moder_acts', 'owner', 'date', 'views', 'platform', 'blocked', 'music_video_meta']) _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
def _real_extract(self, url):
page_id = self._match_id(url)
def _fetch_page(self, page_id, section, page):
l = self._download_payload('al_video', page_id, { l = self._download_payload('al_video', page_id, {
'act': 'load_videos_silent', 'act': 'load_videos_silent',
'offset': page * self._PAGE_SIZE,
'oid': page_id, 'oid': page_id,
})[0]['']['list'] 'section': section,
})[0][section]['list']
entries = []
for video in l: for video in l:
v = self._VIDEO._make(video) v = self._VIDEO._make(video[:2])
video_id = '%d_%d' % (v.owner_id, v.id) video_id = '%d_%d' % (v.owner_id, v.id)
entries.append(self.url_result( yield self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id)) 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
return self.playlist_result(entries, page_id) def _real_extract(self, url):
page_id, section = re.match(self._VALID_URL, url).groups()
if not section:
section = 'all'
entries = OnDemandPagedList(
functools.partial(self._fetch_page, page_id, section),
self._PAGE_SIZE)
return self.playlist_result(entries, '%s_%s' % (page_id, section))
class VKWallPostIE(VKBaseIE): class VKWallPostIE(VKBaseIE):
@ -580,8 +599,7 @@ class VKWallPostIE(VKBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
_BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/=' _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
_AUDIO = collections.namedtuple( _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
'Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads', 'subtitle', 'main_artists', 'feat_artists', 'album', 'track_code', 'restriction', 'album_part', 'new_stats', 'access_key'])
def _decode(self, enc): def _decode(self, enc):
dec = '' dec = ''
@ -629,7 +647,7 @@ class VKWallPostIE(VKBaseIE):
for audio in re.findall(r'data-audio="([^"]+)', webpage): for audio in re.findall(r'data-audio="([^"]+)', webpage):
audio = self._parse_json(unescapeHTML(audio), post_id) audio = self._parse_json(unescapeHTML(audio), post_id)
a = self._AUDIO._make(audio) a = self._AUDIO._make(audio[:16])
if not a.url: if not a.url:
continue continue
title = unescapeHTML(a.title) title = unescapeHTML(a.title)

View File

@ -32,6 +32,18 @@ class VzaarIE(InfoExtractor):
'ext': 'mp3', 'ext': 'mp3',
'title': 'MP3', 'title': 'MP3',
}, },
}, {
# hlsAes = true
'url': 'https://view.vzaar.com/11379930/player',
'info_dict': {
'id': '11379930',
'ext': 'mp4',
'title': 'Videoaula',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, { }, {
# with null videoTitle # with null videoTitle
'url': 'https://view.vzaar.com/20313539/download', 'url': 'https://view.vzaar.com/20313539/download',
@ -58,6 +70,7 @@ class VzaarIE(InfoExtractor):
f = { f = {
'url': source_url, 'url': source_url,
'format_id': 'http', 'format_id': 'http',
'preference': 1,
} }
if 'audio' in source_url: if 'audio' in source_url:
f.update({ f.update({
@ -75,13 +88,17 @@ class VzaarIE(InfoExtractor):
video_guid = video_data.get('guid') video_guid = video_data.get('guid')
usp = video_data.get('usp') usp = video_data.get('usp')
if isinstance(video_guid, compat_str) and isinstance(usp, dict): if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict):
m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?' hls_aes = video_data.get('hlsAes')
% (video_guid, video_id)) + '&'.join( qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items())
'%s=%s' % (k, v) for k, v in usp.items()) url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' % ('aes' if hls_aes else '', video_guid, video_id)
formats.extend(self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False)
if hls_aes:
for f in m3u8_formats:
f['_decryption_key_url'] = url_templ % ('goose', '') + qs
formats.extend(m3u8_formats)
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -383,7 +383,7 @@ class YahooGyaOPlayerIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': video['title'], 'title': video['title'],
'url': smuggle_url( 'url': smuggle_url(
'http://players.brightcove.net/4235717419001/default_default/index.html?videoId=' + video['videoId'], 'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['videoId'],
{'geo_countries': ['JP']}), {'geo_countries': ['JP']}),
'description': video.get('longDescription'), 'description': video.get('longDescription'),
'ie_key': BrightcoveNewIE.ie_key(), 'ie_key': BrightcoveNewIE.ie_key(),

View File

@ -46,6 +46,7 @@ from .compat import (
compat_html_entities, compat_html_entities,
compat_html_entities_html5, compat_html_entities_html5,
compat_http_client, compat_http_client,
compat_integer_types,
compat_kwargs, compat_kwargs,
compat_os_name, compat_os_name,
compat_parse_qs, compat_parse_qs,
@ -3519,10 +3520,11 @@ def str_or_none(v, default=None):
def str_to_int(int_str): def str_to_int(int_str):
""" A more relaxed version of int_or_none """ """ A more relaxed version of int_or_none """
if not isinstance(int_str, compat_str): if isinstance(int_str, compat_integer_types):
return int_str return int_str
int_str = re.sub(r'[,\.\+]', '', int_str) elif isinstance(int_str, compat_str):
return int(int_str) int_str = re.sub(r'[,\.\+]', '', int_str)
return int_or_none(int_str)
def float_or_none(v, scale=1, invscale=1, default=None): def float_or_none(v, scale=1, invscale=1, default=None):