mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2025-01-07 13:47:54 +01:00
[wetv] Add new extractor
This commit is contained in:
parent
d65d89183f
commit
9c42c640eb
@ -1409,6 +1409,10 @@ from .weibo import (
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wetv import (
|
||||
WeTvIE,
|
||||
WeTvPlaylistIE,
|
||||
)
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wsj import (
|
||||
|
489
youtube_dl/extractor/wetv.py
Normal file
489
youtube_dl/extractor/wetv.py
Normal file
@ -0,0 +1,489 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
|
||||
from ctypes import c_int32
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class WeTvBaseInfoExtractor(InfoExtractor):
|
||||
@staticmethod
|
||||
def parse_video_info(video_info):
|
||||
thumbnails = []
|
||||
for key, value in video_info.items():
|
||||
if key.startswith('pic_'):
|
||||
try:
|
||||
width, height = key[4:].split('_')
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
thumbnails.append({
|
||||
'width': int_or_none(width),
|
||||
'height': int_or_none(height),
|
||||
'url': url_or_none(value),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_info['vid'],
|
||||
'title': unescapeHTML(video_info['title']),
|
||||
'description': unescapeHTML(video_info.get('desc')),
|
||||
'duration': parse_duration(video_info.get('duration')),
|
||||
'episode_number': int_or_none(video_info.get('episode')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
def extract_info_from_page(self, webpage, video_id):
|
||||
inputs = self._hidden_inputs(webpage)
|
||||
info = self._parse_json(inputs.get('data_sync', ''), video_id, compat_urllib_parse_unquote)
|
||||
|
||||
return info, info['langInfo']['langId'], info['langInfo']['areaCode']
|
||||
|
||||
|
||||
class WeTvIE(WeTvBaseInfoExtractor):
|
||||
IE_NAME = 'wetv'
|
||||
IE_DESC = 'WeTV.vip'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
wetv:|
|
||||
https?://(?:m\.)?wetv\.vip/
|
||||
(?:(?P<language>[a-z]{2}(?:-[a-z]{2})?)/)?
|
||||
(?:play/){,2}
|
||||
(?:(?P<playlist_id>[a-z\d]{15})(?:-[^/]*)?/)?
|
||||
(?:play\?vid=)?
|
||||
)
|
||||
(?P<id>[a-z\d]{11})
|
||||
(?:$|[^a-z\d])'''
|
||||
_TESTS = [{
|
||||
'url': 'https://wetv.vip/play?vid=o00318x0wds',
|
||||
'md5': '6b07174a61ed9c1dfb8defab3b40ba12',
|
||||
'info_dict': {
|
||||
'id': 'o00318x0wds',
|
||||
'ext': 'mp4',
|
||||
'title': "EP1\uff1aThe King's Avatar",
|
||||
}
|
||||
}, {
|
||||
'url': 'https://wetv.vip/en/play/jenizogwk2t8400/o00318x0wds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# user video
|
||||
'url': 'https://wetv.vip/en/play/a3150lwr4jn-Ve-Po-Ad%20Review%20%2F%20HowTo',
|
||||
'md5': 'b053543f584bb4ae10767b5c6bf67807',
|
||||
'info_dict': {
|
||||
'id': 'a3150lwr4jn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ve-Po-Ad Review / HowTo',
|
||||
}
|
||||
}, {
|
||||
# non-m3u8
|
||||
'url': 'https://wetv.vip/en/play/h31438yuulr',
|
||||
'md5': 'c1b83ac4653d38b2d5e423caeab4148b',
|
||||
'info_dict': {
|
||||
'id': 'h31438yuulr',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gokukoku no Brynhildr - "Ichiban Boshi" \u2014 Full Ending',
|
||||
}
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def create_guid():
|
||||
return ''.join([random.choice(string.digits + string.ascii_lowercase) for _ in range(32)])
|
||||
|
||||
def _real_initialize(self):
|
||||
self.ckey = CKey()
|
||||
self.default_quality = 'hd'
|
||||
self.common_params = {
|
||||
'charge': 0,
|
||||
'defaultfmt': 'auto',
|
||||
'otype': 'json',
|
||||
'platform': 4830201,
|
||||
'sdtfrom': 1002,
|
||||
'defnpayver': 0,
|
||||
'appVer': '3.5.57',
|
||||
'sphttps': 1,
|
||||
'spwm': 4,
|
||||
'fhdswitch': 0,
|
||||
'show1080p': 0,
|
||||
'isHLS': 1,
|
||||
'dtype': 3,
|
||||
'sphls': 2,
|
||||
'spgzip': 1,
|
||||
'dlver': 2,
|
||||
'drm': 32,
|
||||
'spau': 1,
|
||||
'spaudio': 15,
|
||||
'spsrt': 1,
|
||||
'spvideo': 16,
|
||||
'defsrc': 2,
|
||||
'encryptVer': '8.1',
|
||||
'fp2p': 1,
|
||||
'spadseg': 1,
|
||||
'guid': WeTvIE.create_guid(),
|
||||
'logintoken': '{"main_login":"","openid":"","appid":"","access_token":"","vuserid":"","vusession":""}',
|
||||
}
|
||||
|
||||
def generate_jsonp_url(self, quality, video_id, url, playlist_id, lang_code, country_code):
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
timestamp = time.time()
|
||||
|
||||
params = self.common_params.copy()
|
||||
params.update({
|
||||
'defn': quality,
|
||||
'vid': video_id,
|
||||
'cid': playlist_id,
|
||||
'lang_code': lang_code,
|
||||
'country_code': country_code,
|
||||
'flowid': '{}_{}'.format(WeTvIE.create_guid(), params['platform']),
|
||||
'host': parsed_url.netloc,
|
||||
'refer': parsed_url.netloc,
|
||||
'ehost': compat_urlparse.urlunparse((parsed_url.scheme, parsed_url.netloc, parsed_url.path, None, None, None)),
|
||||
'tm': int(timestamp),
|
||||
'_{}'.format(int(timestamp * 1000)): '',
|
||||
'callback': 'txplayerJsonpCallBack_getinfo_{}'.format(random.randint(10000, 1000000)),
|
||||
})
|
||||
params['cKey'] = self.ckey.make(
|
||||
video_id, compat_str(params['tm']), params['appVer'], params['guid'],
|
||||
compat_str(params['platform']), url, std_headers['User-Agent'])
|
||||
|
||||
return 'https://play.wetv.vip/getvinfo?{}'.format(compat_urllib_parse_urlencode(params))
|
||||
|
||||
def get_jsonp_data(self, quality, video_id, *args):
|
||||
jsonp_url = self.generate_jsonp_url(quality, video_id, *args)
|
||||
# "accept-encoding: gzip" results in
|
||||
# EOFError: Compressed file ended before the end-of-stream marker was reached
|
||||
return self._download_json(jsonp_url, video_id, transform_source=strip_jsonp,
|
||||
note='Downloading {} metadata'.format(quality),
|
||||
headers={'Accept-Encoding': 'deflate'})
|
||||
|
||||
@staticmethod
|
||||
def extract_qualities(data):
|
||||
qualities = []
|
||||
for quality in data['fl']['fi']:
|
||||
id = quality['name']
|
||||
qualities.append({
|
||||
'format_id': id,
|
||||
'format_note': quality['cname'],
|
||||
'filesize_approx': quality['fs'],
|
||||
})
|
||||
|
||||
return qualities
|
||||
|
||||
@staticmethod
|
||||
def extract_format(data):
|
||||
video_info = data['vl']['vi'][0]
|
||||
|
||||
url = video_info['ul']['ui'][random.randint(0, 2)]['url']
|
||||
if 'fvkey' in video_info:
|
||||
url += '{}?vkey={}'.format(video_info['fn'], video_info['fvkey'])
|
||||
elif url.endswith('/'):
|
||||
url += '{}.m3u8?ver=4'.format(video_info['fn'])
|
||||
|
||||
return {
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
'width': int_or_none(video_info.get('vw')),
|
||||
'height': int_or_none(video_info.get('vh')),
|
||||
}
|
||||
|
||||
def get_formats_and_data(self, *args):
|
||||
formats = []
|
||||
|
||||
default_quality_data = self.get_jsonp_data(self.default_quality, *args)
|
||||
qualities = WeTvIE.extract_qualities(default_quality_data)
|
||||
|
||||
for quality in qualities:
|
||||
id = quality['format_id']
|
||||
if id == self.default_quality:
|
||||
data = default_quality_data
|
||||
else:
|
||||
data = self.get_jsonp_data(id, *args)
|
||||
|
||||
quality.update(WeTvIE.extract_format(data))
|
||||
|
||||
formats.append(quality)
|
||||
|
||||
return formats, default_quality_data
|
||||
|
||||
def _get_subtitles(self, data):
|
||||
subtitles = {}
|
||||
for subtitle_info in data['sfl'].get('fi', []):
|
||||
subtitles[subtitle_info['lang'].lower()] = [{'url': subtitle_info['url']}]
|
||||
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
language = mobj.group('language')
|
||||
# urls like this: https://wetv.vip/en/play/jenizogwk2t8400/play?vid=p0031yjo98d
|
||||
# when opened in new tab - load series page (and start playing the first episode)
|
||||
# when clicked on the website - load clicked episode
|
||||
# solution: use https://wetv.vip/en/play/p0031yjo98d instead
|
||||
good_url = 'https://wetv.vip/{}/play/{}'.format(language or 'en', video_id)
|
||||
|
||||
if smuggled_data:
|
||||
# playlist extractor sets the metadata like title, duration, etc.,
|
||||
# no need to load the webpage again
|
||||
result = {}
|
||||
playlist_id = smuggled_data['playlist_id']
|
||||
lang_code = smuggled_data['lang_code']
|
||||
country_code = smuggled_data['country_code']
|
||||
else:
|
||||
webpage = self._download_webpage(good_url, video_id)
|
||||
|
||||
full_url = self._og_search_url(webpage)
|
||||
mobj = re.match(self._VALID_URL, full_url)
|
||||
playlist_id = mobj.group('playlist_id') or ''
|
||||
|
||||
info, lang_code, country_code = self.extract_info_from_page(webpage, video_id)
|
||||
result = WeTvBaseInfoExtractor.parse_video_info(info['videoInfo'])
|
||||
|
||||
formats, data = self.get_formats_and_data(video_id, good_url, playlist_id, lang_code, country_code)
|
||||
|
||||
subtitles = self.extract_subtitles(data)
|
||||
|
||||
result.update({
|
||||
'id': video_id,
|
||||
'url': 'http://example.com/example.mp4',
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return result
|
||||
|
||||
|
||||
class WeTvPlaylistIE(WeTvBaseInfoExtractor):
|
||||
IE_NAME = 'wetv:playlist'
|
||||
IE_DESC = 'WeTV.vip playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:m\.)?wetv\.vip/
|
||||
(?:[a-z]{2}(?:-[a-z]{2})?/)?
|
||||
play(?:/|\?cid=)
|
||||
(?P<id>[a-z\d]{15})
|
||||
(?:$|[^a-z\d])'''
|
||||
_TESTS = [{
|
||||
'url': 'https://wetv.vip/en/play/jenizogwk2t8400',
|
||||
'info_dict': {
|
||||
'id': 'jenizogwk2t8400',
|
||||
'title': "The King's Avatar",
|
||||
'description': 'md5:eca1c149133af485673d7676d4eff0c9',
|
||||
},
|
||||
'playlist_count': 40,
|
||||
}, {
|
||||
'url': 'https://wetv.vip/play?cid=jenizogwk2t8400',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
info, lang_code, country_code = self.extract_info_from_page(webpage, playlist_id)
|
||||
|
||||
entries = []
|
||||
for video_info in info['videoList']:
|
||||
parsed_info = WeTvBaseInfoExtractor.parse_video_info(video_info)
|
||||
smuggled_url = smuggle_url(
|
||||
'wetv:{}'.format(parsed_info['id'], ),
|
||||
{
|
||||
'playlist_id': playlist_id,
|
||||
'lang_code': lang_code,
|
||||
'country_code': country_code,
|
||||
}
|
||||
)
|
||||
parsed_info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggled_url,
|
||||
'ie_key': WeTvIE.ie_key(),
|
||||
})
|
||||
entries.append(parsed_info)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': info.get('coverInfo', {}).get('title'),
|
||||
'description': info.get('coverInfo', {}).get('description'),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class CKey:
|
||||
def __init__(self):
|
||||
self.encryption_arrays = [[
|
||||
1332468387, -1641050960, 2136896045, -1629555948,
|
||||
1399201960, -850809832, -1307058635, 751381793,
|
||||
-1933648423, 1106735553, -203378700, -550927659,
|
||||
766369351, 1817882502, -1615200142, 1083409063,
|
||||
-104955314, -1780208184, 173944250, 1254993693,
|
||||
1422337688, -1054667952, -880990486, -2119136777,
|
||||
-1822404972, 1380140484, -1723964626, 412019417,
|
||||
-890799303, -1734066435, 26893779, 420787978,
|
||||
-1337058067, 686432784, 695238595, 811911369,
|
||||
-391724567, -1068702727, -381903814, -648522509,
|
||||
-1266234148, 1959407397, -1644776673, 1152313324]]
|
||||
d = [None] * 256
|
||||
f = d.copy()
|
||||
g = d.copy()
|
||||
h = d.copy()
|
||||
j = d.copy()
|
||||
o = d.copy()
|
||||
for i in range(256):
|
||||
o[i] = i << 1 if i < 128 else i << 1 ^ 283
|
||||
|
||||
t = 0
|
||||
u = 0
|
||||
for i in range(256):
|
||||
v = u ^ u << 1 ^ u << 2 ^ u << 3 ^ u << 4
|
||||
v = CKey.rshift(v, 8) ^ 255 & v ^ 99
|
||||
d[t] = v
|
||||
x = o[t]
|
||||
z = o[o[x]]
|
||||
A = CKey.int32(257 * o[v] ^ 16843008 * v)
|
||||
f[t] = CKey.int32(A << 24 | CKey.rshift(A, 8))
|
||||
g[t] = CKey.int32(A << 16 | CKey.rshift(A, 16))
|
||||
h[t] = CKey.int32(A << 8 | CKey.rshift(A, 24))
|
||||
j[t] = A
|
||||
if t == 0:
|
||||
t = 1
|
||||
u = 1
|
||||
else:
|
||||
t = x ^ o[o[o[z ^ x]]]
|
||||
u ^= o[o[u]]
|
||||
|
||||
self.encryption_arrays.append(f)
|
||||
self.encryption_arrays.append(g)
|
||||
self.encryption_arrays.append(h)
|
||||
self.encryption_arrays.append(j)
|
||||
self.encryption_arrays.append(d)
|
||||
|
||||
@staticmethod
|
||||
def rshift(val, n):
|
||||
return (val & 0xFFFFFFFF) >> n
|
||||
|
||||
@staticmethod
|
||||
def int32(val):
|
||||
return c_int32(val).value
|
||||
|
||||
@staticmethod
|
||||
def encode_text(text):
|
||||
length = len(text)
|
||||
arr = [0] * (length // 4)
|
||||
for i in range(length):
|
||||
arr[i // 4] |= (255 & ord(text[i])) << 24 - i % 4 * 8
|
||||
return arr, length
|
||||
|
||||
@staticmethod
|
||||
def decode_text(arr, length):
|
||||
text_array = []
|
||||
for i in range(length):
|
||||
text_array.append('{:02x}'.format(
|
||||
CKey.rshift(arr[i // 4], 24 - i % 4 * 8) & 255))
|
||||
|
||||
return ''.join(text_array)
|
||||
|
||||
@staticmethod
|
||||
def calculate_hash(text):
|
||||
result = 0
|
||||
for char in text:
|
||||
result = CKey.int32(result << 5) - result + compat_ord(char)
|
||||
return compat_str(result)
|
||||
|
||||
@staticmethod
|
||||
def pad_text(text):
|
||||
pad_length = 16 - len(text) % 16
|
||||
return text + compat_chr(pad_length) * pad_length
|
||||
|
||||
def encrypt(self, arr):
|
||||
for i in range(0, len(arr), 4):
|
||||
self.main_algorithm(arr, i)
|
||||
|
||||
def main_algorithm(self, a, b):
|
||||
c, d, e, f, g, h = self.encryption_arrays
|
||||
|
||||
if b == 0:
|
||||
xor_arr = [22039283, 1457920463, 776125350, -1941999367]
|
||||
else:
|
||||
xor_arr = a[b - 4: b]
|
||||
|
||||
for i, val in enumerate(xor_arr):
|
||||
a[b + i] ^= val
|
||||
|
||||
j = a[b] ^ c[0]
|
||||
k = a[b + 1] ^ c[1]
|
||||
l = a[b + 2] ^ c[2]
|
||||
m = a[b + 3] ^ c[3]
|
||||
n = 4
|
||||
for _ in range(9):
|
||||
q = (d[CKey.rshift(j, 24)] ^ e[CKey.rshift(k, 16) & 255]
|
||||
^ f[CKey.rshift(l, 8) & 255] ^ g[255 & m] ^ c[n])
|
||||
s = (d[CKey.rshift(k, 24)] ^ e[CKey.rshift(l, 16) & 255]
|
||||
^ f[CKey.rshift(m, 8) & 255] ^ g[255 & j] ^ c[n + 1])
|
||||
t = (d[CKey.rshift(l, 24)] ^ e[CKey.rshift(m, 16) & 255]
|
||||
^ f[CKey.rshift(j, 8) & 255] ^ g[255 & k] ^ c[n + 2])
|
||||
m = (d[CKey.rshift(m, 24)] ^ e[CKey.rshift(j, 16) & 255]
|
||||
^ f[CKey.rshift(k, 8) & 255] ^ g[255 & l] ^ c[n + 3])
|
||||
j = q
|
||||
k = s
|
||||
l = t
|
||||
n += 4
|
||||
|
||||
q = CKey.int32(h[CKey.rshift(j, 24)] << 24
|
||||
| h[CKey.rshift(k, 16) & 255] << 16
|
||||
| h[CKey.rshift(l, 8) & 255] << 8
|
||||
| h[255 & m]) ^ c[n]
|
||||
s = CKey.int32(h[CKey.rshift(k, 24)] << 24
|
||||
| h[CKey.rshift(l, 16) & 255] << 16
|
||||
| h[CKey.rshift(m, 8) & 255] << 8
|
||||
| h[255 & j]) ^ c[n + 1]
|
||||
t = CKey.int32(h[CKey.rshift(l, 24)] << 24
|
||||
| h[CKey.rshift(m, 16) & 255] << 16
|
||||
| h[CKey.rshift(j, 8) & 255] << 8
|
||||
| h[255 & k]) ^ c[n + 2]
|
||||
m = CKey.int32(h[CKey.rshift(m, 24)] << 24
|
||||
| h[CKey.rshift(j, 16) & 255] << 16
|
||||
| h[CKey.rshift(k, 8) & 255] << 8
|
||||
| h[255 & l]) ^ c[n + 3]
|
||||
a[b] = q
|
||||
a[b + 1] = s
|
||||
a[b + 2] = t
|
||||
a[b + 3] = m
|
||||
|
||||
def make(self, vid, tm, app_ver, guid, platform, url,
|
||||
# user_agent is shortened anyway
|
||||
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:81.',
|
||||
referer='', nav_code_name='Mozilla',
|
||||
nav_name='Netscape', nav_platform='Win32'):
|
||||
text_parts = [
|
||||
'', vid, tm, 'mg3c3b04ba', app_ver, guid, platform,
|
||||
url[:48], user_agent[:48].lower(), referer[:48],
|
||||
nav_code_name, nav_name, nav_platform, '00', ''
|
||||
]
|
||||
text_parts.insert(1, CKey.calculate_hash('|'.join(text_parts)))
|
||||
|
||||
text = CKey.pad_text('|'.join(text_parts))
|
||||
[arr, length] = CKey.encode_text(text)
|
||||
self.encrypt(arr)
|
||||
return CKey.decode_text(arr, length).upper()
|
Loading…
Reference in New Issue
Block a user