[acfun] Add new extractor

This commit is contained in:
johnsmith2077 2020-10-05 12:38:25 +08:00
parent d65d89183f
commit cef5fe698a
2 changed files with 225 additions and 0 deletions

View File

@ -0,0 +1,220 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import (
compat_cookiejar,
compat_urllib_parse_urlencode,
compat_urllib_request,
)
from ..utils import (
int_or_none,
float_or_none,
str_or_none,
str_to_int,
sanitized_Request,
ExtractorError,
)
class BasicAcfunInfoExtractor(InfoExtractor):
_FAKE_HEADERS = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', # noqa
'Accept-Charset': 'UTF-8,*;q=0.5',
'Accept-Encoding': 'gzip,deflate,sdch',
'Accept-Language': 'en-US,en;q=0.8',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0', # noqa
}
def _extract_formats(self, currentVideoInfo):
durationMillis = currentVideoInfo.get('durationMillis')
if 'ksPlayJson' in currentVideoInfo:
ksPlayJson = ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
representation = ksPlayJson.get('adaptationSet')[0].get('representation')
formats = []
for stream in representation:
size = float_or_none(durationMillis) * stream["avgBitrate"] / 8
stream_id = stream["qualityLabel"]
quality = stream["qualityType"]
formats += [{
'url': stream["url"],
'ext': 'mp4',
'width': stream.get('width'),
'height': stream.get('height'),
'filesize': size,
}]
formats = formats[::-1]
self._sort_formats(formats)
return formats
class AcfunIE(BasicAcfunInfoExtractor):
_VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'
_TESTS = [
{
'url': 'https://www.acfun.cn/v/ac18184362',
'info_dict': {
'id': '18184362',
'ext': 'mp4',
'duration': 192.042,
'title': '【AC娘】魔性新单《极乐857》上线来和AC娘一起云蹦迪吧',
'uploader': 'AC娘本体',
'uploader_id': 23682490
}
},
{
'url': 'https://www.acfun.cn/v/ac17532274_3',
'info_dict': {
'id': '17532274_3',
'ext': 'mp4',
'duration': 233.770,
'title': '【AC娘x竾颜音】【周六狂欢24小时】TRAP七夕恋歌落入本娘爱的陷阱 - TRAP 阿婵',
'uploader': 'AC娘本体',
'uploader_id': 23682490
}
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
json_text = self._html_search_regex(r'(?s)videoInfo\s*=\s*(\{.*?\});', webpage, 'json_text')
json_data = json.loads(json_text)
title = json_data['title']
p_title = self._html_search_regex(r"<li\s[^<]*?class='[^']*active[^']*'.*?>(.*?)</li>", webpage, 'p_title', default=None)
if p_title:
title = '%s - %s' % (title, p_title)
uploader = json_data.get('user').get('name')
uploader_id = json_data.get('user').get('id')
currentVideoInfo = json_data.get('currentVideoInfo')
durationMillis = currentVideoInfo.get('durationMillis')
duration = durationMillis / 1000
formats = self._extract_formats(currentVideoInfo)
return {
'id': video_id,
'uploader_id': str_to_int(uploader_id),
'title': title,
'uploader': str_or_none(uploader),
'duration': float_or_none(duration),
'formats': formats
}
class AcfunBangumiIE(BasicAcfunInfoExtractor):
_VALID_URL = r'https?://www\.acfun\.cn/bangumi/aa(?P<id>[_\d]+)'
_TEST = {
'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1748679',
'info_dict': {
'id': '6002917_36188_1748679',
'ext': 'mp4',
'duration': 1437.076,
'title': '租借女友 第12话 告白和女友',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, headers=self._FAKE_HEADERS)
json_text = self._html_search_regex(r'(?s)bangumiData\s*=\s*(\{.*?\});', webpage, 'json_text')
json_data = json.loads(json_text)
title = json_data.get('showTitle') or json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
currentVideoInfo = json_data.get('currentVideoInfo')
durationMillis = currentVideoInfo.get('durationMillis')
duration = durationMillis / 1000
formats = self._extract_formats(currentVideoInfo)
return {
'id': video_id,
'title': title,
'duration': float_or_none(duration),
'formats': formats
}
class AcfunLiveIE(BasicAcfunInfoExtractor):
_VALID_URL = r'https?://live\.acfun\.cn/live/(?P<id>\d+)'
_TEST = {
'url': 'https://live.acfun.cn/live/36782183',
'only_matching': True,
'info_dict': {
'id': '36782183',
'ext': 'mp4',
# 'title': '看见兔兔就烦!',
'is_live': True,
}
}
def _real_extract(self, url):
live_id = self._match_id(url)
self._FAKE_HEADERS.update({
'Referer': url
})
# Firstly get _did cookie
fisrt_req = sanitized_Request(url, headers=self._FAKE_HEADERS)
first_res = compat_urllib_request.urlopen(fisrt_req)
for header_name, header_value in first_res.info().items():
if header_name.lower() == 'set-cookie':
cookies = header_value
if not cookies:
raise ExtractorError('Fail to fetch cookies')
cookies_dict = dict(c.strip(' ,').split('=', 1) for c in cookies.split(';'))
did_cookie = cookies_dict['_did']
self._FAKE_HEADERS.update({
'Cookie': '_did=%s' % did_cookie
})
# Login to get userId and acfun.api.visitor_st
login_data = compat_urllib_parse_urlencode({'sid': 'acfun.api.visitor'}).encode('ascii')
login_json = self._download_json(
'https://id.app.acfun.cn/rest/app/visitor/login',
live_id,
data=login_data,
headers=self._FAKE_HEADERS)
streams_url = "https://api.kuaishouzt.com/rest/zt/live/web/startPlay?subBiz=mainApp&kpn=ACFUN_APP&kpf=PC_WEB&userId=%d&did=%s&acfun.api.visitor_st=%s" % (
login_json['userId'],
did_cookie, login_json['acfun.api.visitor_st'])
# Fetch stream lists
fetch_streams_data = compat_urllib_parse_urlencode({
'authorId': int_or_none(live_id),
'pullStreamType': 'FLV'
}).encode('ascii')
streams_json = self._download_json(
streams_url,
live_id,
data=fetch_streams_data,
headers=self._FAKE_HEADERS)
# print(streams_json)
title = streams_json['data']['caption']
streams_info = json.loads(streams_json['data']['videoPlayRes']) # streams info
representation = streams_info['liveAdaptiveManifest'][0]['adaptationSet']['representation']
formats = []
for stream in representation:
quality = stream["qualityType"]
formats += [{
'url': stream["url"],
'ext': 'mp4',
'tbr': stream.get('bitrate'),
}]
self._sort_formats(formats)
return {
'id': live_id,
'title': self._live_title(title),
'formats': formats,
'is_live': True
}

View File

@ -18,6 +18,11 @@ from .acast import (
ACastIE,
ACastChannelIE,
)
from .acfun import (
AcfunIE,
AcfunBangumiIE,
AcfunLiveIE,
)
from .adn import ADNIE
from .adobeconnect import AdobeConnectIE
from .adobetv import (