1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-22 08:34:32 +01:00

[iqiyi] Fix extraction

This commit is contained in:
Qingfan Wu 2020-10-03 16:55:39 -07:00 committed by GitHub
parent d65d89183f
commit dd256a8d05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -343,17 +343,22 @@ class IqiyiIE(InfoExtractor):
# There's no simple way to determine whether an URL is a playlist or not # There's no simple way to determine whether an URL is a playlist or not
# Sometimes there are playlist links in individual videos, so treat it # Sometimes there are playlist links in individual videos, so treat it
# as a single video first # as a single video first
tvid = self._search_regex( tvid = self._search_regex([
r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None) r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)',
r'param\[\'tvid\'\]\s*=\s*"(.+?)"',
r'"tvid":"(\d+)"',
], webpage, 'tvid', default=None)
if tvid is None: if tvid is None:
playlist_result = self._extract_playlist(webpage) playlist_result = self._extract_playlist(webpage)
if playlist_result: if playlist_result:
return playlist_result return playlist_result
raise ExtractorError('Can\'t find any video') raise ExtractorError('Can\'t find any video')
video_id = self._search_regex( video_id = self._search_regex([
r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)',
r'param\[\'vid\'\]\s*=\s*"(.+?)"',
r'"vid":"(\w+)"'
], webpage, 'video_id')
formats = [] formats = []
for _ in range(5): for _ in range(5):
raw_data = self.get_raw_data(tvid, video_id) raw_data = self.get_raw_data(tvid, video_id)
@@ -385,8 +390,7 @@ class IqiyiIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
title = (get_element_by_id('widget-videotitle', webpage) title = (get_element_by_id('widget-videotitle', webpage)
or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title')) or self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title'))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,