Fixed incorrect playlist ID and video results.

This commit is contained in:
Crypto90 2020-10-24 15:22:30 +02:00
parent 4dfb0763ba
commit f37d0b755d
1 changed file with 39 additions and 12 deletions

View File

@ -374,6 +374,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
titles_in_page = [] titles_in_page = []
playlist_video_id_in_page = [] playlist_video_id_in_page = []
durations_in_page = [] durations_in_page = []
print "===============================================================================================================\n"
print page
print "===============================================================================================================\n"
self.extract_videos_from_page_impl(self._VIDEO_RE, page, ids_in_page, titles_in_page, durations_in_page, playlist_video_id_in_page) self.extract_videos_from_page_impl(self._VIDEO_RE, page, ids_in_page, titles_in_page, durations_in_page, playlist_video_id_in_page)
return zip(ids_in_page, titles_in_page, durations_in_page, playlist_video_id_in_page) return zip(ids_in_page, titles_in_page, durations_in_page, playlist_video_id_in_page)
@ -3250,25 +3253,49 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
if not isinstance(content, dict): if not isinstance(content, dict):
continue continue
video = content.get('videoRenderer') video = content.get('videoRenderer')
if not isinstance(video, dict): video_playlist = content.get('playlistRenderer')
if not isinstance(video, dict) and not isinstance(video_playlist, dict):
continue continue
video_id = video.get('videoId') if video:
if not video_id: video_id = video.get('videoId')
else:
video_id = None
if video_playlist:
video_playlist_id = video_playlist.get('playlistId')
else:
video_playlist_id = None
if not video_id and not video_playlist_id:
continue continue
title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) if video_id:
duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) video_url = video_id
view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
view_count = int_or_none(self._search_regex( duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
r'^(\d+)', re.sub(r'\s', '', view_count_text), description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
'view count', default=None)) view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str) view_count = int_or_none(self._search_regex(
r'^(\d+)', re.sub(r'\s', '', view_count_text),
'view count', default=None))
uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
elif video_playlist_id:
# Youtube playlist id found, duration will get the playlist video count
title = try_get(video_playlist, lambda x: x['title']['simpleText'], compat_str)
duration = video_playlist.get('videoCount')
video_id_original = try_get(video_playlist, lambda x: x['navigationEndpoint']['watchEndpoint']['videoId'], compat_str) or 'NA'
video_url = 'https://www.youtube.com/watch?v=' + video_id_original + '&list=' + video_playlist_id
video_id = video_playlist_id
description = None
view_count_text = ''
view_count = None
uploader = try_get(video_playlist, lambda x: x['shortBylineText']['runs'][0]['text'], compat_str)
total += 1 total += 1
yield { yield {
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': YoutubeIE.ie_key(), 'ie_key': YoutubeIE.ie_key(),
'id': video_id, 'id': video_id,
'url': video_id, 'url': video_url,
'title': title, 'title': title,
'description': description, 'description': description,
'duration': duration, 'duration': duration,