1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-22 16:44:32 +01:00

[AnimeLab] Extract both English and Japanese all in one go, if available

This commit is contained in:
Mariusz Skoneczko 2020-04-23 21:03:20 +10:00
parent c3dca171d0
commit 2f6d029e96

View File

@ -94,110 +94,118 @@ class AnimeLabIE(AnimeLabBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
# unfortunately we can get different URLs for the same formats
# e.g. if we are using a "free" account so no dubs available
# (so _remove_duplicate_formats is not effective)
# so we use a dictionary as a workaround
formats = {}
for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
'https://www.animelab.com/player/%s/dubbed'):
actual_url = language_option_url % display_id
webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
raw_data = video_collection[position]['videoEntry']
raw_data = video_collection[position]['videoEntry']
video_id = str_or_none(raw_data['id'])
video_id = str_or_none(raw_data['id'])
# create a title from many sources (while grabbing other info)
# TODO use more fallback sources to get some of these
series = raw_data.get('showTitle')
video_type = raw_data.get('videoEntryType', {}).get('name')
episode_number = raw_data.get('episodeNumber')
episode_name = raw_data.get('name')
# create a title from many sources (while grabbing other info)
# TODO use more fallback sources to get some of these
series = raw_data.get('showTitle')
video_type = raw_data.get('videoEntryType', {}).get('name')
episode_number = raw_data.get('episodeNumber')
episode_name = raw_data.get('name')
title_parts = (series, video_type, episode_number, episode_name)
if None not in title_parts:
title = '%s - %s %s - %s' % title_parts
else:
title = episode_name
title_parts = (series, video_type, episode_number, episode_name)
if None not in title_parts:
title = '%s - %s %s - %s' % title_parts
else:
title = episode_name
description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
duration = int_or_none(raw_data.get('duration'))
duration = int_or_none(raw_data.get('duration'))
thumbnail_data = raw_data.get('images', [])
thumbnails = []
for thumbnail in thumbnail_data:
for instance in thumbnail['imageInstances']:
image_data = instance.get('imageInfo', {})
thumbnails.append({
'id': str_or_none(image_data.get('id')),
'url': image_data.get('fullPath'),
'width': image_data.get('width'),
'height': image_data.get('height'),
})
thumbnail_data = raw_data.get('images', [])
thumbnails = []
for thumbnail in thumbnail_data:
for instance in thumbnail['imageInstances']:
image_data = instance.get('imageInfo', {})
thumbnails.append({
'id': str_or_none(image_data.get('id')),
'url': image_data.get('fullPath'),
'width': image_data.get('width'),
'height': image_data.get('height'),
})
season_data = raw_data.get('season', {})
season = str_or_none(season_data.get('name'))
season_number = int_or_none(season_data.get('seasonNumber'))
season_id = str_or_none(season_data.get('id'))
season_data = raw_data.get('season', {})
season = str_or_none(season_data.get('name'))
season_number = int_or_none(season_data.get('seasonNumber'))
season_id = str_or_none(season_data.get('id'))
formats = []
for video_data in raw_data['videoList']:
current_video_list = {}
current_video_list['language'] = video_data.get('language', {}).get('languageCode')
for video_data in raw_data['videoList']:
current_video_list = {}
current_video_list['language'] = video_data.get('language', {}).get('languageCode')
is_hardsubbed = video_data.get('hardSubbed')
is_hardsubbed = video_data.get('hardSubbed')
for video_instance in video_data['videoInstances']:
httpurl = video_instance.get('httpUrl')
url = httpurl if httpurl else video_instance.get('rtmpUrl')
if url is None:
# this video format is unavailable to the user (not premium etc.)
continue
for video_instance in video_data['videoInstances']:
httpurl = video_instance.get('httpUrl')
url = httpurl if httpurl else video_instance.get('rtmpUrl')
if url is None:
# this video format is unavailable to the user (not premium etc.)
continue
current_format = current_video_list.copy()
current_format = current_video_list.copy()
format_id_parts = []
format_id_parts = []
format_id_parts.append(str_or_none(video_instance.get('id')))
format_id_parts.append(str_or_none(video_instance.get('id')))
if is_hardsubbed is not None:
if is_hardsubbed:
format_id_parts.append('yeshardsubbed')
if is_hardsubbed is not None:
if is_hardsubbed:
format_id_parts.append('yeshardsubbed')
else:
format_id_parts.append('nothardsubbed')
format_id_parts.append(current_format['language'])
format_id = '_'.join([x for x in format_id_parts if x is not None])
ext = determine_ext(url)
if ext == 'm3u8':
for format_ in self._extract_m3u8_formats(
url, video_id, m3u8_id=format_id, fatal=False):
formats[format_['format_id']] = format_
continue
elif ext == 'mpd':
for format_ in self._extract_mpd_formats(
url, video_id, mpd_id=format_id, fatal=False):
formats[format_['format_id']] = format_
continue
current_format['url'] = url
quality_data = video_instance.get('videoQuality')
if quality_data:
quality = quality_data.get('name') or quality_data.get('description')
else:
format_id_parts.append('nothardsubbed')
quality = None
format_id_parts.append(current_format['language'])
height = None
if quality:
height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
format_id = '_'.join([x for x in format_id_parts if x is not None])
if height is None:
self.report_warning('Could not get height of video')
else:
current_format['height'] = height
current_format['format_id'] = format_id
ext = determine_ext(url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
url, video_id, m3u8_id=format_id, fatal=False)
formats.extend(m3u8_formats)
continue
elif ext == 'mpd':
mpd_formats = self._extract_mpd_formats(
url, video_id, mpd_id=format_id, fatal=False)
formats.extend(mpd_formats)
continue
current_format['url'] = url
quality_data = video_instance.get('videoQuality')
if quality_data:
quality = quality_data.get('name') or quality_data.get('description')
else:
quality = None
height = None
if quality:
height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
if height is None:
self.report_warning('Could not get height of video')
else:
current_format['height'] = height
current_format['format_id'] = format_id
formats.append(current_format)
formats[current_format['format_id']] = current_format
formats = list(formats.values())
self._sort_formats(formats)
return {