This commit is contained in:
tpikonen 2020-09-28 20:49:07 -05:00 committed by GitHub
commit 1477e104e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 123 additions and 97 deletions

View File

@ -1673,26 +1673,30 @@ class YoutubeDL(object):
formats_query = self.params.get('subtitlesformat', 'best')
formats_preference = formats_query.split('/') if formats_query else []
subs = {}
subs = collections.defaultdict(list)
for lang in requested_langs:
formats = available_subs.get(lang)
if formats is None:
self.report_warning('%s subtitles not available for %s' % (lang, video_id))
continue
named = collections.defaultdict(list)
for f in formats:
named[f.get('name', '')].append(f)
for name, fmts in named.items():
for ext in formats_preference:
if ext == 'best':
f = formats[-1]
f = fmts[-1]
break
matches = list(filter(lambda f: f['ext'] == ext, formats))
matches = [f for f in fmts if f['ext'] == ext]
if matches:
f = matches[-1]
break
else:
f = formats[-1]
f = fmts[-1]
self.report_warning(
'No subtitle format found matching "%s" for language %s, '
'using %s' % (formats_query, lang, f['ext']))
subs[lang] = f
subs[lang].append(f)
return subs
def __forced_printings(self, info_dict, filename, incomplete):
@ -1813,11 +1817,13 @@ class YoutubeDL(object):
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
for sub_lang, sub_info_list in subtitles.items():
for sub_info in sub_info_list:
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
sub_name = sub_info.get('name', '')
sub_filename = subtitles_filename(filename, sub_lang, sub_name, sub_format, info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
self.to_screen('[info] Video subtitle %s is already present' % (sub_filename))
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
@ -2226,10 +2232,15 @@ class YoutubeDL(object):
return
self.to_screen(
'Available %s for %s:' % (name, video_id))
table = []
for lang, formats in subtitles.items():
named = collections.defaultdict(list)
for f in formats:
named[f.get('name', '')].append(f['ext'])
for name in named.keys():
table.append([lang, name, ', '.join(e for e in reversed(named[name]))])
self.to_screen(render_table(
['Language', 'formats'],
[[lang, ', '.join(f['ext'] for f in reversed(formats))]
for lang, formats in subtitles.items()]))
['Language', 'name', 'formats'], table))
def urlopen(self, req):
""" Start an HTTP download """

View File

@ -249,6 +249,8 @@ class InfoExtractor(object):
entry and one of:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
* "name": (optional) Name or description of the subtitles, used
when there is more than one subtitles file for this language
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions

View File

@ -1464,18 +1464,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for track in subs_doc.findall('track'):
lang = track.attrib['lang_code']
if lang in sub_lang_list:
continue
sub_formats = sub_lang_list[lang]
else:
sub_formats = []
for ext in self._SUBTITLE_FORMATS:
name = track.attrib['name']
params = compat_urllib_parse_urlencode({
'lang': lang,
'v': video_id,
'fmt': ext,
'name': track.attrib['name'].encode('utf-8'),
'name': name.encode('utf-8'),
})
sub_formats.append({
'url': 'https://www.youtube.com/api/timedtext?' + params,
'ext': ext,
'name': name,
})
sub_lang_list[lang] = sub_formats
if not sub_lang_list:

View File

@ -385,15 +385,17 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
filename = information['filepath']
ext = information['ext']
sub_langs = []
sub_langs = set()
sub_filenames = []
webm_vtt_warn = False
for lang, sub_info in subtitles.items():
for lang, sub_info_list in subtitles.items():
for sub_info in sub_info_list:
sub_ext = sub_info['ext']
if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang)
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
sub_langs.add(lang)
sub_name = sub_info.get('name', '')
sub_filenames.append(subtitles_filename(filename, lang, sub_name, sub_ext, ext))
else:
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
webm_vtt_warn = True
@ -611,15 +613,17 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
return [], info
self._downloader.to_screen('[ffmpeg] Converting subtitles')
sub_filenames = []
for lang, sub in subs.items():
for lang, sublist in subs.items():
for sub in sublist:
ext = sub['ext']
if ext == new_ext:
self._downloader.to_screen(
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
continue
old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
name = sub.get('name', '')
old_file = subtitles_filename(filename, lang, name, ext, info.get('ext'))
sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
new_file = subtitles_filename(filename, lang, name, new_ext, info.get('ext'))
if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning(
@ -636,10 +640,13 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
f.write(srt_data)
old_file = srt_file
subs[lang] = {
slist_new = [s for s in subs[lang] if s.get('name', '') != name]
slist_new.append({
'name': name,
'ext': 'srt',
'data': srt_data
}
})
subs[lang] = slist_new
if new_ext == 'srt':
continue
@ -649,9 +656,12 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
with io.open(new_file, 'rt', encoding='utf-8') as f:
subs[lang] = {
slist_new = [s for s in subs[lang] if s.get('name', '') != name]
slist_new.append({
'name': name,
'ext': new_ext,
'data': f.read(),
}
})
subs[lang] = slist_new
return sub_filenames, info

View File

@ -3002,8 +3002,8 @@ def determine_ext(url, default_ext='unknown_video'):
return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
def subtitles_filename(filename, sub_lang, sub_name, sub_format, expected_real_ext=None):
    """Build the file name of a subtitle file belonging to *filename*.

    The string ``[<sub_name>.]<sub_lang>.<sub_format>`` is passed to
    replace_extension() together with *filename* and *expected_real_ext*,
    and whatever replace_extension() returns is returned unchanged.  The
    name component (and its trailing dot) is omitted when *sub_name* is
    empty or None.

    Path separators ('/' and '\\') inside *sub_name* are replaced with
    '_' before the name is used.
    """
    suffix = sub_lang + '.' + sub_format
    if sub_name:
        # The track name is free text coming from remote metadata.  A path
        # separator in it would make the result point into a (usually
        # non-existent) subdirectory, or outside the target directory, so
        # neutralise separators here -- every caller that writes, embeds or
        # converts subtitles derives its path from this function, so the
        # names stay consistent with each other.
        safe_name = sub_name.replace('/', '_').replace('\\', '_')
        suffix = safe_name + '.' + suffix
    return replace_extension(filename, suffix, expected_real_ext)
def date_from_str(date_str):