1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-22 16:44:32 +01:00

Support multiple subs per lang, differentiate by name

Add a new column 'name' to --list-subs output.

With --write-sub --sub-lang foo, output every subtitle for the given
language that has a distinct 'name'.

The name will be a component in the output subtitle file names, before
the language code, if it is not empty.

Also adapt the ffmpeg postprocessing functions accordingly.
This commit is contained in:
Teemu Ikonen 2020-07-21 21:35:55 +03:00
parent c23c35c9c1
commit 51b70469ed
3 changed files with 115 additions and 94 deletions

View File

@ -1673,26 +1673,30 @@ class YoutubeDL(object):
formats_query = self.params.get('subtitlesformat', 'best') formats_query = self.params.get('subtitlesformat', 'best')
formats_preference = formats_query.split('/') if formats_query else [] formats_preference = formats_query.split('/') if formats_query else []
subs = {} subs = collections.defaultdict(list)
for lang in requested_langs: for lang in requested_langs:
formats = available_subs.get(lang) formats = available_subs.get(lang)
if formats is None: if formats is None:
self.report_warning('%s subtitles not available for %s' % (lang, video_id)) self.report_warning('%s subtitles not available for %s' % (lang, video_id))
continue continue
for ext in formats_preference: named = collections.defaultdict(list)
if ext == 'best': for f in formats:
f = formats[-1] named[f.get('name', '')].append(f)
break for name, fmts in named.items():
matches = list(filter(lambda f: f['ext'] == ext, formats)) for ext in formats_preference:
if matches: if ext == 'best':
f = matches[-1] f = fmts[-1]
break break
else: matches = [f for f in fmts if f['ext'] == ext]
f = formats[-1] if matches:
self.report_warning( f = matches[-1]
'No subtitle format found matching "%s" for language %s, ' break
'using %s' % (formats_query, lang, f['ext'])) else:
subs[lang] = f f = fmts[-1]
self.report_warning(
'No subtitle format found matching "%s" for language %s, '
'using %s' % (formats_query, lang, f['ext']))
subs[lang].append(f)
return subs return subs
def __forced_printings(self, info_dict, filename, incomplete): def __forced_printings(self, info_dict, filename, incomplete):
@ -1813,32 +1817,34 @@ class YoutubeDL(object):
# that way it will silently go on when used with unsupporting IE # that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles'] subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key']) ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items(): for sub_lang, sub_info_list in subtitles.items():
sub_format = sub_info['ext'] for sub_info in sub_info_list:
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) sub_format = sub_info['ext']
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): sub_name = sub_info.get('name', '')
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) sub_filename = subtitles_filename(filename, sub_lang, sub_name, sub_format, info_dict.get('ext'))
else: if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Writing video subtitles to: ' + sub_filename) self.to_screen('[info] Video subtitle %s is already present' % (sub_filename))
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/ytdl-org/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
else: else:
try: self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
sub_data = ie._request_webpage( if sub_info.get('data') is not None:
sub_info['url'], info_dict['id'], note=False).read() try:
with io.open(encodeFilename(sub_filename), 'wb') as subfile: # Use newline='' to prevent conversion of newline characters
subfile.write(sub_data) # See https://github.com/ytdl-org/youtube-dl/issues/10268
except (ExtractorError, IOError, OSError, ValueError) as err: with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
self.report_warning('Unable to download subtitle for "%s": %s' % subfile.write(sub_info['data'])
(sub_lang, error_to_compat_str(err))) except (OSError, IOError):
continue self.report_error('Cannot write subtitles file ' + sub_filename)
return
else:
try:
sub_data = ie._request_webpage(
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
except (ExtractorError, IOError, OSError, ValueError) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
if self.params.get('writeinfojson', False): if self.params.get('writeinfojson', False):
infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
@ -2226,10 +2232,15 @@ class YoutubeDL(object):
return return
self.to_screen( self.to_screen(
'Available %s for %s:' % (name, video_id)) 'Available %s for %s:' % (name, video_id))
table = []
for lang, formats in subtitles.items():
named = collections.defaultdict(list)
for f in formats:
named[f.get('name', '')].append(f['ext'])
for name in named.keys():
table.append([lang, name, ', '.join(e for e in reversed(named[name]))])
self.to_screen(render_table( self.to_screen(render_table(
['Language', 'formats'], ['Language', 'name', 'formats'], table))
[[lang, ', '.join(f['ext'] for f in reversed(formats))]
for lang, formats in subtitles.items()]))
def urlopen(self, req): def urlopen(self, req):
""" Start an HTTP download """ """ Start an HTTP download """

View File

@ -385,19 +385,21 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
filename = information['filepath'] filename = information['filepath']
ext = information['ext'] ext = information['ext']
sub_langs = [] sub_langs = set()
sub_filenames = [] sub_filenames = []
webm_vtt_warn = False webm_vtt_warn = False
for lang, sub_info in subtitles.items(): for lang, sub_info_list in subtitles.items():
sub_ext = sub_info['ext'] for sub_info in sub_info_list:
if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': sub_ext = sub_info['ext']
sub_langs.append(lang) if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext)) sub_langs.add(lang)
else: sub_name = sub_info.get('name', '')
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': sub_filenames.append(subtitles_filename(filename, lang, sub_name, sub_ext, ext))
webm_vtt_warn = True else:
self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files') if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
webm_vtt_warn = True
self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
if not sub_langs: if not sub_langs:
return [], information return [], information
@ -611,47 +613,55 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
return [], info return [], info
self._downloader.to_screen('[ffmpeg] Converting subtitles') self._downloader.to_screen('[ffmpeg] Converting subtitles')
sub_filenames = [] sub_filenames = []
for lang, sub in subs.items(): for lang, sublist in subs.items():
ext = sub['ext'] for sub in sublist:
if ext == new_ext: ext = sub['ext']
self._downloader.to_screen( if ext == new_ext:
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) self._downloader.to_screen(
continue '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning(
'You have requested to convert dfxp (TTML) subtitles into another format, '
'which results in style information loss')
dfxp_file = old_file
srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read())
with io.open(srt_file, 'wt', encoding='utf-8') as f:
f.write(srt_data)
old_file = srt_file
subs[lang] = {
'ext': 'srt',
'data': srt_data
}
if new_ext == 'srt':
continue continue
else: name = sub.get('name', '')
sub_filenames.append(srt_file) old_file = subtitles_filename(filename, lang, name, ext, info.get('ext'))
sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, name, new_ext, info.get('ext'))
self.run_ffmpeg(old_file, new_file, ['-f', new_format]) if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning(
'You have requested to convert dfxp (TTML) subtitles into another format, '
'which results in style information loss')
with io.open(new_file, 'rt', encoding='utf-8') as f: dfxp_file = old_file
subs[lang] = { srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
'ext': new_ext,
'data': f.read(), with open(dfxp_file, 'rb') as f:
} srt_data = dfxp2srt(f.read())
with io.open(srt_file, 'wt', encoding='utf-8') as f:
f.write(srt_data)
old_file = srt_file
slist_new = [s for s in subs[lang] if s.get('name', '') != name]
slist_new.append({
'name': name,
'ext': 'srt',
'data': srt_data
})
subs[lang] = slist_new
if new_ext == 'srt':
continue
else:
sub_filenames.append(srt_file)
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
with io.open(new_file, 'rt', encoding='utf-8') as f:
slist_new = [s for s in subs[lang] if s.get('name', '') != name]
slist_new.append({
'name': name,
'ext': new_ext,
'data': f.read(),
})
subs[lang] = slist_new
return sub_filenames, info return sub_filenames, info

View File

@ -3002,8 +3002,8 @@ def determine_ext(url, default_ext='unknown_video'):
return default_ext return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None): def subtitles_filename(filename, sub_lang, sub_name, sub_format, expected_real_ext=None):
return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext) return replace_extension(filename, (sub_name + '.' if sub_name else '') + sub_lang + '.' + sub_format, expected_real_ext)
def date_from_str(date_str): def date_from_str(date_str):