diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 19370f62b..be42aa691 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1673,26 +1673,30 @@ class YoutubeDL(object): formats_query = self.params.get('subtitlesformat', 'best') formats_preference = formats_query.split('/') if formats_query else [] - subs = {} + subs = collections.defaultdict(list) for lang in requested_langs: formats = available_subs.get(lang) if formats is None: self.report_warning('%s subtitles not available for %s' % (lang, video_id)) continue - for ext in formats_preference: - if ext == 'best': - f = formats[-1] - break - matches = list(filter(lambda f: f['ext'] == ext, formats)) - if matches: - f = matches[-1] - break - else: - f = formats[-1] - self.report_warning( - 'No subtitle format found matching "%s" for language %s, ' - 'using %s' % (formats_query, lang, f['ext'])) - subs[lang] = f + named = collections.defaultdict(list) + for f in formats: + named[f.get('name', '')].append(f) + for name, fmts in named.items(): + for ext in formats_preference: + if ext == 'best': + f = fmts[-1] + break + matches = [f for f in fmts if f['ext'] == ext] + if matches: + f = matches[-1] + break + else: + f = fmts[-1] + self.report_warning( + 'No subtitle format found matching "%s" for language %s, ' + 'using %s' % (formats_query, lang, f['ext'])) + subs[lang].append(f) return subs def __forced_printings(self, info_dict, filename, incomplete): @@ -1813,32 +1817,34 @@ class YoutubeDL(object): # that way it will silently go on when used with unsupporting IE subtitles = info_dict['requested_subtitles'] ie = self.get_info_extractor(info_dict['extractor_key']) - for sub_lang, sub_info in subtitles.items(): - sub_format = sub_info['ext'] - sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): - self.to_screen('[info] Video subtitle %s.%s is already 
present' % (sub_lang, sub_format)) - else: - self.to_screen('[info] Writing video subtitles to: ' + sub_filename) - if sub_info.get('data') is not None: - try: - # Use newline='' to prevent conversion of newline characters - # See https://github.com/ytdl-org/youtube-dl/issues/10268 - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: - subfile.write(sub_info['data']) - except (OSError, IOError): - self.report_error('Cannot write subtitles file ' + sub_filename) - return + for sub_lang, sub_info_list in subtitles.items(): + for sub_info in sub_info_list: + sub_format = sub_info['ext'] + sub_name = sub_info.get('name', '') + sub_filename = subtitles_filename(filename, sub_lang, sub_name, sub_format, info_dict.get('ext')) + if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): + self.to_screen('[info] Video subtitle %s is already present' % (sub_filename)) else: - try: - sub_data = ie._request_webpage( - sub_info['url'], info_dict['id'], note=False).read() - with io.open(encodeFilename(sub_filename), 'wb') as subfile: - subfile.write(sub_data) - except (ExtractorError, IOError, OSError, ValueError) as err: - self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, error_to_compat_str(err))) - continue + self.to_screen('[info] Writing video subtitles to: ' + sub_filename) + if sub_info.get('data') is not None: + try: + # Use newline='' to prevent conversion of newline characters + # See https://github.com/ytdl-org/youtube-dl/issues/10268 + with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: + subfile.write(sub_info['data']) + except (OSError, IOError): + self.report_error('Cannot write subtitles file ' + sub_filename) + return + else: + try: + sub_data = ie._request_webpage( + sub_info['url'], info_dict['id'], note=False).read() + with io.open(encodeFilename(sub_filename), 'wb') as subfile: + subfile.write(sub_data) + except 
(ExtractorError, IOError, OSError, ValueError) as err: + self.report_warning('Unable to download subtitle for "%s": %s' % + (sub_lang, error_to_compat_str(err))) + continue if self.params.get('writeinfojson', False): infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) @@ -2226,10 +2232,15 @@ class YoutubeDL(object): return self.to_screen( 'Available %s for %s:' % (name, video_id)) + table = [] + for lang, formats in subtitles.items(): + named = collections.defaultdict(list) + for f in formats: + named[f.get('name', '')].append(f['ext']) + for name in named.keys(): + table.append([lang, name, ', '.join(e for e in reversed(named[name]))]) self.to_screen(render_table( - ['Language', 'formats'], - [[lang, ', '.join(f['ext'] for f in reversed(formats))] - for lang, formats in subtitles.items()])) + ['Language', 'name', 'formats'], table)) def urlopen(self, req): """ Start an HTTP download """ diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 5f7298345..f360dc25c 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -385,19 +385,21 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): filename = information['filepath'] ext = information['ext'] - sub_langs = [] + sub_langs = set() sub_filenames = [] webm_vtt_warn = False - for lang, sub_info in subtitles.items(): - sub_ext = sub_info['ext'] - if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': - sub_langs.append(lang) - sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext)) - else: - if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': - webm_vtt_warn = True - self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files') + for lang, sub_info_list in subtitles.items(): + for sub_info in sub_info_list: + sub_ext = sub_info['ext'] + if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': + sub_langs.add(lang) + sub_name = sub_info.get('name', '') + 
sub_filenames.append(subtitles_filename(filename, lang, sub_name, sub_ext, ext)) + else: + if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': + webm_vtt_warn = True + self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files') if not sub_langs: return [], information @@ -611,47 +613,55 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): return [], info self._downloader.to_screen('[ffmpeg] Converting subtitles') sub_filenames = [] - for lang, sub in subs.items(): - ext = sub['ext'] - if ext == new_ext: - self._downloader.to_screen( - '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) - continue - old_file = subtitles_filename(filename, lang, ext, info.get('ext')) - sub_filenames.append(old_file) - new_file = subtitles_filename(filename, lang, new_ext, info.get('ext')) - - if ext in ('dfxp', 'ttml', 'tt'): - self._downloader.report_warning( - 'You have requested to convert dfxp (TTML) subtitles into another format, ' - 'which results in style information loss') - - dfxp_file = old_file - srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext')) - - with open(dfxp_file, 'rb') as f: - srt_data = dfxp2srt(f.read()) - - with io.open(srt_file, 'wt', encoding='utf-8') as f: - f.write(srt_data) - old_file = srt_file - - subs[lang] = { - 'ext': 'srt', - 'data': srt_data - } - - if new_ext == 'srt': + for lang, sublist in subs.items(): + for sub in sublist: + ext = sub['ext'] + if ext == new_ext: + self._downloader.to_screen( + '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) continue - else: - sub_filenames.append(srt_file) + name = sub.get('name', '') + old_file = subtitles_filename(filename, lang, name, ext, info.get('ext')) + sub_filenames.append(old_file) + new_file = subtitles_filename(filename, lang, name, new_ext, info.get('ext')) - self.run_ffmpeg(old_file, new_file, ['-f', new_format]) + if ext in ('dfxp', 'ttml', 'tt'): + 
self._downloader.report_warning( + 'You have requested to convert dfxp (TTML) subtitles into another format, ' + 'which results in style information loss') - with io.open(new_file, 'rt', encoding='utf-8') as f: - subs[lang] = { - 'ext': new_ext, - 'data': f.read(), - } + dfxp_file = old_file + srt_file = subtitles_filename(filename, lang, name, 'srt', info.get('ext')) + + with open(dfxp_file, 'rb') as f: + srt_data = dfxp2srt(f.read()) + + with io.open(srt_file, 'wt', encoding='utf-8') as f: + f.write(srt_data) + old_file = srt_file + + slist_new = [s for s in subs[lang] if s.get('name', '') != name] + slist_new.append({ + 'name': name, + 'ext': 'srt', + 'data': srt_data + }) + subs[lang] = slist_new + + if new_ext == 'srt': + continue + else: + sub_filenames.append(srt_file) + + self.run_ffmpeg(old_file, new_file, ['-f', new_format]) + + with io.open(new_file, 'rt', encoding='utf-8') as f: + slist_new = [s for s in subs[lang] if s.get('name', '') != name] + slist_new.append({ + 'name': name, + 'ext': new_ext, + 'data': f.read(), + }) + subs[lang] = slist_new return sub_filenames, info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d1eca3760..40184acb6 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3002,8 +3002,8 @@ def determine_ext(url, default_ext='unknown_video'): return default_ext -def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None): - return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext) +def subtitles_filename(filename, sub_lang, sub_name, sub_format, expected_real_ext=None): + return replace_extension(filename, (sub_name + '.' if sub_name else '') + sub_lang + '.' + sub_format, expected_real_ext) def date_from_str(date_str):