Support multiple subs per lang, differentiate by name

Add a new column 'name' to the --list-subs output. With --write-sub --sub-lang foo, write every subtitle available for the given language that has a distinct 'name'. If the name is not empty, it becomes a component of the output subtitle file name, placed before the language code. Also adapt the ffmpeg postprocessing functions accordingly.
2024-11-22 16:44:32 +01:00 · 2020-07-21 21:35:55 +03:00 · 2020-07-21 21:35:55 +03:00 · 51b70469ed
commit 51b70469ed
parent c23c35c9c1
3 changed files with 115 additions and 94 deletions
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -1673,26 +1673,30 @@ class YoutubeDL(object):

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
-        subs = {}
+        subs = collections.defaultdict(list)
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
                continue
-            for ext in formats_preference:
-                if ext == 'best':
-                    f = formats[-1]
-                    break
-                matches = list(filter(lambda f: f['ext'] == ext, formats))
-                if matches:
-                    f = matches[-1]
-                    break
-            else:
-                f = formats[-1]
-                self.report_warning(
-                    'No subtitle format found matching "%s" for language %s, '
-                    'using %s' % (formats_query, lang, f['ext']))
-            subs[lang] = f
+            named = collections.defaultdict(list)
+            for f in formats:
+                named[f.get('name', '')].append(f)
+            for name, fmts in named.items():
+                for ext in formats_preference:
+                    if ext == 'best':
+                        f = fmts[-1]
+                        break
+                    matches = [f for f in fmts if f['ext'] == ext]
+                    if matches:
+                        f = matches[-1]
+                        break
+                    else:
+                        f = fmts[-1]
+                        self.report_warning(
+                            'No subtitle format found matching "%s" for language %s, '
+                            'using %s' % (formats_query, lang, f['ext']))
+                subs[lang].append(f)
        return subs

    def __forced_printings(self, info_dict, filename, incomplete):
@ -1813,32 +1817,34 @@ class YoutubeDL(object):
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
-            for sub_lang, sub_info in subtitles.items():
-                sub_format = sub_info['ext']
-                sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
-                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
-                else:
-                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
-                    if sub_info.get('data') is not None:
-                        try:
-                            # Use newline='' to prevent conversion of newline characters
-                            # See https://github.com/ytdl-org/youtube-dl/issues/10268
-                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
-                                subfile.write(sub_info['data'])
-                        except (OSError, IOError):
-                            self.report_error('Cannot write subtitles file ' + sub_filename)
-                            return
+            for sub_lang, sub_info_list in subtitles.items():
+                for sub_info in sub_info_list:
+                    sub_format = sub_info['ext']
+                    sub_name = sub_info.get('name', '')
+                    sub_filename = subtitles_filename(filename, sub_lang, sub_name, sub_format, info_dict.get('ext'))
+                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+                        self.to_screen('[info] Video subtitle %s is already present' % (sub_filename))
                    else:
-                        try:
-                            sub_data = ie._request_webpage(
-                                sub_info['url'], info_dict['id'], note=False).read()
-                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
-                                subfile.write(sub_data)
-                        except (ExtractorError, IOError, OSError, ValueError) as err:
-                            self.report_warning('Unable to download subtitle for "%s": %s' %
-                                                (sub_lang, error_to_compat_str(err)))
-                            continue
+                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+                        if sub_info.get('data') is not None:
+                            try:
+                                # Use newline='' to prevent conversion of newline characters
+                                # See https://github.com/ytdl-org/youtube-dl/issues/10268
+                                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+                                    subfile.write(sub_info['data'])
+                            except (OSError, IOError):
+                                self.report_error('Cannot write subtitles file ' + sub_filename)
+                                return
+                        else:
+                            try:
+                                sub_data = ie._request_webpage(
+                                    sub_info['url'], info_dict['id'], note=False).read()
+                                with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                                    subfile.write(sub_data)
+                            except (ExtractorError, IOError, OSError, ValueError) as err:
+                                self.report_warning('Unable to download subtitle for "%s": %s' %
+                                                    (sub_lang, error_to_compat_str(err)))
+                                continue

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
@ -2226,10 +2232,15 @@ class YoutubeDL(object):
            return
        self.to_screen(
            'Available %s for %s:' % (name, video_id))
+        table = []
+        for lang, formats in subtitles.items():
+            named = collections.defaultdict(list)
+            for f in formats:
+                named[f.get('name', '')].append(f['ext'])
+            for name in named.keys():
+                table.append([lang, name, ', '.join(e for e in reversed(named[name]))])
        self.to_screen(render_table(
-            ['Language', 'formats'],
-            [[lang, ', '.join(f['ext'] for f in reversed(formats))]
-                for lang, formats in subtitles.items()]))
+            ['Language', 'name', 'formats'], table))

    def urlopen(self, req):
        """ Start an HTTP download """
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -385,19 +385,21 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
        filename = information['filepath']

        ext = information['ext']
-        sub_langs = []
+        sub_langs = set()
        sub_filenames = []
        webm_vtt_warn = False

-        for lang, sub_info in subtitles.items():
-            sub_ext = sub_info['ext']
-            if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
-                sub_langs.append(lang)
-                sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
-            else:
-                if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
-                    webm_vtt_warn = True
-                    self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
+        for lang, sub_info_list in subtitles.items():
+            for sub_info in sub_info_list:
+                sub_ext = sub_info['ext']
+                if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
+                    sub_langs.add(lang)
+                    sub_name = sub_info.get('name', '')
+                    sub_filenames.append(subtitles_filename(filename, lang, sub_name, sub_ext, ext))
+                else:
+                    if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
+                        webm_vtt_warn = True
+                        self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')

        if not sub_langs:
            return [], information
@ -611,47 +613,55 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
            return [], info
        self._downloader.to_screen('[ffmpeg] Converting subtitles')
        sub_filenames = []
-        for lang, sub in subs.items():
-            ext = sub['ext']
-            if ext == new_ext:
-                self._downloader.to_screen(
-                    '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
-                continue
-            old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
-            sub_filenames.append(old_file)
-            new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
-
-            if ext in ('dfxp', 'ttml', 'tt'):
-                self._downloader.report_warning(
-                    'You have requested to convert dfxp (TTML) subtitles into another format, '
-                    'which results in style information loss')
-
-                dfxp_file = old_file
-                srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
-
-                with open(dfxp_file, 'rb') as f:
-                    srt_data = dfxp2srt(f.read())
-
-                with io.open(srt_file, 'wt', encoding='utf-8') as f:
-                    f.write(srt_data)
-                old_file = srt_file
-
-                subs[lang] = {
-                    'ext': 'srt',
-                    'data': srt_data
-                }
-
-                if new_ext == 'srt':
+        for lang, sublist in subs.items():
+            for sub in sublist:
+                ext = sub['ext']
+                if ext == new_ext:
+                    self._downloader.to_screen(
+                        '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
                    continue
-                else:
-                    sub_filenames.append(srt_file)
+                name = sub.get('name', '')
+                old_file = subtitles_filename(filename, lang, name, ext, info.get('ext'))
+                sub_filenames.append(old_file)
+                new_file = subtitles_filename(filename, lang, name, new_ext, info.get('ext'))

-            self.run_ffmpeg(old_file, new_file, ['-f', new_format])
+                if ext in ('dfxp', 'ttml', 'tt'):
+                    self._downloader.report_warning(
+                        'You have requested to convert dfxp (TTML) subtitles into another format, '
+                        'which results in style information loss')

-            with io.open(new_file, 'rt', encoding='utf-8') as f:
-                subs[lang] = {
-                    'ext': new_ext,
-                    'data': f.read(),
-                }
+                    dfxp_file = old_file
+                    srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
+
+                    with open(dfxp_file, 'rb') as f:
+                        srt_data = dfxp2srt(f.read())
+
+                    with io.open(srt_file, 'wt', encoding='utf-8') as f:
+                        f.write(srt_data)
+                    old_file = srt_file
+
+                    slist_new = [s for s in subs[lang] if s.get('name', '') != name]
+                    slist_new.append({
+                        'name': name,
+                        'ext': 'srt',
+                        'data': srt_data
+                    })
+                    subs[lang] = slist_new
+
+                    if new_ext == 'srt':
+                        continue
+                    else:
+                        sub_filenames.append(srt_file)
+
+                self.run_ffmpeg(old_file, new_file, ['-f', new_format])
+
+                with io.open(new_file, 'rt', encoding='utf-8') as f:
+                    slist_new = [s for s in subs[lang] if s.get('name', '') != name]
+                    slist_new.append({
+                        'name': name,
+                        'ext': new_ext,
+                        'data': f.read(),
+                    })
+                    subs[lang] = slist_new

        return sub_filenames, info
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -3002,8 +3002,8 @@ def determine_ext(url, default_ext='unknown_video'):
        return default_ext


-def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
-    return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
+def subtitles_filename(filename, sub_lang, sub_name, sub_format, expected_real_ext=None):
+    return replace_extension(filename, (sub_name + '.' if sub_name else '') + sub_lang + '.' + sub_format, expected_real_ext)


 def date_from_str(date_str):