1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-22 16:44:32 +01:00

Download and parse subtitle information from m3u8

The format is hardcoded to vtt for now and works with mpv out of the box. Test with e.g. youtube-dl https://www.tv4play.se/program/scandinavian-star/12515629 --all-subs (and open the result in mpv with: "mpv --audio-file-auto=fuzzy").
This commit is contained in:
swedebugia 2020-04-22 18:22:45 +02:00 committed by GitHub
parent eedd717032
commit 4635796778
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -107,16 +107,29 @@ class TV4IE(InfoExtractor):
self._sort_formats(formats)
# The subtitles are defined in the manifest_url like this:
# # SUBTITLES groups
# #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="textstream",NAME="Swedish",LANGUAGE="sv",AUTOSELECT=YES,DEFAULT=YES,URI="bmetgl4z0mr(12579349_ISMUSP)-textstream_swe=3000.m3u8"
# but I don't know yet how to extract it dynamically from there so they are hardcoded as a start.
hardcoded_swedish_subs_url = manifest_url[:-5] + "-textstream_swe=3000.webvtt"
# Download the manifest and extract the subtitles from it. Reusing
# this response to extract the formats as well caused an error, so
# the manifest is currently downloaded twice, which is not
# great.
res = self._download_webpage_handle(
manifest_url, video_id,
note='Downloading subtitle information',
errnote='Failed to download subtitle information',
fatal=True, data=None, headers={}, query={})
if res:
m3u8_doc, urlh = res
m3u8_url = urlh.geturl()
subtitles = self._parse_m3u8_subtitles(m3u8_doc, manifest_url)
# Hardcode webvtt for now
for item in subtitles:
# List inside dictionary
# Modify extension
url = subtitles[item][0]['url'].replace('m3u8', 'webvtt')
subtitles[item][0]['url'] = url
subtitles[item][0]['ext'] = 'vtt'
else:
subtitles = {}
subtitles.setdefault('sv', []).append({
'url': hardcoded_swedish_subs_url,
'ext': 'vtt'
})
return {
'id': video_id,