1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-26 18:34:32 +01:00

Download and parse subtitle information from m3u8

The format is hardcoded to vtt for now and works with mpv out of the box. Test with e.g. youtube-dl https://www.tv4play.se/program/scandinavian-star/12515629 --all-subs (and open with mpv using "mpv --audio-file-auto=fuzzy").
This commit is contained in:
swedebugia 2020-04-22 18:22:45 +02:00 committed by GitHub
parent eedd717032
commit 4635796778
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -107,16 +107,29 @@ class TV4IE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
# The subtitles are defined in the manifest_url like this: # Download manifest and extract subtitles. Extracting formats
# # SUBTITLES groups # using this result resulted in an error. This means the
# #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="textstream",NAME="Swedish",LANGUAGE="sv",AUTOSELECT=YES,DEFAULT=YES,URI="bmetgl4z0mr(12579349_ISMUSP)-textstream_swe=3000.m3u8" # manifest is currently being downloaded twice, which is not
# but I don't know yet how to extract it dynamically from there so they are hardcoded as a start. # great.
hardcoded_swedish_subs_url = manifest_url[:-5] + "-textstream_swe=3000.webvtt" res = self._download_webpage_handle(
manifest_url, video_id,
note='Downloading subtitle information',
errnote='Failed to download subtitle information',
fatal=True, data=None, headers={}, query={})
if res:
m3u8_doc, urlh = res
m3u8_url = urlh.geturl()
subtitles = self._parse_m3u8_subtitles(m3u8_doc, manifest_url)
# Hardcode webvtt for now
for item in subtitles:
# List inside dictionary
# Modify extension
url = subtitles[item][0]['url'].replace('m3u8', 'webvtt')
subtitles[item][0]['url'] = url
subtitles[item][0]['ext'] = 'vtt'
else:
subtitles = {} subtitles = {}
subtitles.setdefault('sv', []).append({
'url': hardcoded_swedish_subs_url,
'ext': 'vtt'
})
return { return {
'id': video_id, 'id': video_id,