Merge 05743aa624
into d65d89183f
This commit is contained in:
commit
623d90dc22
|
@ -1814,6 +1814,38 @@ class InfoExtractor(object):
|
|||
last_stream_inf = {}
|
||||
return formats
|
||||
|
||||
def _parse_m3u8_subtitles(self, m3u8_doc, m3u8_url):
    """
    Parse subtitles from m3u8 file.

    Scans every line of m3u8_doc that starts with '#EXT-X-MEDIA:' and
    collects the entries whose TYPE is exactly 'SUBTITLES' and that
    carry a URI attribute.

    m3u8_doc is the text of the playlist and m3u8_url is the URL used
    as the base for resolving subtitle URIs that are not absolute
    http(s) URLs.

    Returns a dict mapping the LANGUAGE attribute of each entry (None
    when the attribute is absent) to a list of {'url': ...} dicts, in
    the order the entries appear in the playlist. The dict is empty
    when no subtitle entry is found.

    Please avoid downloading the m3u8 twice.
    """
    def format_url(u):
        # Absolute http(s) URLs are kept as is; everything else is
        # resolved against the playlist URL
        if re.match(r'^https?://', u):
            return u
        return compat_urlparse.urljoin(m3u8_url, u)

    subtitles = {}

    def extract_media(x_media_line):
        media = parse_m3u8_attributes(x_media_line)
        # As per the HLS spec (section 4.3.4.1, EXT-X-MEDIA) TYPE,
        # GROUP-ID and NAME are REQUIRED
        media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
        if not (media_type and group_id and name):
            return
        # Only subtitle renditions are of interest here. Compare for
        # equality: `not in ('SUBTITLES')` tests against a plain string
        # (the parentheses do not make a tuple) and therefore also
        # accepts any substring such as 'SUB' or 'TITLES'
        if media_type != 'SUBTITLES':
            return
        subtitle_url = media.get('URI')
        if subtitle_url:
            subtitles.setdefault(media.get('LANGUAGE'), []).append({
                'url': format_url(subtitle_url),
            })

    for line in m3u8_doc.splitlines():
        if line.startswith('#EXT-X-MEDIA:'):
            extract_media(line)

    return subtitles
|
||||
|
||||
@staticmethod
|
||||
def _xpath_ns(path, namespace=None):
|
||||
if not namespace:
|
||||
|
|
|
@ -107,11 +107,34 @@ class TV4IE(InfoExtractor):
|
|||
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Download manifest and extract subtitles. Extracting formats
|
||||
# using this result resulted in an error. This means the
|
||||
# manifest is currently being downloaded twice, which is not
|
||||
# great.
|
||||
res = self._download_webpage_handle(
|
||||
manifest_url, video_id,
|
||||
note='Downloading subtitle information',
|
||||
errnote='Failed to download subtitle information',
|
||||
fatal=True, data=None, headers={}, query={})
|
||||
|
||||
if res:
|
||||
m3u8_doc, urlh = res
|
||||
subtitles = self._parse_m3u8_subtitles(m3u8_doc, manifest_url)
|
||||
# Hardcode webvtt for now
|
||||
for item in subtitles:
|
||||
# List inside dictionary
|
||||
# Modify extension
|
||||
url = subtitles[item][0]['url'].replace('m3u8', 'webvtt')
|
||||
subtitles[item][0]['url'] = url
|
||||
subtitles[item][0]['ext'] = 'vtt'
|
||||
else:
|
||||
subtitles = {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
# 'subtitles': subtitles,
|
||||
'subtitles': subtitles,
|
||||
'description': info.get('description'),
|
||||
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
|
|
Loading…
Reference in New Issue