1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-26 02:14:32 +01:00

ČT currently uses WebVTT subtitles

So the extractor should save the subtitle file with the correct extension and omit the now-unnecessary call to the `_fix_subtitles` method.
This commit is contained in:
Václav Navrátil 2019-01-31 23:17:17 +01:00 committed by GitHub
parent 49fe4175ae
commit 5f9639c236
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -201,40 +201,15 @@ class CeskaTelevizeIE(InfoExtractor):
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
def _get_subtitles(self, episode_id, subs): def _get_subtitles(self, episode_id, subs):
original_subtitles = self._download_webpage( vtt_subs = self._download_webpage(
subs[0]['url'], episode_id, 'Downloading subtitles') subs[0]['url'], episode_id, 'Downloading subtitles')
srt_subs = self._fix_subtitles(original_subtitles)
return { return {
'cs': [{ 'cs': [{
'ext': 'srt', 'ext': 'vtt',
'data': srt_subs, 'data': vtt_subs,
}] }]
} }
@staticmethod
def _fix_subtitles(subtitles):
    """Convert millisecond-based subtitles to SRT.

    Every line of the form ``<index>; <start_ms> <stop_ms>`` is replaced by
    two lines: the cue index, followed by an SRT timing line
    (``HH:MM:SS,mmm --> HH:MM:SS,mmm``).  All other lines are passed through
    unchanged.  The resulting lines are joined with CRLF.

    :param subtitles: the whole subtitle document as one string
    :return: the converted document as one string
    """
    # Cue header: "<index>; <start_ms> <stop_ms>", surrounding whitespace
    # allowed.  Compiled once instead of on every line of the document.
    cue_header = re.compile(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$')

    def _msectotimecode(msec):
        """Helper utility to convert milliseconds to an SRT timecode."""
        seconds, millis = divmod(msec, 1000)
        minutes, seconds = divmod(seconds, 60)
        # Hours are intentionally not reduced any further: truncating them
        # (e.g. modulo 100) would make very large timestamps wrap to 00.
        hours, minutes = divmod(minutes, 60)
        return '{0:02}:{1:02}:{2:02},{3:03}'.format(
            hours, minutes, seconds, millis)

    def _fix_subtitle(subtitle):
        """Yield the converted document line by line."""
        for line in subtitle.splitlines():
            m = cue_header.match(line)
            if m is None:
                # Not a cue header (text, blank line, ...): keep verbatim.
                yield line
                continue
            index, start, stop = m.groups()
            yield index
            yield '{0} --> {1}'.format(
                _msectotimecode(int(start)), _msectotimecode(int(stop)))

    return '\r\n'.join(_fix_subtitle(subtitles))
class CeskaTelevizePoradyIE(InfoExtractor): class CeskaTelevizePoradyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'