From 974aaa407bae67663d6b73a8c681227ea09d3079 Mon Sep 17 00:00:00 2001 From: Samik Some Date: Fri, 7 Feb 2020 18:27:32 +0530 Subject: [PATCH] Add subtitle support for roosterteeth.com --- youtube_dl/extractor/roosterteeth.py | 34 ++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 8883639b2..1b7a663b6 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -86,9 +86,11 @@ class RoosterTeethIE(InfoExtractor): api_episode_url = self._EPISODE_BASE_URL + display_id try: - m3u8_url = self._download_json( + video_data = self._download_json( api_episode_url + '/videos', display_id, - 'Downloading video JSON metadata')['data'][0]['attributes']['url'] + 'Downloading video JSON metadata')['data'][0] + m3u8_url = video_data['attributes']['url'] + subtitle_m3u8_url = video_data['links']['download'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if self._parse_json(e.cause.read().decode(), display_id).get('access') is False: @@ -118,6 +120,33 @@ class RoosterTeethIE(InfoExtractor): 'id': k, 'url': img_url, }) + + subtitles = {} + res = self._download_webpage_handle( + subtitle_m3u8_url, display_id, + 'Downloading m3u8 information', + 'Failed to download m3u8 information', + fatal=True, data=None, headers={}, query={}) + if res is not False: + subtitle_m3u8_doc, _ = res + for line in subtitle_m3u8_doc.split('\n'): + if 'EXT-X-MEDIA:TYPE=SUBTITLES' in line: + parts = line.split(',') + for part in parts: + if 'LANGUAGE' in part: + lang = part[part.index('=')+2:-1] + elif 'URI' in part: + uri = part[part.index('=')+2:-1] + res = self._download_webpage_handle( + uri, display_id, + 'Downloading m3u8 information', + 'Failed to download m3u8 information', + fatal=True, data=None, headers={}, query={}) + doc, _ = res + for l in doc.split('\n'): + if not l.startswith('#'): + subtitles[lang] = [{'url':uri[:-uri[::-1].index('/')]+l}] + break return { 'id': video_id, @@ -134,4 +163,5 @@ class RoosterTeethIE(InfoExtractor): 'formats': formats, 'channel_id': attributes.get('channel_id'), 'duration': int_or_none(attributes.get('length')), + 'subtitles': subtitles }