From dd6ba65ff7387953270b4b2d5af48d2a0cff43d8 Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Sun, 19 Apr 2020 19:22:36 -0700 Subject: [PATCH 01/10] Added get_subtitles function to funimation.py --- youtube_dl/extractor/funimation.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 8bbedca26..8fbff9bbb 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -105,6 +105,7 @@ class FunimationIE(InfoExtractor): if series: title = '%s - %s' % (series, title) description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True) + subtitles = self.get_subtitles(video_id, display_id) try: headers = {} @@ -149,6 +150,25 @@ class FunimationIE(InfoExtractor): 'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')), 'episode_number': int_or_none(title_data.get('episodeNum')), 'episode': episode, + 'subtitles': subtitles, 'season_id': title_data.get('seriesId'), 'formats': formats, } + + def get_subtitles(self, video_id, display_id): + #TODO get url based on value passed in e.g., https://www.funimationnow.uk/ + player_url = 'https://www.funimation.com/player/' + video_id + player_page = self._download_webpage(player_url, display_id) + text_tracks_search = self._search_regex( + r'("textTracks": \[{.+?}\])', + player_page, 'player data', default='') + text_tracks_search = '{' + text_tracks_search + '}' + player_json = self._parse_json(text_tracks_search, display_id, js_to_json, fatal=False) or {} + subtitles = {} + for x in player_json['textTracks']: + data = {'url': x['src']} + if x['language'] in subtitles: + subtitles[x['language']].append(data) + else: + subtitles[x['language']] = [data] + return subtitles From bc455bfea2b568ff59f188d64afe522a9d0784e3 Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Sun, 19 Apr 2020 20:32:57 -0700 Subject: [PATCH 02/10] Get player url from relative path --- 
youtube_dl/extractor/funimation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 8fbff9bbb..979e0fee8 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -11,7 +11,8 @@ from ..utils import ( int_or_none, js_to_json, ExtractorError, - urlencode_postdata + urlencode_postdata, + urljoin ) @@ -105,7 +106,7 @@ class FunimationIE(InfoExtractor): if series: title = '%s - %s' % (series, title) description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True) - subtitles = self.get_subtitles(video_id, display_id) + subtitles = self.get_subtitles(url, video_id, display_id) try: headers = {} @@ -155,9 +156,8 @@ class FunimationIE(InfoExtractor): 'formats': formats, } - def get_subtitles(self, video_id, display_id): - #TODO get url based on value passed in e.g., https://www.funimationnow.uk/ - player_url = 'https://www.funimation.com/player/' + video_id + def get_subtitles(self, url, video_id, display_id): + player_url = urljoin(url, '/player/' + video_id) player_page = self._download_webpage(player_url, display_id) text_tracks_search = self._search_regex( r'("textTracks": \[{.+?}\])', From d4e94cb1105db9d2645aeddae12a02825d0c98c4 Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Sun, 19 Apr 2020 20:53:06 -0700 Subject: [PATCH 03/10] Account for failure to find "textTracks" --- youtube_dl/extractor/funimation.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 979e0fee8..f3bf6ca02 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -164,11 +164,12 @@ class FunimationIE(InfoExtractor): player_page, 'player data', default='') text_tracks_search = '{' + text_tracks_search + '}' player_json = self._parse_json(text_tracks_search, display_id, js_to_json, fatal=False) 
or {} + text_tracks = player_json.get('textTracks', []) subtitles = {} - for x in player_json['textTracks']: - data = {'url': x['src']} - if x['language'] in subtitles: - subtitles[x['language']].append(data) + for text_track in text_tracks: + data = {'url': text_track['src']} + if text_track['language'] in subtitles: + subtitles[text_track['language']].append(data) else: - subtitles[x['language']] = [data] + subtitles[text_track['language']] = [data] return subtitles From 460d7aef86efdc4d8f80a8aaed053c2adf4c9aa0 Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Mon, 20 Apr 2020 00:26:15 -0700 Subject: [PATCH 04/10] Simplified json parsing logic --- youtube_dl/extractor/funimation.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index f3bf6ca02..1f1661bd9 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -159,12 +159,10 @@ class FunimationIE(InfoExtractor): def get_subtitles(self, url, video_id, display_id): player_url = urljoin(url, '/player/' + video_id) player_page = self._download_webpage(player_url, display_id) - text_tracks_search = self._search_regex( - r'("textTracks": \[{.+?}\])', + text_tracks_json_string = self._search_regex( + r'"textTracks": (\[{.+?}\])', player_page, 'player data', default='') - text_tracks_search = '{' + text_tracks_search + '}' - player_json = self._parse_json(text_tracks_search, display_id, js_to_json, fatal=False) or {} - text_tracks = player_json.get('textTracks', []) + text_tracks = self._parse_json(text_tracks_json_string, display_id, js_to_json, fatal=False) or [] subtitles = {} for text_track in text_tracks: data = {'url': text_track['src']} From 157cf0bb37edfa97a1f816eb63ae948a5b24c425 Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Mon, 20 Apr 2020 00:38:10 -0700 Subject: [PATCH 05/10] Refactored vars to be easier to understand --- youtube_dl/extractor/funimation.py | 9 +++++---- 1 file 
changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 1f1661bd9..fc12e689f 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -165,9 +165,10 @@ class FunimationIE(InfoExtractor): text_tracks = self._parse_json(text_tracks_json_string, display_id, js_to_json, fatal=False) or [] subtitles = {} for text_track in text_tracks: - data = {'url': text_track['src']} - if text_track['language'] in subtitles: - subtitles[text_track['language']].append(data) + url_element = {'url': text_track['src']} + language = text_track.get('language') + if language in subtitles: + subtitles[language].append(url_element) else: - subtitles[text_track['language']] = [data] + subtitles[language] = [url_element] return subtitles From ad60ff4700b55d4e93634b3fc508ec77e4a73312 Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Mon, 20 Apr 2020 00:41:29 -0700 Subject: [PATCH 06/10] Shortened a line to 80 chars --- youtube_dl/extractor/funimation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index fc12e689f..703cec8ec 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -162,7 +162,8 @@ class FunimationIE(InfoExtractor): text_tracks_json_string = self._search_regex( r'"textTracks": (\[{.+?}\])', player_page, 'player data', default='') - text_tracks = self._parse_json(text_tracks_json_string, display_id, js_to_json, fatal=False) or [] + text_tracks = self._parse_json( + text_tracks_json_string, display_id, js_to_json, fatal=False) or [] subtitles = {} for text_track in text_tracks: url_element = {'url': text_track['src']} From dae0247122f14760331997a346863fc7d3887928 Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Mon, 20 Apr 2020 10:00:45 -0700 Subject: [PATCH 07/10] Added check for robot detection on player page --- youtube_dl/extractor/funimation.py 
| 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 703cec8ec..7d4667315 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -162,6 +162,10 @@ class FunimationIE(InfoExtractor): text_tracks_json_string = self._search_regex( r'"textTracks": (\[{.+?}\])', player_page, 'player data', default='') + if not text_tracks_json_string: + # Funimation player page unavailable due to robot detection. + # Don't warn so that unit tests still pass this step. + return {} text_tracks = self._parse_json( text_tracks_json_string, display_id, js_to_json, fatal=False) or [] subtitles = {} From 9c433165625f57d59bb6d99585060ad7855ee2cb Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Mon, 20 Apr 2020 10:37:04 -0700 Subject: [PATCH 08/10] Swapped a square bracket for get --- youtube_dl/extractor/funimation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 7d4667315..6fa09c21a 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -170,7 +170,7 @@ class FunimationIE(InfoExtractor): text_tracks_json_string, display_id, js_to_json, fatal=False) or [] subtitles = {} for text_track in text_tracks: - url_element = {'url': text_track['src']} + url_element = {'url': text_track.get('src')} language = text_track.get('language') if language in subtitles: subtitles[language].append(url_element) From a8c4b05a9cde3592503837aceb21a809467f5f4c Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Tue, 21 Apr 2020 18:11:11 -0700 Subject: [PATCH 09/10] Use existing subtitle methods for extraction --- youtube_dl/extractor/funimation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 6fa09c21a..73608d115 100644 --- a/youtube_dl/extractor/funimation.py +++ 
b/youtube_dl/extractor/funimation.py @@ -106,7 +106,7 @@ class FunimationIE(InfoExtractor): if series: title = '%s - %s' % (series, title) description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True) - subtitles = self.get_subtitles(url, video_id, display_id) + subtitles = self.extract_subtitles(url, video_id, display_id) try: headers = {} @@ -156,7 +156,7 @@ class FunimationIE(InfoExtractor): 'formats': formats, } - def get_subtitles(self, url, video_id, display_id): + def _get_subtitles(self, url, video_id, display_id): player_url = urljoin(url, '/player/' + video_id) player_page = self._download_webpage(player_url, display_id) text_tracks_json_string = self._search_regex( From 98e84c5b6f5f5b3bc94b288b9c239a2af35d1f33 Mon Sep 17 00:00:00 2001 From: Ben Bryant Date: Thu, 23 Apr 2020 16:10:20 -0700 Subject: [PATCH 10/10] Updated the name of the subtitle search regex --- youtube_dl/extractor/funimation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 73608d115..118a10293 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -161,7 +161,7 @@ class FunimationIE(InfoExtractor): player_page = self._download_webpage(player_url, display_id) text_tracks_json_string = self._search_regex( r'"textTracks": (\[{.+?}\])', - player_page, 'player data', default='') + player_page, 'subtitles data', default='') if not text_tracks_json_string: # Funimation player page unavailable due to robot detection. # Don't warn so that unit tests still pass this step.