From d89e94415bcef6a801131b37e48eba1cc9aa3dea Mon Sep 17 00:00:00 2001 From: rj Date: Sat, 18 Jul 2020 23:55:21 +0100 Subject: [PATCH 1/2] [theweatherchannel] Fix regex and tests for extraction --- youtube_dl/extractor/theweatherchannel.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dl/extractor/theweatherchannel.py index c34a49d03..6c947f0a0 100644 --- a/youtube_dl/extractor/theweatherchannel.py +++ b/youtube_dl/extractor/theweatherchannel.py @@ -12,7 +12,7 @@ class TheWeatherChannelIE(ThePlatformIE): _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock', - 'md5': 'ab924ac9574e79689c24c6b95e957def', + 'md5': 'c4cbe74c9c17c5676b704b950b73dd92', 'info_dict': { 'id': 'cc82397e-cc3f-4d11-9390-a785add090e8', 'ext': 'mp4', @@ -26,10 +26,7 @@ class TheWeatherChannelIE(ThePlatformIE): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json(self._search_regex( - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', - webpage, 'drupal settings'), display_id) - video_id = drupal_settings['twc']['contexts']['node']['uuid'] + video_id = self._search_regex(r'"activeVideo":{"id":"(.*?)"',webpage, 'video id') video_data = self._download_json( 'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id) seo_meta = video_data.get('seometa', {}) From eb0a5a4d5a7889eaea2c0588ad6b9a59196741dd Mon Sep 17 00:00:00 2001 From: rj Date: Sun, 19 Jul 2020 00:12:51 +0100 Subject: [PATCH 2/2] Fix flake8 error --- youtube_dl/extractor/theweatherchannel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dl/extractor/theweatherchannel.py index 6c947f0a0..af169e1bd 100644 --- a/youtube_dl/extractor/theweatherchannel.py 
+++ b/youtube_dl/extractor/theweatherchannel.py @@ -26,7 +26,7 @@ class TheWeatherChannelIE(ThePlatformIE): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(r'"activeVideo":{"id":"(.*?)"',webpage, 'video id') + video_id = self._search_regex(r'"activeVideo":{"id":"(.*?)"', webpage, 'video id') video_data = self._download_json( 'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id) seo_meta = video_data.get('seometa', {})