From a38b8a609b7687a40471dd3f61d8c031f687f9c9 Mon Sep 17 00:00:00 2001 From: Will Beaufoy Date: Sat, 25 Apr 2020 19:17:29 +0100 Subject: [PATCH] [cbsnews] Handle iframes with src (closes #24790) Currently only iframes with a data-src attribute are recognised by the extractor, meaning no video is found for the URL in the linked bug (extraction fails with RegexNotFoundError). This fix removes "data-" from the regex pattern, meaning both data-src and src will be matched. Technically something like xyzsrc would be matched as well, but I do not think this is a problem. --- youtube_dl/extractor/cbsnews.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 345debcf0..bccc09cc9 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -95,7 +95,8 @@ class CBSNewsIE(CBSIE): webpage = self._download_webpage(url, display_id) entries = [] - for embed_url in re.findall(r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage): + # This regex is intended to match attributes src and data-src + for embed_url in re.findall(r'<iframe[^>]+src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage): entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key())) if entries: return self.playlist_result(