From 00bc75ca0115fa57ffc700357ba6ef86f3355bb9 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 19 Mar 2017 21:01:47 -0400 Subject: [PATCH] [generic] Allow parsing when first 512 bytes are whitespace is_html(first_bytes) will fail to detect HTML if the first 512 bytes of the URL's content are all whitespace, which some pages have for some weird reason. Such a case is probably not a direct video link -- the case we're concerned about downloading inadvertently -- since all-whitespace data wouldn't be a valid binary video file format. But it's still peculiar, so don't silently ignore it -- print a warning and continue on. --- youtube_dl/extractor/generic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b70b1dd6d..54fadf7d8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1759,9 +1759,12 @@ class GenericIE(InfoExtractor): self._sort_formats(info_dict['formats']) return info_dict - # Maybe it's a direct link to a video? - # Be careful not to download the whole thing! - if not is_html(first_bytes): + if re.match(r'^\s+$', first_bytes): + self._downloader.report_warning( + 'First block is just whitespace? Continuing...') + elif not is_html(first_bytes): + # Maybe it's a direct link to a video? + # Be careful not to download the whole thing! self._downloader.report_warning( 'URL could be a direct video link, returning it as such.') info_dict.update({