From 00bc75ca0115fa57ffc700357ba6ef86f3355bb9 Mon Sep 17 00:00:00 2001
From: John Hawkinson <jhawk@mit.edu>
Date: Sun, 19 Mar 2017 21:01:47 -0400
Subject: [PATCH] [generic] Allow parsing when first 512 bytes are whitespace

is_html(first_bytes) fails to recognize the page as HTML when the
first 512 bytes fetched from the URL are, for some weird reason, all
whitespace. Such a response is probably not a direct video link (the
case we are concerned about downloading inadvertently), since leading
whitespace would not be a valid binary video file format.

But it's still peculiar, so don't silently ignore it -- print a
warning and continue on with normal parsing of the page.
---
 youtube_dl/extractor/generic.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index b70b1dd6d..54fadf7d8 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1759,9 +1759,12 @@ class GenericIE(InfoExtractor):
             self._sort_formats(info_dict['formats'])
             return info_dict
 
-        # Maybe it's a direct link to a video?
-        # Be careful not to download the whole thing!
-        if not is_html(first_bytes):
+        if re.match(r'^\s+$', first_bytes):
+            self._downloader.report_warning(
+                'First block is just whitespace? Continuing...')
+        elif not is_html(first_bytes):
+            # Maybe it's a direct link to a video?
+            # Be careful not to download the whole thing!
             self._downloader.report_warning(
                 'URL could be a direct video link, returning it as such.')
             info_dict.update({