From 6206194c5ace95e5a825b4a58a395030804e7c20 Mon Sep 17 00:00:00 2001
From: John Hawkinson <jhawk@mit.edu>
Date: Sun, 19 Mar 2017 20:52:25 -0400
Subject: [PATCH 1/3] [generic] Replace LazyYT test with skiplagged

discourse.ubuntu.com has gone away; replace it with skiplagged.com.
It would be nice to have a non-frontpage URL that might be more stable,
though I don't have one. Maybe this should move to an HTML-based test
in test/test_InfoExtractor.py?
---
 youtube_dl/extractor/generic.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index a71d6bac0..b70b1dd6d 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -902,12 +902,13 @@ class GenericIE(InfoExtractor):
         },
         # LazyYT
         {
-            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
+            'url': 'https://skiplagged.com/',
             'info_dict': {
-                'id': '1986',
-                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
+                'id': 'skiplagged',
+                'title': 'Skiplagged: The smart way to find cheap flights',
             },
-            'playlist_mincount': 2,
+            'playlist_mincount': 1,
+            'add_ie': ['Youtube'],
         },
         # Cinchcast embed
         {

From 00bc75ca0115fa57ffc700357ba6ef86f3355bb9 Mon Sep 17 00:00:00 2001
From: John Hawkinson <jhawk@mit.edu>
Date: Sun, 19 Mar 2017 21:01:47 -0400
Subject: [PATCH 2/3] [generic] Allow parsing when first 512 bytes are
 whitespace

If, for some weird reason, the first 512 bytes fetched from the URL are
all whitespace, is_html(first_bytes) will fail. Such a response is
probably not a direct video link (the case where we are concerned about
inadvertently downloading the whole thing), since a block of whitespace
would not be a valid binary video file format.

But it's still peculiar, so don't silently ignore it -- print a
warning and continue on.
---
 youtube_dl/extractor/generic.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index b70b1dd6d..54fadf7d8 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1759,9 +1759,12 @@ class GenericIE(InfoExtractor):
             self._sort_formats(info_dict['formats'])
             return info_dict
 
-        # Maybe it's a direct link to a video?
-        # Be careful not to download the whole thing!
-        if not is_html(first_bytes):
+        if re.match(r'^\s+$', first_bytes):
+            self._downloader.report_warning(
+                'First block is just whitespace? Continuing...')
+        elif not is_html(first_bytes):
+            # Maybe it's a direct link to a video?
+            # Be careful not to download the whole thing!
             self._downloader.report_warning(
                 'URL could be a direct video link, returning it as such.')
             info_dict.update({

From a5d5a2c068b00a8118fa9e3c32a9d93f316b2edd Mon Sep 17 00:00:00 2001
From: John Hawkinson <jhawk@mit.edu>
Date: Sun, 19 Mar 2017 21:52:13 -0400
Subject: [PATCH 3/3] [generic] utf8 decode before re.match(), for Python 3

Otherwise Python 3 raises
  TypeError: can't use a string pattern on a bytes-like object
This perhaps argues for putting the whitespace check in is_html(),
which already does this decoding. But of course plain whitespace isn't
really HTML, so perhaps is_html() should be renamed? I don't know which
is simpler. Let's start with this.
---
 youtube_dl/extractor/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 54fadf7d8..dc73d23ff 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1759,7 +1759,7 @@ class GenericIE(InfoExtractor):
             self._sort_formats(info_dict['formats'])
             return info_dict
 
-        if re.match(r'^\s+$', first_bytes):
+        if re.match(r'^\s+$', first_bytes.decode('utf-8', 'replace')):
             self._downloader.report_warning(
                 'First block is just whitespace? Continuing...')
         elif not is_html(first_bytes):