mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-26 02:14:32 +01:00
[generic] utf8 decode before re.match(), for Python 3
Otherwise we raise "TypeError: can't use a string pattern on a bytes-like object". This perhaps argues for putting the whitespace check in is_html(), which already does this decoding. But of course plain whitespace isn't just HTML, so perhaps is_html() should be renamed? I don't know which is simpler. Let's start with this.
This commit is contained in:
parent
00bc75ca01
commit
a5d5a2c068
@@ -1759,7 +1759,7 @@ class GenericIE(InfoExtractor):
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
|
||||
if re.match(r'^\s+$', first_bytes):
|
||||
if re.match(r'^\s+$', first_bytes.decode('utf-8', 'replace')):
|
||||
self._downloader.report_warning(
|
||||
'First block is just whitespace? Continuing...')
|
||||
elif not is_html(first_bytes):
|
||||
|
Loading…
Reference in New Issue
Block a user