diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index aed988b88..85fe1e34a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3979,18 +3979,27 @@ def js_to_json(code): def fix_kv(m): v = m.group(0) + if v in ('true', 'false', 'null'): return v elif v.startswith('/*') or v.startswith('//') or v == ',': return "" if v[0] in ("'", '"'): + + v = v[1:-1] + + # convert Javascript's octal escape sequences (and '\0') + # into valid JSON escape sequences ( e.g '\347' => '\u00e7', '\0' => '\u0000' ) + # regex based on https://mathiasbynens.be/notes/javascript-escapes + v = re.sub(r'\\([0-7]{1,3})', lambda x: "\\u%04x" % int(x.group(1), 8), v) + v = re.sub(r'(?s)\\.|"', lambda m: { '"': '\\"', "\\'": "'", '\\\n': '', '\\x': '\\u00', - }.get(m.group(0), m.group(0)), v[1:-1]) + }.get(m.group(0), m.group(0)), v) for regex, base in INTEGER_TABLE: im = re.match(regex, v) @@ -4001,8 +4010,8 @@ def js_to_json(code): return '"%s"' % v return re.sub(r'''(?sx) - "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| - '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| + "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]?))*[^"\\]*"| + '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]?))*[^'\\]*'| {comment}|,(?={skip}[\]}}])| (?:(?