mirror of
https://codeberg.org/polarisfm/youtube-dl
synced 2024-11-22 16:44:32 +01:00
[utils.py:js_to_json] add support for octal escape sequences
This commit is contained in:
parent
2a5c26c980
commit
5c62bedd6b
@ -918,6 +918,9 @@ class TestUtil(unittest.TestCase):
|
|||||||
inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
|
inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
|
||||||
self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
|
self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
|
||||||
|
|
||||||
|
inp = '''{label: "Fran\347ais"}'''
|
||||||
|
self.assertEqual(js_to_json(inp), '''{"label": "Fran\u00e7ais"}''')
|
||||||
|
|
||||||
def test_js_to_json_edgecases(self):
|
def test_js_to_json_edgecases(self):
|
||||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||||
|
@ -3982,20 +3982,31 @@ def js_to_json(code):
|
|||||||
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
|
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def convert_escapes(m):
    """Translate one JavaScript string escape into its JSON equivalent.

    m is a regex match object.  Its whole text, m.group(0), is either a
    bare double quote or a backslash followed by a single character or by
    one to three octal digits; when octal digits were matched they are
    captured in m.group(1), otherwise that group is None.

    Returns the text that should replace the match inside a JSON string.
    """
    octal = m.group(1)
    if octal is not None:
        # convert Javascript's octal escape sequences (and '\0')
        # into valid JSON escape sequences (e.g. '\347' => '\u00e7', '\0' => '\u0000')
        trailing_digit = ''
        if len(octal) == 3 and octal[0] > '3':
            # A Javascript octal escape cannot exceed '\377': when the first
            # digit is 4-7 only two digits belong to the escape and the third
            # one is an ordinary character (e.g. '\477' is '\47' then '7').
            octal, trailing_digit = octal[:2], octal[2]
        return ('\\u%04x' % int(octal, 8)) + trailing_digit

    # convert the remaining escape sequences
    # into valid JSON
    return {
        # a bare double quote must be escaped inside a JSON string
        '"': '\\"',
        # JSON has no \' escape; a plain apostrophe is enough
        "\\'": "'",
        # backslash + newline is a line continuation and produces nothing
        '\\\n': '',
        # '\xNN' becomes '\u00NN' (the two hex digits follow the match)
        '\\x': '\\u00',
    }.get(m.group(0), m.group(0))  # anything else is passed through unchanged
|
||||||
|
|
||||||
def fix_kv(m):
|
def fix_kv(m):
|
||||||
v = m.group(0)
|
v = m.group(0)
|
||||||
|
|
||||||
if v in ('true', 'false', 'null'):
|
if v in ('true', 'false', 'null'):
|
||||||
return v
|
return v
|
||||||
elif v.startswith('/*') or v.startswith('//') or v == ',':
|
elif v.startswith('/*') or v.startswith('//') or v == ',':
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
if v[0] in ("'", '"'):
|
if v[0] in ("'", '"'):
|
||||||
v = re.sub(r'(?s)\\.|"', lambda m: {
|
v = re.sub(r'(?s)\\(?:([0-7]{1,3})|.)|"', convert_escapes, v[1:-1])
|
||||||
'"': '\\"',
|
|
||||||
"\\'": "'",
|
|
||||||
'\\\n': '',
|
|
||||||
'\\x': '\\u00',
|
|
||||||
}.get(m.group(0), m.group(0)), v[1:-1])
|
|
||||||
|
|
||||||
for regex, base in INTEGER_TABLE:
|
for regex, base in INTEGER_TABLE:
|
||||||
im = re.match(regex, v)
|
im = re.match(regex, v)
|
||||||
@ -4006,8 +4017,8 @@ def js_to_json(code):
|
|||||||
return '"%s"' % v
|
return '"%s"' % v
|
||||||
|
|
||||||
return re.sub(r'''(?sx)
|
return re.sub(r'''(?sx)
|
||||||
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
|
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n01234567]))*[^"\\]*"|
|
||||||
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n01234567]))*[^'\\]*'|
|
||||||
{comment}|,(?={skip}[\]}}])|
|
{comment}|,(?={skip}[\]}}])|
|
||||||
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
|
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
|
||||||
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
||||||
|
Loading…
Reference in New Issue
Block a user