1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-26 10:24:33 +01:00

[utils] js_to_json: various improvements

JS object literals such as `{ /* " */ 0: ",]\xaa<\/p>", }` are now correctly converted to JSON.
This commit is contained in:
felix 2016-03-13 12:29:15 +01:00 committed by Sergey M․
parent a834622b89
commit bd1e484448
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 28 additions and 14 deletions

View File

@ -640,6 +640,18 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{"abc": "def",}') on = js_to_json('{"abc": "def",}')
self.assertEqual(json.loads(on), {'abc': 'def'}) self.assertEqual(json.loads(on), {'abc': 'def'})
on = js_to_json('{ 0: /* " \n */ ",]" , }')
self.assertEqual(json.loads(on), {'0': ',]'})
on = js_to_json(r'["<p>x<\/p>"]')
self.assertEqual(json.loads(on), ['<p>x</p>'])
on = js_to_json(r'["\xaa"]')
self.assertEqual(json.loads(on), ['\u00aa'])
on = js_to_json("['a\\\nb']")
self.assertEqual(json.loads(on), ['ab'])
def test_extract_attributes(self): def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})

View File

@ -1914,24 +1914,26 @@ def js_to_json(code):
v = m.group(0) v = m.group(0)
if v in ('true', 'false', 'null'): if v in ('true', 'false', 'null'):
return v return v
if v.startswith('"'): elif v.startswith('/*') or v == ',':
v = re.sub(r"\\'", "'", v[1:-1]) return ""
elif v.startswith("'"):
v = v[1:-1] if v[0] in ("'", '"'):
v = re.sub(r"\\\\|\\'|\"", lambda m: { v = re.sub(r'(?s)\\.|"', lambda m: {
'\\\\': '\\\\',
"\\'": "'",
'"': '\\"', '"': '\\"',
}[m.group(0)], v) "\\'": "'",
'\\\n': '',
'\\x': '\\u00',
}.get(m.group(0), m.group(0)), v[1:-1])
return '"%s"' % v return '"%s"' % v
res = re.sub(r'''(?x) return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"| "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]* /\*.*?\*/|,(?=\s*[\]}])|
[a-zA-Z_][.a-zA-Z_0-9]*|
[0-9]+(?=\s*:)
''', fix_kv, code) ''', fix_kv, code)
res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
return res
def qualities(quality_ids): def qualities(quality_ids):