1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-22 08:34:32 +01:00

[utils] Fix handling of comments in js_to_json (closes #23707, closes #23785)

This commit is contained in:
TinyToweringTree 2020-01-28 22:37:29 +01:00
parent 51c7f40c83
commit 13a91f1642
2 changed files with 41 additions and 9 deletions

View File

@@ -918,6 +918,34 @@ class TestUtil(unittest.TestCase):
inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
inp = '''{
foo: "value",
// bar: { nested:'x' },
bar: { nested:'x' },
chaff: "something"
}'''
self.assertEqual(js_to_json(inp), '''{
"foo": "value",
"bar": { "nested":"x" },
"chaff": "something"
}''')
inp = '''{
id: "player_prog",
googleCast: true,
//extraSettings: { googleCastReceiverAppId:'1A6F2224', skin:'s3', skinAccentColor: '0073FF'},
extraSettings: { googleCastReceiverAppId:'1A6F2224'},
mediaType: "video",
}'''
self.assertEqual(js_to_json(inp), '''{
"id": "player_prog",
"googleCast": true,
"extraSettings": { "googleCastReceiverAppId":"1A6F2224"},
"mediaType": "video"
}''')
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})

View File

@@ -3975,18 +3975,22 @@ def strip_jsonp(code):
def js_to_json(code):
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
INTEGER_TABLE = (
(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
(r'(?s)^(0[xX][0-9a-fA-F]+)\s*:?$', 16),
(r'(?s)^(0+[0-7]+)\s*:?$', 8),
)
# Remove all comments first, including all whitespace leading up to them.
# This regular expression is based on this Stack Overflow answer:
# https://stackoverflow.com/a/25735600
code = re.sub(r'("(?:[^"\\]|\\[\s\S])*"|\'(?:[^\'\\]|\\[\s\S])*\')|[ \t]*//.*|[ \t]*/\*(?:[^*]|\*(?!/))*\*/',
'\\1', code)
def fix_kv(m):
v = m.group(0)
if v in ('true', 'false', 'null'):
return v
elif v.startswith('/*') or v.startswith('//') or v == ',':
elif v == ',':
return ""
if v[0] in ("'", '"'):
@@ -4008,11 +4012,11 @@ def js_to_json(code):
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
{comment}|,(?={skip}[\]}}])|
,(?=\s*[\]}}])|
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
[0-9]+(?={skip}:)
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
[0-9]+(?=\s*:)
''', fix_kv, code)
def qualities(quality_ids):