1
0
mirror of https://codeberg.org/polarisfm/youtube-dl synced 2024-11-22 16:44:32 +01:00

[utils] Fix handling of comments in js_to_json (closes #23707, closes #23785)

This commit is contained in:
TinyToweringTree 2020-01-28 22:37:29 +01:00
parent 51c7f40c83
commit 13a91f1642
2 changed files with 41 additions and 9 deletions

View File

@ -918,6 +918,34 @@ class TestUtil(unittest.TestCase):
inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''' inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''') self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
inp = '''{
foo: "value",
// bar: { nested:'x' },
bar: { nested:'x' },
chaff: "something"
}'''
self.assertEqual(js_to_json(inp), '''{
"foo": "value",
"bar": { "nested":"x" },
"chaff": "something"
}''')
inp = '''{
id: "player_prog",
googleCast: true,
//extraSettings: { googleCastReceiverAppId:'1A6F2224', skin:'s3', skinAccentColor: '0073FF'},
extraSettings: { googleCastReceiverAppId:'1A6F2224'},
mediaType: "video",
}'''
self.assertEqual(js_to_json(inp), '''{
"id": "player_prog",
"googleCast": true,
"extraSettings": { "googleCastReceiverAppId":"1A6F2224"},
"mediaType": "video"
}''')
def test_js_to_json_edgecases(self): def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})

View File

@ -3975,18 +3975,22 @@ def strip_jsonp(code):
def js_to_json(code): def js_to_json(code):
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
INTEGER_TABLE = ( INTEGER_TABLE = (
(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16), (r'(?s)^(0[xX][0-9a-fA-F]+)\s*:?$', 16),
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8), (r'(?s)^(0+[0-7]+)\s*:?$', 8),
) )
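# Remove all comments first, including any spaces or tabs leading up to them.
# String literals are matched first and kept unchanged (group 1), so comment
# markers that appear inside quoted strings are not stripped.
# This regular expression is based on this Stack Overflow answer:
# https://stackoverflow.com/a/25735600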
code = re.sub(r'("(?:[^"\\]|\\[\s\S])*"|\'(?:[^\'\\]|\\[\s\S])*\')|[ \t]*//.*|[ \t]*/\*(?:[^*]|\*(?!/))*\*/',
'\\1', code)
def fix_kv(m): def fix_kv(m):
v = m.group(0) v = m.group(0)
if v in ('true', 'false', 'null'): if v in ('true', 'false', 'null'):
return v return v
elif v.startswith('/*') or v.startswith('//') or v == ',': elif v == ',':
return "" return ""
if v[0] in ("'", '"'): if v[0] in ("'", '"'):
@ -4008,11 +4012,11 @@ def js_to_json(code):
return re.sub(r'''(?sx) return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
{comment}|,(?={skip}[\]}}])| ,(?=\s*[\]}}])|
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*| (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
[0-9]+(?={skip}:) [0-9]+(?=\s*:)
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code) ''', fix_kv, code)
def qualities(quality_ids): def qualities(quality_ids):