diff --git a/test/test_utils.py b/test/test_utils.py index c2d1e4fb1..d7df60800 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -920,6 +920,34 @@ class TestUtil(unittest.TestCase): inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''' self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''') + inp = '''{ + foo: "value", + // bar: { nested:'x' }, + bar: { nested:'x' }, + chaff: "something" + }''' + self.assertEqual(js_to_json(inp), '''{ + "foo": "value", + + "bar": { "nested":"x" }, + "chaff": "something" + }''') + + inp = '''{ + id: "player_prog", + googleCast: true, + //extraSettings: { googleCastReceiverAppId:'1A6F2224', skin:'s3', skinAccentColor: '0073FF'}, + extraSettings: { googleCastReceiverAppId:'1A6F2224'}, + mediaType: "video", + }''' + self.assertEqual(js_to_json(inp), '''{ + "id": "player_prog", + "googleCast": true, + + "extraSettings": { "googleCastReceiverAppId":"1A6F2224"}, + "mediaType": "video" + }''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 737e2810e..173555edc 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4067,18 +4067,22 @@ def strip_jsonp(code): def js_to_json(code): - COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*' - SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE) INTEGER_TABLE = ( - (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16), - (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8), + (r'(?s)^(0[xX][0-9a-fA-F]+)\s*:?$', 16), + (r'(?s)^(0+[0-7]+)\s*:?$', 8), ) + # Remove all comments first, including all whitespace leading up to them. + # This regular expression is based on this Stack Overflow answer: + # https://stackoverflow.com/a/25735600 + code = re.sub(r'("(?:[^"\\]|\\[\s\S])*"|\'(?:[^\'\\]|\\[\s\S])*\')|[ \t]*//.*|[ \t]*/\*(?:[^*]|\*(?!/))*\*/', + lambda m: m.group(1) or '', code) + def fix_kv(m): v = m.group(0) if v in ('true', 'false', 'null'): return v - elif v.startswith('/*') or v.startswith('//') or v == ',': + elif v == ',': return "" if v[0] in ("'", '"'): @@ -4100,11 +4104,11 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - {comment}|,(?={skip}[\]}}])| + ,(?=\s*[\]}}])| (?:(?