Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2024-11-10 09:07:58 +01:00)
[utils] `js_to_json`: Improve escape handling (#5217)
Authored by: Grub4K
This commit is contained in:
parent c6989aa3ae
commit a71b812f53
2 changed files with 39 additions and 26 deletions
|
@ -1100,6 +1100,12 @@ def test_js_to_json_edgecases(self):
|
||||||
on = js_to_json('[1,//{},\n2]')
|
on = js_to_json('[1,//{},\n2]')
|
||||||
self.assertEqual(json.loads(on), [1, 2])
|
self.assertEqual(json.loads(on), [1, 2])
|
||||||
|
|
||||||
|
on = js_to_json(R'"\^\$\#"')
|
||||||
|
self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
|
||||||
|
|
||||||
|
on = js_to_json('\'"\\""\'')
|
||||||
|
self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
|
||||||
|
|
||||||
def test_js_to_json_malformed(self):
|
def test_js_to_json_malformed(self):
|
||||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||||
|
|
|
@ -3275,6 +3275,8 @@ def strip_jsonp(code):
|
||||||
|
|
||||||
def js_to_json(code, vars={}, *, strict=False):
|
def js_to_json(code, vars={}, *, strict=False):
|
||||||
# vars is a dict of var, val pairs to substitute
|
# vars is a dict of var, val pairs to substitute
|
||||||
|
STRING_QUOTES = '\'"'
|
||||||
|
STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
|
||||||
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
|
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
|
||||||
SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
|
SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
|
||||||
INTEGER_TABLE = (
|
INTEGER_TABLE = (
|
||||||
|
@ -3282,6 +3284,15 @@ def js_to_json(code, vars={}, *, strict=False):
|
||||||
(fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
|
(fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def process_escape(match):
|
||||||
|
JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
|
||||||
|
escape = match.group(1) or match.group(2)
|
||||||
|
|
||||||
|
return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
|
||||||
|
else R'\u00' if escape == 'x'
|
||||||
|
else '' if escape == '\n'
|
||||||
|
else escape)
|
||||||
|
|
||||||
def fix_kv(m):
|
def fix_kv(m):
|
||||||
v = m.group(0)
|
v = m.group(0)
|
||||||
if v in ('true', 'false', 'null'):
|
if v in ('true', 'false', 'null'):
|
||||||
|
@ -3289,28 +3300,25 @@ def fix_kv(m):
|
||||||
elif v in ('undefined', 'void 0'):
|
elif v in ('undefined', 'void 0'):
|
||||||
return 'null'
|
return 'null'
|
||||||
elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
|
elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
|
||||||
return ""
|
return ''
|
||||||
|
|
||||||
if v[0] in ("'", '"'):
|
if v[0] in STRING_QUOTES:
|
||||||
v = re.sub(r'(?s)\\.|"', lambda m: {
|
escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
|
||||||
'"': '\\"',
|
return f'"{escaped}"'
|
||||||
"\\'": "'",
|
|
||||||
'\\\n': '',
|
|
||||||
'\\x': '\\u00',
|
|
||||||
}.get(m.group(0), m.group(0)), v[1:-1])
|
|
||||||
else:
|
|
||||||
for regex, base in INTEGER_TABLE:
|
|
||||||
im = re.match(regex, v)
|
|
||||||
if im:
|
|
||||||
i = int(im.group(1), base)
|
|
||||||
return '"%d":' % i if v.endswith(':') else '%d' % i
|
|
||||||
|
|
||||||
if v in vars:
|
for regex, base in INTEGER_TABLE:
|
||||||
return json.dumps(vars[v])
|
im = re.match(regex, v)
|
||||||
if strict:
|
if im:
|
||||||
raise ValueError(f'Unknown value: {v}')
|
i = int(im.group(1), base)
|
||||||
|
return f'"{i}":' if v.endswith(':') else str(i)
|
||||||
|
|
||||||
return '"%s"' % v
|
if v in vars:
|
||||||
|
return json.dumps(vars[v])
|
||||||
|
|
||||||
|
if not strict:
|
||||||
|
return f'"{v}"'
|
||||||
|
|
||||||
|
raise ValueError(f'Unknown value: {v}')
|
||||||
|
|
||||||
def create_map(mobj):
|
def create_map(mobj):
|
||||||
return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
|
return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
|
||||||
|
@ -3320,15 +3328,14 @@ def create_map(mobj):
|
||||||
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
|
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
|
||||||
code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
|
code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
|
||||||
|
|
||||||
return re.sub(r'''(?sx)
|
return re.sub(rf'''(?sx)
|
||||||
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
|
{STRING_RE}|
|
||||||
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
|
{COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
|
||||||
{comment}|,(?={skip}[\]}}])|
|
|
||||||
void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
|
void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
|
||||||
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
|
||||||
[0-9]+(?={skip}:)|
|
[0-9]+(?={SKIP_RE}:)|
|
||||||
!+
|
!+
|
||||||
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
|
''', fix_kv, code)
|
||||||
|
|
||||||
|
|
||||||
def qualities(quality_ids):
|
def qualities(quality_ids):
|
||||||
|
|
Loading…
Reference in a new issue