mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-10 09:07:58 +01:00
hack for apparently broken parse_qs in python2
This commit is contained in:
parent
be0f77d075
commit
40b35b4aa6
1 changed files with 75 additions and 1 deletions
|
@ -49,7 +49,81 @@
|
||||||
try:
|
try:
|
||||||
from urllib.parse import parse_qs as compat_parse_qs
|
from urllib.parse import parse_qs as compat_parse_qs
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urlparse import parse_qs as compat_parse_qs
|
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
||||||
|
# Python 2's version is apparently totally broken
|
||||||
|
def _unquote(string, encoding='utf-8', errors='replace'):
|
||||||
|
if string == '':
|
||||||
|
return string
|
||||||
|
res = string.split('%')
|
||||||
|
if len(res) == 1:
|
||||||
|
return string
|
||||||
|
if encoding is None:
|
||||||
|
encoding = 'utf-8'
|
||||||
|
if errors is None:
|
||||||
|
errors = 'replace'
|
||||||
|
# pct_sequence: contiguous sequence of percent-encoded bytes, decoded
|
||||||
|
pct_sequence = b''
|
||||||
|
string = res[0]
|
||||||
|
for item in res[1:]:
|
||||||
|
try:
|
||||||
|
if not item:
|
||||||
|
raise ValueError
|
||||||
|
pct_sequence += item[:2].decode('hex')
|
||||||
|
rest = item[2:]
|
||||||
|
if not rest:
|
||||||
|
# This segment was just a single percent-encoded character.
|
||||||
|
# May be part of a sequence of code units, so delay decoding.
|
||||||
|
# (Stored in pct_sequence).
|
||||||
|
continue
|
||||||
|
except ValueError:
|
||||||
|
rest = '%' + item
|
||||||
|
# Encountered non-percent-encoded characters. Flush the current
|
||||||
|
# pct_sequence.
|
||||||
|
string += pct_sequence.decode(encoding, errors) + rest
|
||||||
|
pct_sequence = b''
|
||||||
|
if pct_sequence:
|
||||||
|
# Flush the final pct_sequence
|
||||||
|
string += pct_sequence.decode(encoding, errors)
|
||||||
|
return string
|
||||||
|
|
||||||
|
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
               encoding='utf-8', errors='replace'):
    """Parse a query string into a list of (name, value) pairs.

    qs                -- the query string; fields are separated by '&' or ';'.
    keep_blank_values -- if true, fields with an empty value (or no '=' at
                         all) are kept with '' as their value; otherwise
                         they are dropped.
    strict_parsing    -- if true, an empty field or a field without '='
                         raises ValueError instead of being skipped.
    encoding, errors  -- passed through to _unquote for decoding %xx escapes.

    Names and values have '+' turned into a space, are percent-decoded and
    are then coerced with unicode().
    """
    to_text = unicode

    def _decode(component):
        # '+' is translated to a space before the %xx escapes are resolved.
        spaced = component.replace('+', ' ')
        return to_text(_unquote(spaced, encoding=encoding, errors=errors))

    result = []
    for chunk in qs.split('&'):
        for field in chunk.split(';'):
            if not (field or strict_parsing):
                # Empty fields are silently ignored unless parsing strictly.
                continue
            key, sep, val = field.partition('=')
            if not sep:
                # Control name without an equal sign.
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                if not keep_blank_values:
                    continue
            if val or keep_blank_values:
                decoded_key = _decode(key)
                decoded_val = _decode(val)
                result.append((decoded_key, decoded_val))
    return result
|
||||||
|
|
||||||
|
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
    """Parse a query string into a dict mapping each name to a list of values.

    All arguments are forwarded to _parse_qsl. Values for a repeated name
    are collected in the order the pairs are returned by _parse_qsl.
    """
    grouped = {}
    name_value_pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                                  encoding=encoding, errors=errors)
    for key, val in name_value_pairs:
        # First occurrence creates the list; later ones extend it.
        grouped.setdefault(key, []).append(val)
    return grouped
|
||||||
|
|
||||||
try:
|
try:
|
||||||
compat_str = unicode # Python 2
|
compat_str = unicode # Python 2
|
||||||
|
|
Loading…
Reference in a new issue