Skip to content

Instantly share code, notes, and snippets.

@olance
Forked from john1king/parse_nested_query.py
Last active August 29, 2015 14:02
Show Gist options
  • Save olance/8ce102401cc6cecaf938 to your computer and use it in GitHub Desktop.
Save olance/8ce102401cc6cecaf938 to your computer and use it in GitHub Desktop.
Parses a query string to obtain a params dict as it's done in Rails using Rack's Utils lib.
# This is a port from https://github.com/rack/rack/blob/master/lib/rack/utils.rb from john1king,
# forked to be fixed, completed & documented
import re
import urllib
class RangeError(Exception):
pass
# Number of bytes allowed for params keys. This limitation
# prevents clients from flooding a request
KEY_SPACE_LIMIT = 65536
# Default delimiter to split params
# See: http://stackoverflow.com/a/1178285/283481
DEFAULT_SEP = '[&;] *'
class KeySpaceConstrainedParams(object):
"""
Encapsulate a dict to make sure it doesn't grows up too much (limit memory taken up by keys)
"""
def __init__(self, limit=KEY_SPACE_LIMIT):
self.limit = limit
self.size = 0
self.params = {}
def __getitem__(self, key):
return self.params[key]
def __setitem__(self, key, value):
"""
This is the main logic: when adding a key to our dict,
make sure it'll not overflow the decided key_space_limit size
"""
if key and not key in self.params:
self.size += len(key)
if self.size > self.limit:
raise RangeError('exceeded available parameter key space')
self.params[key] = value
def setdefault(self, key, value):
self.params.setdefault(key, value)
def keys(self):
return self.params.keys()
def to_params_dict(self):
"""
Convert the instance back to a regular dict (recursively)
:return: dict
"""
_dict = self.params
for key in _dict:
value = _dict[key]
if isinstance(value, self.__class__):
_dict[key] = value.to_params_dict()
elif isinstance(value, list):
for i in range(len(value)):
x = value[i]
if isinstance(x, self.__class__):
value[i] = x.to_params_dict()
else:
value[i] = x
return _dict
def unescape(s):
"""
Unescapes a URL-encoded string, transforming '+' characters to spaces.
:param s: [str] URL-encoded string such as '%7e/abc+def'
:return: [str] The URL-decoded string: '~/abc def'
"""
return urllib.unquote_plus(s)
def is_params_dict_type(obj):
return isinstance(obj, (KeySpaceConstrainedParams, dict))
def normalize_params(params, name, v=None):
"""
This is the core function: it populates the params dictionary depending on the inferred type for the given param
name.
* If name is just a simple identifier like 'param_name', then params['param_name'] = value.
* If name contains square brackets at the end it is considered a dict or a list:
- 'list[]': params['list'] is initialized to an empty list if needed, and value is appended to this list
- 'dict[key]' : params['dict'] is initialized to an empty constrained params dictionary and
params['dict']['key1'] receives the value
:param params: [KeySpaceConstrainedParams, dict] A dict to add normalized params to
:param name: [str] The param name to be normalized
:param v: [str] The value to associate to the param
:return: [KeySpaceConstrainedParams, dict, None] The given params instance or None
"""
# Extract the "real" param name, ie. "list" from "list[]" or "dict" from "dict[key][subkey]", but also "key" from
# "key[subkey]" or "subkey" from "subkey]".
# Those last two cases will occur from recursive calls to normalize_params() when name originally is
# "dict[key][subkey]", so as to create nested dictionaries recursively.
try:
_, k, after = re.split('^[\[\]]*([^\[\]]+)\]*', name)
except ValueError:
return
# after will be empty if param is not a list nor a dict (eg. name == "param_name")
if after == "":
params[k] = v
# Special case when name ends with an opening square bracket (?)
elif after == "[":
params[name] = v
# when given param is a list
elif after == "[]":
# Initialize new empty list if needed and check that we really have a list associated to our param name
params.setdefault(k, [])
if not isinstance(params[k], list):
raise TypeError("expected list (got %s) for param %s" % (params[k].__class__.__name__, k))
# Append given value to this list
params[k].append(v)
# Otherwise it has to be a dict (eg. name == "dict[key]" or name == "dict[key][subkey]"), or a list of dictionaries
# (eg. name == "list[][key]"/"list[][key2] or name == "list[]key1"/"list[]key2")
else:
# is the dict nested in a list? (eg. name == "list[][key]"....
match = re.match('^\[\]\[([^\[\]]+)\]$', after)
if not match:
# .... or name == "list[]key")
match = re.match('^\[\](.+)$', after)
# in this case a dict item in the list has to be fully defined before a new dict is added; thus, if we encounter
# a key that is already in the last dict of the list, we'll append a new dict and put this key/value pair inside
# it.
if match:
# Get the key to add a value for
child_key = match.group(1)
# Ensure we've got a list at params[k]
params.setdefault(k, [])
if not isinstance(params[k], list):
raise TypeError("expected list (got %s) for param %s" % (params[k].__class__.__name__, k))
# if the last list element is a dict and doesn't hold child_key yet, add the key to the existing dict. This
# is done through a recursive call to normalize_params so that nested dict can be built if needed
if params[k] and is_params_dict_type(params[k][-1]) and not child_key in params[k][-1].keys():
normalize_params(params[k][-1], child_key, v)
# if the key is already present, simply add it to a new constrained dict
else:
params[k].append(normalize_params(params.__class__(), child_key, v))
# when the dict isn't nested inside a list, simply ensure params[k] is a dict and add the key to it, possibly
# creating nested dict along the way
else:
params.setdefault(k, params.__class__())
if not is_params_dict_type(params[k]):
raise TypeError("expected dict (got %s) for param %s" % (params[k].__class__.__name__, k))
params[k] = normalize_params(params[k], after, v)
# return the given dict instance, augmented with recursively parsed params
return params
def parse_nested_query(qs, d=DEFAULT_SEP):
"""
Parse the given query string to a dict of key/value pairs, where values can be nested lists or dicts.
:param qs: [str] The query string to be parsed
:param d: [str] A regex to be used as params separator. Optional, defaults to DEFAULT_SEP
:return: [dict] A dictionary representing the parsed query string
"""
params = KeySpaceConstrainedParams()
# Split the query string qs using given or default separator regex and, for each param=value pair, 'normalize' param
# and value and add the pair to the params dict
for p in re.split(d or DEFAULT_SEP, qs or ''):
k, v = [unescape(s) for s in p.split('=', 2)]
normalize_params(params, k, v)
# Return a real dict from the constrained params dict
return params.to_params_dict()
if __name__ == '__main__':
print parse_nested_query('foo[x]=1')
print parse_nested_query('foo[][a]=1&foo[][b]=2&foo[][a]=3&foo[][b]=4')
print parse_nested_query('foo[x]=1;foo[o]=2')
print parse_nested_query('foo[x][]=1;foo[x][]=2')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment