-
-
Save olance/8ce102401cc6cecaf938 to your computer and use it in GitHub Desktop.
Parses a query string to obtain a params dict as it's done in Rails using Rack's Utils lib.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a port from https://github.com/rack/rack/blob/master/lib/rack/utils.rb from john1king, | |
# forked to be fixed, completed & documented | |
import re | |
import urllib | |
class RangeError(Exception): | |
pass | |
# Number of bytes allowed for params keys. This limitation | |
# prevents clients from flooding a request | |
KEY_SPACE_LIMIT = 65536 | |
# Default delimiter to split params | |
# See: http://stackoverflow.com/a/1178285/283481 | |
DEFAULT_SEP = '[&;] *' | |
class KeySpaceConstrainedParams(object): | |
""" | |
Encapsulate a dict to make sure it doesn't grows up too much (limit memory taken up by keys) | |
""" | |
def __init__(self, limit=KEY_SPACE_LIMIT): | |
self.limit = limit | |
self.size = 0 | |
self.params = {} | |
def __getitem__(self, key): | |
return self.params[key] | |
def __setitem__(self, key, value): | |
""" | |
This is the main logic: when adding a key to our dict, | |
make sure it'll not overflow the decided key_space_limit size | |
""" | |
if key and not key in self.params: | |
self.size += len(key) | |
if self.size > self.limit: | |
raise RangeError('exceeded available parameter key space') | |
self.params[key] = value | |
def setdefault(self, key, value): | |
self.params.setdefault(key, value) | |
def keys(self): | |
return self.params.keys() | |
def to_params_dict(self): | |
""" | |
Convert the instance back to a regular dict (recursively) | |
:return: dict | |
""" | |
_dict = self.params | |
for key in _dict: | |
value = _dict[key] | |
if isinstance(value, self.__class__): | |
_dict[key] = value.to_params_dict() | |
elif isinstance(value, list): | |
for i in range(len(value)): | |
x = value[i] | |
if isinstance(x, self.__class__): | |
value[i] = x.to_params_dict() | |
else: | |
value[i] = x | |
return _dict | |
def unescape(s): | |
""" | |
Unescapes a URL-encoded string, transforming '+' characters to spaces. | |
:param s: [str] URL-encoded string such as '%7e/abc+def' | |
:return: [str] The URL-decoded string: '~/abc def' | |
""" | |
return urllib.unquote_plus(s) | |
def is_params_dict_type(obj): | |
return isinstance(obj, (KeySpaceConstrainedParams, dict)) | |
def normalize_params(params, name, v=None): | |
""" | |
This is the core function: it populates the params dictionary depending on the inferred type for the given param | |
name. | |
* If name is just a simple identifier like 'param_name', then params['param_name'] = value. | |
* If name contains square brackets at the end it is considered a dict or a list: | |
- 'list[]': params['list'] is initialized to an empty list if needed, and value is appended to this list | |
- 'dict[key]' : params['dict'] is initialized to an empty constrained params dictionary and | |
params['dict']['key1'] receives the value | |
:param params: [KeySpaceConstrainedParams, dict] A dict to add normalized params to | |
:param name: [str] The param name to be normalized | |
:param v: [str] The value to associate to the param | |
:return: [KeySpaceConstrainedParams, dict, None] The given params instance or None | |
""" | |
# Extract the "real" param name, ie. "list" from "list[]" or "dict" from "dict[key][subkey]", but also "key" from | |
# "key[subkey]" or "subkey" from "subkey]". | |
# Those last two cases will occur from recursive calls to normalize_params() when name originally is | |
# "dict[key][subkey]", so as to create nested dictionaries recursively. | |
try: | |
_, k, after = re.split('^[\[\]]*([^\[\]]+)\]*', name) | |
except ValueError: | |
return | |
# after will be empty if param is not a list nor a dict (eg. name == "param_name") | |
if after == "": | |
params[k] = v | |
# Special case when name ends with an opening square bracket (?) | |
elif after == "[": | |
params[name] = v | |
# when given param is a list | |
elif after == "[]": | |
# Initialize new empty list if needed and check that we really have a list associated to our param name | |
params.setdefault(k, []) | |
if not isinstance(params[k], list): | |
raise TypeError("expected list (got %s) for param %s" % (params[k].__class__.__name__, k)) | |
# Append given value to this list | |
params[k].append(v) | |
# Otherwise it has to be a dict (eg. name == "dict[key]" or name == "dict[key][subkey]"), or a list of dictionaries | |
# (eg. name == "list[][key]"/"list[][key2] or name == "list[]key1"/"list[]key2") | |
else: | |
# is the dict nested in a list? (eg. name == "list[][key]".... | |
match = re.match('^\[\]\[([^\[\]]+)\]$', after) | |
if not match: | |
# .... or name == "list[]key") | |
match = re.match('^\[\](.+)$', after) | |
# in this case a dict item in the list has to be fully defined before a new dict is added; thus, if we encounter | |
# a key that is already in the last dict of the list, we'll append a new dict and put this key/value pair inside | |
# it. | |
if match: | |
# Get the key to add a value for | |
child_key = match.group(1) | |
# Ensure we've got a list at params[k] | |
params.setdefault(k, []) | |
if not isinstance(params[k], list): | |
raise TypeError("expected list (got %s) for param %s" % (params[k].__class__.__name__, k)) | |
# if the last list element is a dict and doesn't hold child_key yet, add the key to the existing dict. This | |
# is done through a recursive call to normalize_params so that nested dict can be built if needed | |
if params[k] and is_params_dict_type(params[k][-1]) and not child_key in params[k][-1].keys(): | |
normalize_params(params[k][-1], child_key, v) | |
# if the key is already present, simply add it to a new constrained dict | |
else: | |
params[k].append(normalize_params(params.__class__(), child_key, v)) | |
# when the dict isn't nested inside a list, simply ensure params[k] is a dict and add the key to it, possibly | |
# creating nested dict along the way | |
else: | |
params.setdefault(k, params.__class__()) | |
if not is_params_dict_type(params[k]): | |
raise TypeError("expected dict (got %s) for param %s" % (params[k].__class__.__name__, k)) | |
params[k] = normalize_params(params[k], after, v) | |
# return the given dict instance, augmented with recursively parsed params | |
return params | |
def parse_nested_query(qs, d=DEFAULT_SEP): | |
""" | |
Parse the given query string to a dict of key/value pairs, where values can be nested lists or dicts. | |
:param qs: [str] The query string to be parsed | |
:param d: [str] A regex to be used as params separator. Optional, defaults to DEFAULT_SEP | |
:return: [dict] A dictionary representing the parsed query string | |
""" | |
params = KeySpaceConstrainedParams() | |
# Split the query string qs using given or default separator regex and, for each param=value pair, 'normalize' param | |
# and value and add the pair to the params dict | |
for p in re.split(d or DEFAULT_SEP, qs or ''): | |
k, v = [unescape(s) for s in p.split('=', 2)] | |
normalize_params(params, k, v) | |
# Return a real dict from the constrained params dict | |
return params.to_params_dict() | |
if __name__ == '__main__': | |
print parse_nested_query('foo[x]=1') | |
print parse_nested_query('foo[][a]=1&foo[][b]=2&foo[][a]=3&foo[][b]=4') | |
print parse_nested_query('foo[x]=1;foo[o]=2') | |
print parse_nested_query('foo[x][]=1;foo[x][]=2') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment