Skip to content

Instantly share code, notes, and snippets.

@karanlyons
Created May 28, 2012 15:47
Show Gist options
  • Save karanlyons/2819800 to your computer and use it in GitHub Desktop.
Save karanlyons/2819800 to your computer and use it in GitHub Desktop.
duct(): Almost like dict(), but worse.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import copy
class _DuctView(object):
"""
*Almost* like `DictView`, but worse.
"""
def __init__(self, hashtable):
self._hashtable = hashtable
def __len__(self):
return self._hashtable._length
class duct_keys(_DuctView):
"""
*Almost* like `dict_keys()`, but worse.
"""
def __iter__(self):
return self._hashtable.iterkeys()
def __and__(self, other):
return set(key for key in self.__iter__()) & set(other)
def __or__(self, other):
return set(key for key in self.__iter__()) | set(other)
def __sub__(self, other):
return set(key for key in self.__iter__()) - set(other)
def __xor__(self, other):
return set(key for key in self.__iter__()) ^ set(other)
def __str__(self):
return "duct_keys([%s])" % ', '.join(["'%s'" % key for key in self._hashtable.iterkeys()])
def __repr__(self):
return self.__str__()
class duct_values(_DuctView):
"""
*Almost* like `dict_values()`, but worse.
"""
def __iter__(self):
return self._hashtable.itervalues()
def __str__(self):
return "duct_values([%s])" % ', '.join(["'%s'" % value for value in self._hashtable.itervalues()])
def __repr__(self):
return self.__str__()
class duct_items(_DuctView):
"""
*Almost* like `dict_items()`, but worse.
"""
def __iter__(self):
return self._hashtable.iteritems()
def __str__(self):
return "duct_items([%s])" % ', '.join(["('%s', '%s')" % (str(item[0]), str(item[1])) for item in self._hashtable.iteritems()])
def __repr__(self):
return self.__str__()
class duct(object):
"""
*Almost* like `dict()`, but worse.
"""
def __init__(self):
self._buckets = [[] for i in range(8)]
self._num_buckets = len(self._buckets)
self._length = 0
self._resize = True
def __hash(self, key):
return key.__hash__() % self._num_buckets
def __resize(self):
length = self._length
self._resize = False
if length > self._num_buckets * 4:
self._buckets.extend([[] for i in range(self._num_buckets * 3)])
for key_value in self.items():
self.__delitem__(key_value[0])
self._buckets[key_value[0].__hash__() % (self._num_buckets * 4)].append(key_value)
elif self._num_buckets > 8 and length <= self._num_buckets / 4:
for key_value in self.items():
self.__delitem__(key_value[0])
self._buckets[key_value[0].__hash__() % (self._num_buckets / 4)].append(key_value)
del self._buckets[self._num_buckets / 4:]
self._num_buckets = len(self._buckets)
self._length = length
self._resize = True
def clear(self):
__init__()
def copy(self):
return copy.copy(self)
def items(self):
key_values = []
for bucket in self._buckets:
if len(bucket):
for key_value in bucket:
key_values.append(key_value)
return key_values
def keys(self):
return [key_value[0] for key_value in [list(*bucket) for bucket in self._buckets if len(bucket)]]
def values(self):
return [key_value[1] for key_value in [list(*bucket) for bucket in self._buckets if len(bucket)]]
def iteritems(self):
for key_value in self.items():
yield key_value
def iterkeys(self):
for key in self.keys():
yield key
def itervalues(self):
for value in self.values():
yield value
def viewitems(self):
return duct_items(self)
def viewkeys(self):
return duct_keys(self)
def viewvalues(self):
return duct_values(self)
def has_key(self, key):
try:
self.__getitem__(key)
return True
except KeyError:
return False
def pop(self, key, *args):
if len(args) > 1:
raise TypeError('pop expected at most 2 arguments, got %i' % len(args))
try:
value = self.__getitem__(key)
self.__delitem__(key)
return value
except KeyError as error:
if len(args):
return args[0]
else:
raise KeyError(*error.args)
def popitem(self):
if self._length:
for bucket in self._buckets:
if len(bucket):
self._length -= 1
key_value = bucket.pop(0)
self.__resize()
return key_value
else:
raise KeyError
def setdefault(self, key, default=None):
try:
return self.__getitem__(key)
except KeyError:
self.__setitem__(key, default)
return default
def update(*args, **kwargs):
if len(args) > 1:
raise TypeError('update expected at most 1 arguments, got %i' % len(args))
elif len(args) == 1:
for key_value in args:
if len(key_value) != 2:
raise ValueError('dictionary update sequence element #0 has length 1; %i is required' % len(key_value))
else:
self.__setitem__(key_value[0], key_value[1])
def __getitem__(self, key):
bucket = self._buckets[self.__hash(key)]
for key_value in bucket:
if (key_value[0] == key):
return key_value[1]
raise KeyError(key)
def __setitem__(self, key, value):
self.__delitem__(key)
self._buckets[self.__hash(key)].append((key, value))
self._length += 1
self.__resize()
def __delitem__(self, key):
bucket = self._buckets[self.__hash(key)]
for key_value in bucket:
if (key_value[0] == key):
bucket.remove(key_value)
self._length -= 1
if self._resize: self.__resize()
def __iter__(self):
return self.iterkeys()
def __len__(self):
return self._length
def __str__(self):
return "{%s}" % ', '.join(["'%s': '%s'" % (str(item[0]), str(item[1])) for item in self.iteritems()])
def __repr__(self):
return self.__str__()
if __name__ == '__main__':
import string
import random
from attest import Tests
suite = Tests()
@suite.test
def data_is_not_lost(): # I could add more tests, but you *really* shouldn't use this anyway.
test = duct()
control = dict();
for i in xrange(1024):
key = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(10))
value = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(10))
control[key] = value
test[key] = value
for key in control:
assert control[key] == test[key]
del test[key]
suite.run()
@karanlyons
Copy link
Author

I don't perfectly recreate Python's native dict resizing here. The starting size is right, but I resize by factors of 4 instead of 2, and fake the sparseness check. I also potentially resize after any addition or removal operation, whereas dict only potentially resizes after additions.

But then again, I don't perfectly recreate anything here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment