Created
May 28, 2012 15:47
-
-
Save karanlyons/2819800 to your computer and use it in GitHub Desktop.
duct(): Almost like dict(), but worse.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import copy | |
class _DuctView(object): | |
""" | |
*Almost* like `DictView`, but worse. | |
""" | |
def __init__(self, hashtable): | |
self._hashtable = hashtable | |
def __len__(self): | |
return self._hashtable._length | |
class duct_keys(_DuctView): | |
""" | |
*Almost* like `dict_keys()`, but worse. | |
""" | |
def __iter__(self): | |
return self._hashtable.iterkeys() | |
def __and__(self, other): | |
return set(key for key in self.__iter__()) & set(other) | |
def __or__(self, other): | |
return set(key for key in self.__iter__()) | set(other) | |
def __sub__(self, other): | |
return set(key for key in self.__iter__()) - set(other) | |
def __xor__(self, other): | |
return set(key for key in self.__iter__()) ^ set(other) | |
def __str__(self): | |
return "duct_keys([%s])" % ', '.join(["'%s'" % key for key in self._hashtable.iterkeys()]) | |
def __repr__(self): | |
return self.__str__() | |
class duct_values(_DuctView): | |
""" | |
*Almost* like `dict_values()`, but worse. | |
""" | |
def __iter__(self): | |
return self._hashtable.itervalues() | |
def __str__(self): | |
return "duct_values([%s])" % ', '.join(["'%s'" % value for value in self._hashtable.itervalues()]) | |
def __repr__(self): | |
return self.__str__() | |
class duct_items(_DuctView): | |
""" | |
*Almost* like `dict_items()`, but worse. | |
""" | |
def __iter__(self): | |
return self._hashtable.iteritems() | |
def __str__(self): | |
return "duct_items([%s])" % ', '.join(["('%s', '%s')" % (str(item[0]), str(item[1])) for item in self._hashtable.iteritems()]) | |
def __repr__(self): | |
return self.__str__() | |
class duct(object): | |
""" | |
*Almost* like `dict()`, but worse. | |
""" | |
def __init__(self): | |
self._buckets = [[] for i in range(8)] | |
self._num_buckets = len(self._buckets) | |
self._length = 0 | |
self._resize = True | |
def __hash(self, key): | |
return key.__hash__() % self._num_buckets | |
def __resize(self): | |
length = self._length | |
self._resize = False | |
if length > self._num_buckets * 4: | |
self._buckets.extend([[] for i in range(self._num_buckets * 3)]) | |
for key_value in self.items(): | |
self.__delitem__(key_value[0]) | |
self._buckets[key_value[0].__hash__() % (self._num_buckets * 4)].append(key_value) | |
elif self._num_buckets > 8 and length <= self._num_buckets / 4: | |
for key_value in self.items(): | |
self.__delitem__(key_value[0]) | |
self._buckets[key_value[0].__hash__() % (self._num_buckets / 4)].append(key_value) | |
del self._buckets[self._num_buckets / 4:] | |
self._num_buckets = len(self._buckets) | |
self._length = length | |
self._resize = True | |
def clear(self): | |
__init__() | |
def copy(self): | |
return copy.copy(self) | |
def items(self): | |
key_values = [] | |
for bucket in self._buckets: | |
if len(bucket): | |
for key_value in bucket: | |
key_values.append(key_value) | |
return key_values | |
def keys(self): | |
return [key_value[0] for key_value in [list(*bucket) for bucket in self._buckets if len(bucket)]] | |
def values(self): | |
return [key_value[1] for key_value in [list(*bucket) for bucket in self._buckets if len(bucket)]] | |
def iteritems(self): | |
for key_value in self.items(): | |
yield key_value | |
def iterkeys(self): | |
for key in self.keys(): | |
yield key | |
def itervalues(self): | |
for value in self.values(): | |
yield value | |
def viewitems(self): | |
return duct_items(self) | |
def viewkeys(self): | |
return duct_keys(self) | |
def viewvalues(self): | |
return duct_values(self) | |
def has_key(self, key): | |
try: | |
self.__getitem__(key) | |
return True | |
except KeyError: | |
return False | |
def pop(self, key, *args): | |
if len(args) > 1: | |
raise TypeError('pop expected at most 2 arguments, got %i' % len(args)) | |
try: | |
value = self.__getitem__(key) | |
self.__delitem__(key) | |
return value | |
except KeyError as error: | |
if len(args): | |
return args[0] | |
else: | |
raise KeyError(*error.args) | |
def popitem(self): | |
if self._length: | |
for bucket in self._buckets: | |
if len(bucket): | |
self._length -= 1 | |
key_value = bucket.pop(0) | |
self.__resize() | |
return key_value | |
else: | |
raise KeyError | |
def setdefault(self, key, default=None): | |
try: | |
return self.__getitem__(key) | |
except KeyError: | |
self.__setitem__(key, default) | |
return default | |
def update(*args, **kwargs): | |
if len(args) > 1: | |
raise TypeError('update expected at most 1 arguments, got %i' % len(args)) | |
elif len(args) == 1: | |
for key_value in args: | |
if len(key_value) != 2: | |
raise ValueError('dictionary update sequence element #0 has length 1; %i is required' % len(key_value)) | |
else: | |
self.__setitem__(key_value[0], key_value[1]) | |
def __getitem__(self, key): | |
bucket = self._buckets[self.__hash(key)] | |
for key_value in bucket: | |
if (key_value[0] == key): | |
return key_value[1] | |
raise KeyError(key) | |
def __setitem__(self, key, value): | |
self.__delitem__(key) | |
self._buckets[self.__hash(key)].append((key, value)) | |
self._length += 1 | |
self.__resize() | |
def __delitem__(self, key): | |
bucket = self._buckets[self.__hash(key)] | |
for key_value in bucket: | |
if (key_value[0] == key): | |
bucket.remove(key_value) | |
self._length -= 1 | |
if self._resize: self.__resize() | |
def __iter__(self): | |
return self.iterkeys() | |
def __len__(self): | |
return self._length | |
def __str__(self): | |
return "{%s}" % ', '.join(["'%s': '%s'" % (str(item[0]), str(item[1])) for item in self.iteritems()]) | |
def __repr__(self): | |
return self.__str__() | |
if __name__ == '__main__': | |
import string | |
import random | |
from attest import Tests | |
suite = Tests() | |
@suite.test | |
def data_is_not_lost(): # I could add more tests, but you *really* shouldn't use this anyway. | |
test = duct() | |
control = dict(); | |
for i in xrange(1024): | |
key = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(10)) | |
value = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(10)) | |
control[key] = value | |
test[key] = value | |
for key in control: | |
assert control[key] == test[key] | |
del test[key] | |
suite.run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I don't perfectly recreate Python's native
dict
resizing here. The starting size is right, but I resize by factors of 4 instead of 2, and fake the sparseness check. I also potentially resize after any addition or removal operation, whereasdict
only potentially resizes after additions.But then again, I don't perfectly recreate anything here.