Skip to content

Instantly share code, notes, and snippets.

@lfborjas
Forked from ryanwitt/huffman.py
Created July 24, 2012 02:12
Show Gist options
  • Save lfborjas/3167577 to your computer and use it in GitHub Desktop.
Save lfborjas/3167577 to your computer and use it in GitHub Desktop.
silly huffman coding example
import subprocess
import sys
from subprocess import PIPE, Popen
import os
__all__ = (
'build',
'endode',
'decode',
)
phrase = 'A man a plan a canal Panama'.lower()
def build(phrase):
"""
>>> build(phrase)
[('a', 10), (' ', 6), ('n', 4), ('p', 2), ('l', 2), ('m', 2), ('c', 1)]
"""
d = {}
for c in phrase:
d[c] = d.get(c, 0) + 1
return list(reversed(sorted(((k,v) for k,v in d.items()), key = lambda x: x[1])))
def encode(phrase, tree=None):
"""
>>> ''.join(encode(phrase))
'01011111001101001011101111001101001011111100110011110101110011001111100'
"""
if tree is None:
tree = build(phrase)
for c in phrase:
for character, count in tree:
if character == c:
yield '0'
break
else:
yield '1'
def decode(phrase, tree):
"""
>>> ''.join(decode('01011111001101001011101111001101001011111100110011110101110011001111100', build(phrase)))
'a man a plan a canal panama'
"""
cur = iter(tree)
for c in phrase:
if c == '1':
cur.next()
elif c == '0':
yield cur.next()[0]
cur = iter(tree)
def tree2dot(tree):
acc = ""
acc += 'digraph g {'
last = 'root'
i = 0
for c,freq in tree:
acc += "%s\t%s\t%s\t%s" % (last, '-> ', ('"%s:%d"'%(c, freq)).replace(' ','space'),
'[label=0];')
new = bin(i).replace('b','')
acc += "%s\t%s\t%s\t%s" % (last, '->', new, '[label=1];')
last = new
i += 1
acc += '}'
return acc
if __name__ == "__main__":
import sys
phrase = sys.argv[1]
with open('dotit', "w") as f:
f.write(tree2dot(build(phrase)))
os.system("dot -Tpng dotit -o dotted.png && open ./dotted.png")
print "Original phrase",
print ''.join(bin(ord(c)).replace('b','') for c in phrase)
print "Compressed phrase",
print ''.join(encode(phrase))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment