Created
December 4, 2019 20:35
-
-
Save pabloferz/676ce7a8de79edd736e9d1e9c0535b28 to your computer and use it in GitHub Desktop.
HuffmanCodes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This example work for strings but can be easily adapted to other streams of data | |
function compress(str::String, hdict) | |
nbits = sum(c -> hdict[c][2], str) | |
bv = falses(nbits) | |
n = 0 | |
for c in str | |
code, i = hdict[c] # code and number of bits | |
d = div(n, 64) + 1 # current storage chunk | |
r = 64 * d - n # number of bits available in the current chunk | |
u = (i < r) ? (code << (r - i)) : (code >> (i - r)) | |
bv.chunks[d] |= u | |
# If there are not enough bits left, use the next chunk | |
if i > r | |
u = code << (64 - i + r) | |
bv.chunks[d + 1] |= u | |
end | |
n += i | |
end | |
return bv.chunks | |
end | |
function save(name, data) | |
file = open("name" * ".bin", "w") | |
for entry in data | |
write(file, entry) | |
end | |
close(file) | |
end | |
######################################################## | |
## Example string and corresponding Huffman Coding. ## | |
######################################################## | |
str = "this is an example for huffman encoding" | |
hdict = Dict(' ' => (UInt(0), 3, "000" ), | |
'a' => (UInt(8), 4, "1000" ), | |
'c' => (UInt(13), 5, "01101" ), | |
'd' => (UInt(12), 5, "01100" ), | |
'e' => (UInt(5), 4, "0101" ), | |
'f' => (UInt(2), 4, "0010" ), | |
'g' => (UInt(16), 6, "010000"), | |
'h' => (UInt(13), 4, "1101" ), | |
'i' => (UInt(3), 4, "0011" ), | |
'l' => (UInt(17), 6, "010001"), | |
'm' => (UInt(15), 4, "1111" ), | |
'n' => (UInt(5), 3, "101" ), | |
'o' => (UInt(14), 4, "1110" ), | |
'p' => (UInt(19), 5, "10011" ), | |
'r' => (UInt(18), 5, "10010" ), | |
's' => (UInt(12), 4, "1100" ), | |
't' => (UInt(15), 5, "01111" ), | |
'u' => (UInt(14), 5, "01110" ), | |
'x' => (UInt(9), 5, "01001" ), | |
) | |
nbits = sum(c -> hdict[c][2], str) | |
data = compress(str, hdict) | |
encoding = join(Iterators.take(join(bitstring(entry) for entry in data), nbits)) | |
reference = join(hdict[c][3] for c in str) | |
encoding == reference # true |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment