Created
January 20, 2023 09:49
-
-
Save akamhy/4ba8fe28e7cfae9c21c6031642705ab9 to your computer and use it in GitHub Desktop.
Compress The Data - Huffman Coding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import heapq | |
from collections import defaultdict | |
def huffman_encoding(data_list): | |
""" | |
Huffman encoding algorithm. | |
""" | |
freq_table = defaultdict(int) | |
for data in data_list: | |
for char in data: | |
freq_table[char] += 1 | |
priority_queue = [[weight, [char, ""]] for char, weight in freq_table.items()] | |
heapq.heapify(priority_queue) | |
while len(priority_queue) > 1: | |
lo = heapq.heappop(priority_queue) | |
hi = heapq.heappop(priority_queue) | |
for pair in lo[1:]: | |
pair[1] = "0" + pair[1] | |
for pair in hi[1:]: | |
pair[1] = "1" + pair[1] | |
heapq.heappush(priority_queue, [lo[0] + hi[0]] + lo[1:] + hi[1:]) | |
encoding_table = dict(priority_queue[0][1:]) | |
encoded_data_list = [] | |
for data in data_list: | |
encoded_data = "" | |
for char in data: | |
encoded_data += encoding_table[char] | |
encoded_data_list.append(encoded_data) | |
return encoded_data_list, encoding_table | |
def huffman_decoding(data_list, encoding_table): | |
""" | |
Huffman decoding algorithm. | |
""" | |
decoding_table = { | |
code: char for char, code in encoding_table.items() | |
} # Reverse the encoding table | |
decoded_data_list = [] | |
for data in data_list: | |
decoded_data = "" | |
current_code = "" | |
for bit in data: | |
current_code += bit | |
if current_code in decoding_table: | |
decoded_data += decoding_table[current_code] | |
current_code = "" | |
decoded_data_list.append(decoded_data) | |
return decoded_data_list, decoding_table | |
if __name__ == "__main__": | |
data_list = ["abc", "aa", "b", "aaa", "baz", "zzx"] | |
encoded_data_list, encoding_table = huffman_encoding(data_list) | |
decoded_data_list, decoding_table = huffman_decoding( | |
encoded_data_list, encoding_table | |
) | |
original_size = sum( | |
[len(data) * 7 for data in data_list] | |
) # 7 bits per character (ASCII) | |
compressed_size = sum([len(encoded_data) for encoded_data in encoded_data_list]) | |
compression_percentage = (1 - (compressed_size / original_size)) * 100 | |
print("Original size:", original_size) | |
print("Compressed size:", compressed_size) | |
print("Compression percentage: {:.2f}%".format(compression_percentage)) | |
print("Original data:", data_list) | |
print("Encoded data:", encoded_data_list) | |
print("Encoding table:", encoding_table) | |
print("Decoded data:", decoded_data_list) | |
print("Decoding table:", decoding_table) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment