Created
December 27, 2014 02:14
-
-
Save inmatarian/d71b0413bb0f3240f341 to your computer and use it in GitHub Desktop.
LZW Compression in Lua, rosetta-code style, also includes a 8-bit only mode for 7bit plain text strings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local lzw = {} | |
local function enc_reset(dict, size) | |
for k, _ in pairs(dict) do dict[k] = nil end | |
for i = 0, size-1 do dict[string.char(i)] = i end | |
return dict | |
end | |
local function dec_reset(dict, size) | |
for k, _ in pairs(dict) do dict[k] = nil end | |
for i = 0, size-1 do dict[i] = string.char(i) end | |
return dict | |
end | |
lzw.encode = function(message) | |
local w, result, size = "", {}, 256 | |
local dict = enc_reset({}, size) | |
for k in message:gmatch('.') do | |
local wk = w .. k | |
if dict[wk] then | |
w = wk | |
else | |
result[#result+1] = dict[w] | |
dict[wk] = size | |
size = size + 1 | |
w = k | |
end | |
end | |
if w:len() > 0 then | |
result[#result+1] = dict[w] | |
end | |
return result | |
end | |
lzw.short_encode = function(message) | |
local w, result, size = "", {}, 128 | |
local dict = enc_reset({}, size) | |
for k in string.gmatch(message, '.') do | |
local wk = w .. k | |
if dict[wk] then | |
w = wk | |
else | |
result[#result+1] = string.char(dict[w]) | |
dict[wk] = size | |
size = size + 1 | |
if size == 256 then | |
size = 128 | |
enc_reset(dict, size) | |
end | |
w = k | |
end | |
end | |
if w:len() > 0 then | |
result[#result+1] = string.char(dict[w]) | |
end | |
return table.concat(result) | |
end | |
lzw.decode = function(cipher) | |
local w, entry, result = "", "", {} | |
local size = 256 | |
local dict = dec_reset({}, size) | |
w = string.char(cipher[1]) | |
result[1] = w | |
for i = 2, #cipher do | |
local codeword = cipher[i] | |
if dict[codeword] then | |
entry = dict[codeword] | |
else | |
entry = w .. w:sub(1,1) | |
end | |
dict[size] = w .. entry:sub(1, 1) | |
result[#result+1], w, size = entry, entry, size + 1 | |
end | |
return table.concat(result) | |
end | |
lzw.short_decode = function(cipher) | |
local w, entry, result, size = "", "", {}, 128 | |
local dict = dec_reset({}, size) | |
w = cipher:sub(1, 1) | |
result[1] = w | |
for i = 2, cipher:len() do | |
local k = string.byte(cipher:sub(i, i)) | |
if dict[k] then | |
entry = dict[k] | |
else | |
entry = w .. w:sub(1,1) | |
end | |
dict[size] = w .. entry:sub(1, 1) | |
result[#result+1], w, size = entry, entry, size + 1 | |
if size >= 256 then | |
size = 128 | |
dec_reset(dict, size) | |
end | |
end | |
return table.concat(result) | |
end | |
return lzw |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local lzw = require 'lzw' | |
local TOBEmessage = 'TOBEORNOTTOBEORTOBEORNOT' | |
local LOREMmessage = [=[Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]=] | |
local test = {} | |
test["It encodes TOBEORNOTTOBEORTOBEORNOT"] = function() | |
local cipher = lzw.encode(TOBEmessage) | |
for i, v in ipairs{84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263} do | |
assert(v == cipher[i]) | |
end | |
end | |
test["It decodes TOBEORNOTTOBEORTOBEORNOT"] = function() | |
local message = lzw.decode{84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263} | |
-- print(message) | |
assert(message == TOBEmessage) | |
end | |
test["It short-encodes lorem-ipsum"] = function() | |
local m = LOREMmessage | |
local cipher = lzw.short_encode(m) | |
assert(cipher:len() <= m:len()) | |
end | |
test["It short-decodes lorem-ipsum"] = function() | |
local m = LOREMmessage | |
local cipher = lzw.short_encode(m) | |
local decoded = lzw.short_decode(cipher) | |
assert(decoded == m) | |
end | |
for description, run in pairs(test) do | |
if xpcall(run, function(...) | |
print("FAILED: ", description) | |
for i = 1, select('#', ...) do | |
print(select(i, ...)) | |
end | |
print(debug.traceback()) | |
end) then | |
print("Passed: ", description) | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment