Skip to content

Instantly share code, notes, and snippets.

@inmatarian
Created December 27, 2014 02:14
Show Gist options
  • Save inmatarian/d71b0413bb0f3240f341 to your computer and use it in GitHub Desktop.
Save inmatarian/d71b0413bb0f3240f341 to your computer and use it in GitHub Desktop.
LZW Compression in Lua, rosetta-code style, also includes a 8-bit only mode for 7bit plain text strings.
local lzw = {}
local function enc_reset(dict, size)
for k, _ in pairs(dict) do dict[k] = nil end
for i = 0, size-1 do dict[string.char(i)] = i end
return dict
end
local function dec_reset(dict, size)
for k, _ in pairs(dict) do dict[k] = nil end
for i = 0, size-1 do dict[i] = string.char(i) end
return dict
end
lzw.encode = function(message)
local w, result, size = "", {}, 256
local dict = enc_reset({}, size)
for k in message:gmatch('.') do
local wk = w .. k
if dict[wk] then
w = wk
else
result[#result+1] = dict[w]
dict[wk] = size
size = size + 1
w = k
end
end
if w:len() > 0 then
result[#result+1] = dict[w]
end
return result
end
lzw.short_encode = function(message)
local w, result, size = "", {}, 128
local dict = enc_reset({}, size)
for k in string.gmatch(message, '.') do
local wk = w .. k
if dict[wk] then
w = wk
else
result[#result+1] = string.char(dict[w])
dict[wk] = size
size = size + 1
if size == 256 then
size = 128
enc_reset(dict, size)
end
w = k
end
end
if w:len() > 0 then
result[#result+1] = string.char(dict[w])
end
return table.concat(result)
end
lzw.decode = function(cipher)
local w, entry, result = "", "", {}
local size = 256
local dict = dec_reset({}, size)
w = string.char(cipher[1])
result[1] = w
for i = 2, #cipher do
local codeword = cipher[i]
if dict[codeword] then
entry = dict[codeword]
else
entry = w .. w:sub(1,1)
end
dict[size] = w .. entry:sub(1, 1)
result[#result+1], w, size = entry, entry, size + 1
end
return table.concat(result)
end
lzw.short_decode = function(cipher)
local w, entry, result, size = "", "", {}, 128
local dict = dec_reset({}, size)
w = cipher:sub(1, 1)
result[1] = w
for i = 2, cipher:len() do
local k = string.byte(cipher:sub(i, i))
if dict[k] then
entry = dict[k]
else
entry = w .. w:sub(1,1)
end
dict[size] = w .. entry:sub(1, 1)
result[#result+1], w, size = entry, entry, size + 1
if size >= 256 then
size = 128
dec_reset(dict, size)
end
end
return table.concat(result)
end
return lzw
local lzw = require 'lzw'
local TOBEmessage = 'TOBEORNOTTOBEORTOBEORNOT'
local LOREMmessage = [=[Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]=]
local test = {}
test["It encodes TOBEORNOTTOBEORTOBEORNOT"] = function()
local cipher = lzw.encode(TOBEmessage)
for i, v in ipairs{84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263} do
assert(v == cipher[i])
end
end
test["It decodes TOBEORNOTTOBEORTOBEORNOT"] = function()
local message = lzw.decode{84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263}
-- print(message)
assert(message == TOBEmessage)
end
test["It short-encodes lorem-ipsum"] = function()
local m = LOREMmessage
local cipher = lzw.short_encode(m)
assert(cipher:len() <= m:len())
end
test["It short-decodes lorem-ipsum"] = function()
local m = LOREMmessage
local cipher = lzw.short_encode(m)
local decoded = lzw.short_decode(cipher)
assert(decoded == m)
end
for description, run in pairs(test) do
if xpcall(run, function(...)
print("FAILED: ", description)
for i = 1, select('#', ...) do
print(select(i, ...))
end
print(debug.traceback())
end) then
print("Passed: ", description)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment