Created
July 26, 2014 00:11
-
-
Save mmozeiko/c00f6ef5a14c7ec9708e to your computer and use it in GitHub Desktop.
lz4 decompression in Python 2.x
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: malformed input is rejected instead of silently corrupting the output.
# Reads past the end of the input, writes past `osize`, and match offsets that
# point before the start of the output raise IndexError; a zero match offset
# raises ValueError.
def LZ4_decompress(source, osize):
    """Decompress one raw LZ4 block and return the decoded bytes.

    Works on both Python 2 and Python 3.

    Args:
        source: the compressed block (``str`` on Python 2, any bytes-like
            object accepted by ``bytearray`` on Python 3).
        osize: size in bytes of the output buffer, i.e. the expected
            decompressed length.

    Returns:
        A byte string (``str`` on Python 2, ``bytes`` on Python 3) of exactly
        ``osize`` bytes. If the block decodes to fewer than ``osize`` bytes,
        the remainder is left as zero bytes.

    Raises:
        IndexError: the input is empty or truncated, a literal run or match
            copy would write past ``osize``, or a match offset points before
            the start of the output.
        ValueError: a match offset of zero is encountered.
    """
    src = bytearray(source)
    dst = bytearray(osize)
    src_len = len(src)

    si = 0
    di = 0

    while True:
        token = src[si]
        si += 1

        # high nibble: literal run length, low nibble: match length - 4
        literals = token >> 4
        match = token & 0xF

        if literals > 0:
            # process literal section
            if literals == 15:
                # length continues in following bytes; 255 means "keep going"
                while src[si] == 255:
                    literals += 255
                    si += 1
                literals += src[si]
                si += 1

            # Slice assignment on a bytearray resizes it when the two sides
            # differ in length, so the ranges must be validated explicitly.
            if si + literals > src_len:
                raise IndexError("LZ4 literal run reads past end of input")
            if di + literals > osize:
                raise IndexError("LZ4 literal run writes past end of output")

            dst[di:di + literals] = src[si:si + literals]
            di += literals
            si += literals

        # lz4 data can end only with block that has literals and has no match copy
        if si == src_len:
            break

        # process match copy section: 16-bit little-endian back-reference
        offset = src[si] | (src[si + 1] << 8)
        si += 2
        if offset == 0:
            raise ValueError(
                "this doesn't make sense, offset=0 means this must be literal")
        if offset > di:
            # a negative slice start would silently read the wrong bytes
            raise IndexError("LZ4 match offset points before start of output")

        if match == 15:
            while src[si] == 255:
                match += 255
                si += 1
            match += src[si]
            si += 1
        match += 4

        if di + match > osize:
            raise IndexError("LZ4 match copy writes past end of output")

        start = di - offset
        if match <= offset:
            # source and destination ranges do not overlap: plain slice copy
            dst[di:di + match] = dst[start:start + match]
        else:
            # Overlapping copy: a byte-by-byte forward copy just repeats the
            # last `offset` bytes, so build that repetition in one step.
            pattern = dst[start:di]
            repeats = match // offset + 1
            dst[di:di + match] = (pattern * repeats)[:match]
        di += match

    # bytes is str on Python 2, so this matches the old str(dst) there and
    # yields real bytes (not the repr text) on Python 3.
    return bytes(dst)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment