Created January 21, 2016 19:14
-
-
Save jaseg/3c533e6d2b32f7f54d01 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3
import itertools
import lzma
import threading
import functools
class Stringstore:
    """Store many short strings in an SQLite file as LZMA-compressed blocks.

    Each row of the ``blobs`` table holds one block: a run of UTF-8 strings,
    each terminated by a newline, compressed with LZMA.  A stored string is
    addressed by the pair ``(block_id, string_id)``: the row id of its block
    and its index inside that block.  Because the newline is the separator,
    stored strings must not contain a newline character.
    """

    def __init__(self, dbfile, max_block_size=262144):
        """Open (or create) the store in *dbfile*.

        *max_block_size* is the approximate uncompressed block size above
        which ``insert_strings`` starts a new block.
        """
        self.max_block_size = max_block_size
        # Per-instance cache keyed on block_id only.  Decorating a method
        # with lru_cache would key on the cursor as well (a fresh object on
        # every call, so the cache never hits) and pin instances forever.
        self._cached_block = functools.lru_cache(maxsize=32)(self._load_block)
        self.db = sqlite3.connect(dbfile)
        self.db.execute('CREATE TABLE IF NOT EXISTS blobs(data BLOB)')
        # Seed one empty block only for a brand-new table, so re-opening an
        # existing store does not pile up empty rows.
        if self.db.execute('SELECT 1 FROM blobs LIMIT 1').fetchone() is None:
            self._makeblock()
            self.db.commit()

    @staticmethod
    def _check_value(value):
        """Raise ValueError if *value* cannot be stored without corruption."""
        if '\n' in value:
            # An embedded newline would shift the index of every later
            # string in the same block.
            raise ValueError('strings containing newlines cannot be stored')

    def _unpack(self, data):
        """Decompress *data*; return ``(uncompressed_byte_count, lines)``."""
        unpacked = lzma.decompress(data)
        # Split on '\n' only: str.splitlines() also breaks on '\r', '\x0b',
        # '\u2028', ... which would split stored strings apart.
        lines = unpacked.decode().split('\n')
        # Drop the empty tail produced by the final terminator (or by an
        # entirely empty block).  Blocks written without a trailing newline
        # are still read correctly.
        if lines[-1] == '':
            lines.pop()
        return len(unpacked), lines

    def _load_block(self, block_id):
        """Read block *block_id* from the database as a tuple of strings.

        Raises KeyError if there is no such block.
        """
        row = self.db.execute('SELECT data FROM blobs WHERE oid=?',
                              (block_id,)).fetchone()
        if row is None:
            raise KeyError('no such block: %r' % (block_id,))
        # A tuple, so the cached value cannot be mutated by accident.
        return tuple(self._unpack(row[0])[1])

    def _read_block(self, cur, block_id):
        """Return a fresh, mutable list of the strings in block *block_id*.

        *cur* is accepted for compatibility with the previous signature; the
        read goes through the per-instance cache on the same connection.
        """
        return list(self._cached_block(block_id))

    def _makeblock(self, data=lzma.compress(b'')):
        """Insert a new block holding *data* and return its row id.

        The default *data* is a compressed empty block.  The insert is not
        committed here; the caller's transaction takes care of that.
        """
        cur = self.db.cursor()
        cur.execute('INSERT INTO blobs(data) VALUES (?)', (data,))
        return cur.lastrowid

    def get_string(self, block_id, string_id):
        """Return the string stored at ``(block_id, string_id)``.

        Raises KeyError for an unknown block and IndexError for an index
        outside the block.
        """
        with self.db:
            return self._cached_block(block_id)[string_id]

    def _write_block(self, cur, block_id, lines):
        """Compress *lines* and store them as block *block_id* via *cur*."""
        self._cached_block.cache_clear()
        # Terminate (rather than join) with '\n' so that a trailing empty
        # string survives the round trip.
        payload = ''.join(line + '\n' for line in lines).encode()
        cur.execute('REPLACE INTO blobs(oid, data) VALUES (?, ?)',
                    (block_id, lzma.compress(payload, preset=1)))

    def replace_string(self, block_id, string_id, value):
        """Overwrite the string at ``(block_id, string_id)`` with *value*.

        Raises ValueError if *value* contains a newline.
        """
        self._check_value(value)
        with self.db as conn:
            cur = conn.cursor()
            blocklines = self._read_block(cur, block_id)
            blocklines[string_id] = value
            self._write_block(cur, block_id, blocklines)

    def insert_strings(self, values):
        """Append *values* to the store, yielding one id pair per value.

        This is a generator: each value yields its ``(block_id, string_id)``
        just before it is added.  The whole run happens in one transaction
        that is committed only when the generator is exhausted, so consume
        it completely.  Raises ValueError if a value contains a newline.
        """
        with self.db as conn:
            cur = conn.cursor()
            # Continue in the newest block; ascending order would pick the
            # oldest block and rewrite it on every call once it is full.
            row = cur.execute(
                'SELECT oid, data FROM blobs ORDER BY oid DESC LIMIT 1').fetchone()
            if row is None:
                block_id, sz, curlines = self._makeblock(), 0, []
            else:
                block_id = row[0]
                sz, curlines = self._unpack(row[1])
            for val in values:
                self._check_value(val)
                if sz > self.max_block_size:
                    # Current block is full: flush it and start a new one.
                    self._write_block(cur, block_id, curlines)
                    block_id, curlines, sz = self._makeblock(), [], 0
                yield block_id, len(curlines)
                curlines.append(val)
                sz += len(val) + 1  # +1 for the newline terminator
            self._write_block(cur, block_id, curlines)
import sqlite3 | |
import itertools | |
import lzma | |
import threading | |
import functools | |
class Stringstore:
    """Store many short strings in an SQLite file as LZMA-compressed blocks.

    Each row of the ``blobs`` table holds one block: a run of UTF-8 strings,
    each terminated by a newline, compressed with LZMA.  A stored string is
    addressed by the pair ``(block_id, string_id)``: the row id of its block
    and its index inside that block.  Because the newline is the separator,
    stored strings must not contain a newline character.
    """

    def __init__(self, dbfile, max_block_size=262144):
        """Open (or create) the store in *dbfile*.

        *max_block_size* is the approximate uncompressed block size above
        which ``insert_strings`` starts a new block.
        """
        self.max_block_size = max_block_size
        # Per-instance cache keyed on block_id only.  Decorating a method
        # with lru_cache would key on the cursor as well (a fresh object on
        # every call, so the cache never hits) and pin instances forever.
        self._cached_block = functools.lru_cache(maxsize=32)(self._load_block)
        self.db = sqlite3.connect(dbfile)
        self.db.execute('CREATE TABLE IF NOT EXISTS blobs(data BLOB)')
        # Seed one empty block only for a brand-new table, so re-opening an
        # existing store does not pile up empty rows.
        if self.db.execute('SELECT 1 FROM blobs LIMIT 1').fetchone() is None:
            self._makeblock()
            self.db.commit()

    @staticmethod
    def _check_value(value):
        """Raise ValueError if *value* cannot be stored without corruption."""
        if '\n' in value:
            # An embedded newline would shift the index of every later
            # string in the same block.
            raise ValueError('strings containing newlines cannot be stored')

    def _unpack(self, data):
        """Decompress *data*; return ``(uncompressed_byte_count, lines)``."""
        unpacked = lzma.decompress(data)
        # Split on '\n' only: str.splitlines() also breaks on '\r', '\x0b',
        # '\u2028', ... which would split stored strings apart.
        lines = unpacked.decode().split('\n')
        # Drop the empty tail produced by the final terminator (or by an
        # entirely empty block).  Blocks written without a trailing newline
        # are still read correctly.
        if lines[-1] == '':
            lines.pop()
        return len(unpacked), lines

    def _load_block(self, block_id):
        """Read block *block_id* from the database as a tuple of strings.

        Raises KeyError if there is no such block.
        """
        row = self.db.execute('SELECT data FROM blobs WHERE oid=?',
                              (block_id,)).fetchone()
        if row is None:
            raise KeyError('no such block: %r' % (block_id,))
        # A tuple, so the cached value cannot be mutated by accident.
        return tuple(self._unpack(row[0])[1])

    def _read_block(self, cur, block_id):
        """Return a fresh, mutable list of the strings in block *block_id*.

        *cur* is accepted for compatibility with the previous signature; the
        read goes through the per-instance cache on the same connection.
        """
        return list(self._cached_block(block_id))

    def _makeblock(self, data=lzma.compress(b'')):
        """Insert a new block holding *data* and return its row id.

        The default *data* is a compressed empty block.  The insert is not
        committed here; the caller's transaction takes care of that.
        """
        cur = self.db.cursor()
        cur.execute('INSERT INTO blobs(data) VALUES (?)', (data,))
        return cur.lastrowid

    def get_string(self, block_id, string_id):
        """Return the string stored at ``(block_id, string_id)``.

        Raises KeyError for an unknown block and IndexError for an index
        outside the block.
        """
        with self.db:
            return self._cached_block(block_id)[string_id]

    def _write_block(self, cur, block_id, lines):
        """Compress *lines* and store them as block *block_id* via *cur*."""
        self._cached_block.cache_clear()
        # Terminate (rather than join) with '\n' so that a trailing empty
        # string survives the round trip.
        payload = ''.join(line + '\n' for line in lines).encode()
        cur.execute('REPLACE INTO blobs(oid, data) VALUES (?, ?)',
                    (block_id, lzma.compress(payload, preset=1)))

    def replace_string(self, block_id, string_id, value):
        """Overwrite the string at ``(block_id, string_id)`` with *value*.

        Raises ValueError if *value* contains a newline.
        """
        self._check_value(value)
        with self.db as conn:
            cur = conn.cursor()
            blocklines = self._read_block(cur, block_id)
            blocklines[string_id] = value
            self._write_block(cur, block_id, blocklines)

    def insert_strings(self, values):
        """Append *values* to the store, yielding one id pair per value.

        This is a generator: each value yields its ``(block_id, string_id)``
        just before it is added.  The whole run happens in one transaction
        that is committed only when the generator is exhausted, so consume
        it completely.  Raises ValueError if a value contains a newline.
        """
        with self.db as conn:
            cur = conn.cursor()
            # Continue in the newest block; ascending order would pick the
            # oldest block and rewrite it on every call once it is full.
            row = cur.execute(
                'SELECT oid, data FROM blobs ORDER BY oid DESC LIMIT 1').fetchone()
            if row is None:
                block_id, sz, curlines = self._makeblock(), 0, []
            else:
                block_id = row[0]
                sz, curlines = self._unpack(row[1])
            for val in values:
                self._check_value(val)
                if sz > self.max_block_size:
                    # Current block is full: flush it and start a new one.
                    self._write_block(cur, block_id, curlines)
                    block_id, curlines, sz = self._makeblock(), [], 0
                yield block_id, len(curlines)
                curlines.append(val)
                sz += len(val) + 1  # +1 for the newline terminator
            self._write_block(cur, block_id, curlines)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment