Skip to content

Instantly share code, notes, and snippets.

@jaseg
Created January 21, 2016 19:14
Show Gist options
  • Save jaseg/3c533e6d2b32f7f54d01 to your computer and use it in GitHub Desktop.
Save jaseg/3c533e6d2b32f7f54d01 to your computer and use it in GitHub Desktop.
import sqlite3
import itertools
import lzma
import threading
import functools
class Stringstore:
    """Store many short strings in LZMA-compressed blocks inside SQLite.

    Each row of the ``blobs`` table holds one block: the LZMA-compressed,
    UTF-8 encoded concatenation of its strings, one string per line.  A
    string is addressed by the pair ``(block_id, string_id)`` where
    ``block_id`` is the row's oid and ``string_id`` is the line index
    within the block.

    Because ``'\\n'`` is the separator, stored strings must not contain
    it; ``insert_strings`` and ``replace_string`` raise ``ValueError``
    for such values instead of silently shifting later indices.
    """

    # Upper bound on the number of decoded blocks kept in memory.
    _CACHE_SIZE = 32

    def __init__(self, dbfile, max_block_size=262144):
        """Open (or create) the store in *dbfile*.

        *max_block_size* is the uncompressed size in bytes after which
        ``insert_strings`` starts a new block.
        """
        self.max_block_size = max_block_size
        # Per-instance cache of decoded blocks, keyed by block id.  A
        # class-level lru_cache would key on the cursor too (never
        # hitting) and keep every instance alive.
        self._block_cache = {}
        self.db = sqlite3.connect(dbfile)
        with self.db:  # commit the schema and the initial block
            self.db.execute('CREATE TABLE IF NOT EXISTS blobs(data BLOB)')
            # Only seed an empty block when the table has none; creating
            # one on every open would litter the table with empty rows.
            if self.db.execute('SELECT 1 FROM blobs LIMIT 1').fetchone() is None:
                self._makeblock()

    @staticmethod
    def _check_value(value):
        """Raise ``ValueError`` if *value* contains the line separator."""
        if '\n' in value:
            raise ValueError('stored strings must not contain a newline')

    def _unpack(self, data):
        """Decompress *data*; return ``(uncompressed_byte_length, lines)``."""
        unpacked = lzma.decompress(data)
        # Split on '\n' only: str.splitlines() would also split on '\r',
        # '\x0b', '\u2028', ... and corrupt the string indices.
        lines = unpacked.decode().split('\n')
        # Blocks are written with a terminating '\n' per string, which
        # leaves one empty trailing piece.  Blocks written without the
        # terminator (older format, or the empty block) are handled by
        # the same rule.
        if lines[-1] == '':
            lines.pop()
        return len(unpacked), lines

    def _read_block(self, cur, block_id):
        """Return the list of strings in block *block_id*, using the cache.

        The returned list is the cached object; callers must not mutate it.
        Raises ``KeyError`` if no such block exists.
        """
        cache = self._block_cache
        if block_id in cache:
            # Re-insert to mark the entry as most recently used.
            lines = cache.pop(block_id)
        else:
            # The parameters must be a sequence, hence the 1-tuple.
            row = cur.execute('SELECT data FROM blobs WHERE oid=?',
                              (block_id,)).fetchone()
            if row is None:
                raise KeyError('no such block: {!r}'.format(block_id))
            lines = self._unpack(row[0])[1]
            if len(cache) >= self._CACHE_SIZE:
                # Dicts iterate in insertion order, so the first key is
                # the least recently used one.
                del cache[next(iter(cache))]
        cache[block_id] = lines
        return lines

    def _makeblock(self, data=lzma.compress(b'')):
        """Insert a new block holding *data* and return its block id.

        Runs inside whatever transaction is open on ``self.db``; the
        caller is responsible for committing.
        """
        cur = self.db.cursor()
        cur.execute('INSERT INTO blobs(data) VALUES (?)', (data,))
        return cur.lastrowid

    def get_string(self, block_id, string_id):
        """Return the string stored at ``(block_id, string_id)``."""
        with self.db as conn:
            return self._read_block(conn.cursor(), block_id)[string_id]

    def _write_block(self, cur, block_id, lines):
        """Compress *lines* and store them as block *block_id*."""
        # Drop the stale decoded copy before touching the row.
        self._block_cache.pop(block_id, None)
        # Terminate (rather than separate) lines so that empty strings,
        # including a trailing one, survive the round trip.
        payload = ''.join(line + '\n' for line in lines).encode()
        cur.execute('REPLACE INTO blobs(oid, data) VALUES (?, ?)',
                    (block_id, lzma.compress(payload, preset=1)))

    def replace_string(self, block_id, string_id, value):
        """Overwrite the string at ``(block_id, string_id)`` with *value*."""
        self._check_value(value)
        with self.db as conn:
            cur = conn.cursor()
            # Work on a copy so the cached list is never mutated in place.
            blocklines = list(self._read_block(cur, block_id))
            blocklines[string_id] = value
            self._write_block(cur, block_id, blocklines)

    def insert_strings(self, values):
        """Append *values* to the store, yielding an id pair for each.

        This is a generator: for every value it yields the
        ``(block_id, string_id)`` under which the value will be stored.
        Blocks are only written when they fill up or when the generator
        is exhausted, so the generator must be consumed completely for
        the values to be persisted, and a yielded id is not readable via
        ``get_string`` until its block has been written.
        """
        with self.db as conn:
            cur = conn.cursor()
            # Continue filling the newest block rather than the oldest,
            # which would be rewritten on every call once it is full.
            row = cur.execute(
                'SELECT oid, data FROM blobs ORDER BY oid DESC LIMIT 1').fetchone()
            if row is None:
                block_id, curlines, sz = self._makeblock(), [], 0
            else:
                block_id = row[0]
                sz, curlines = self._unpack(row[1])
            for val in values:
                self._check_value(val)
                if sz > self.max_block_size:
                    self._write_block(cur, block_id, curlines)
                    block_id, curlines, sz = self._makeblock(), [], 0
                yield block_id, len(curlines)
                curlines.append(val)
                # Count encoded bytes so sz stays comparable with the
                # byte length reported by _unpack; +1 for the newline.
                sz += len(val.encode()) + 1
            self._write_block(cur, block_id, curlines)
import sqlite3
import itertools
import lzma
import threading
import functools
class Stringstore:
    """Store many short strings in LZMA-compressed blocks inside SQLite.

    Each row of the ``blobs`` table holds one block: the LZMA-compressed,
    UTF-8 encoded concatenation of its strings, one string per line.  A
    string is addressed by the pair ``(block_id, string_id)`` where
    ``block_id`` is the row's oid and ``string_id`` is the line index
    within the block.

    Because ``'\\n'`` is the separator, stored strings must not contain
    it; ``insert_strings`` and ``replace_string`` raise ``ValueError``
    for such values instead of silently shifting later indices.
    """

    # Upper bound on the number of decoded blocks kept in memory.
    _CACHE_SIZE = 32

    def __init__(self, dbfile, max_block_size=262144):
        """Open (or create) the store in *dbfile*.

        *max_block_size* is the uncompressed size in bytes after which
        ``insert_strings`` starts a new block.
        """
        self.max_block_size = max_block_size
        # Per-instance cache of decoded blocks, keyed by block id.  A
        # class-level lru_cache would key on the cursor too (never
        # hitting) and keep every instance alive.
        self._block_cache = {}
        self.db = sqlite3.connect(dbfile)
        with self.db:  # commit the schema and the initial block
            self.db.execute('CREATE TABLE IF NOT EXISTS blobs(data BLOB)')
            # Only seed an empty block when the table has none; creating
            # one on every open would litter the table with empty rows.
            if self.db.execute('SELECT 1 FROM blobs LIMIT 1').fetchone() is None:
                self._makeblock()

    @staticmethod
    def _check_value(value):
        """Raise ``ValueError`` if *value* contains the line separator."""
        if '\n' in value:
            raise ValueError('stored strings must not contain a newline')

    def _unpack(self, data):
        """Decompress *data*; return ``(uncompressed_byte_length, lines)``."""
        unpacked = lzma.decompress(data)
        # Split on '\n' only: str.splitlines() would also split on '\r',
        # '\x0b', '\u2028', ... and corrupt the string indices.
        lines = unpacked.decode().split('\n')
        # Blocks are written with a terminating '\n' per string, which
        # leaves one empty trailing piece.  Blocks written without the
        # terminator (older format, or the empty block) are handled by
        # the same rule.
        if lines[-1] == '':
            lines.pop()
        return len(unpacked), lines

    def _read_block(self, cur, block_id):
        """Return the list of strings in block *block_id*, using the cache.

        The returned list is the cached object; callers must not mutate it.
        Raises ``KeyError`` if no such block exists.
        """
        cache = self._block_cache
        if block_id in cache:
            # Re-insert to mark the entry as most recently used.
            lines = cache.pop(block_id)
        else:
            # The parameters must be a sequence, hence the 1-tuple.
            row = cur.execute('SELECT data FROM blobs WHERE oid=?',
                              (block_id,)).fetchone()
            if row is None:
                raise KeyError('no such block: {!r}'.format(block_id))
            lines = self._unpack(row[0])[1]
            if len(cache) >= self._CACHE_SIZE:
                # Dicts iterate in insertion order, so the first key is
                # the least recently used one.
                del cache[next(iter(cache))]
        cache[block_id] = lines
        return lines

    def _makeblock(self, data=lzma.compress(b'')):
        """Insert a new block holding *data* and return its block id.

        Runs inside whatever transaction is open on ``self.db``; the
        caller is responsible for committing.
        """
        cur = self.db.cursor()
        cur.execute('INSERT INTO blobs(data) VALUES (?)', (data,))
        return cur.lastrowid

    def get_string(self, block_id, string_id):
        """Return the string stored at ``(block_id, string_id)``."""
        with self.db as conn:
            return self._read_block(conn.cursor(), block_id)[string_id]

    def _write_block(self, cur, block_id, lines):
        """Compress *lines* and store them as block *block_id*."""
        # Drop the stale decoded copy before touching the row.
        self._block_cache.pop(block_id, None)
        # Terminate (rather than separate) lines so that empty strings,
        # including a trailing one, survive the round trip.
        payload = ''.join(line + '\n' for line in lines).encode()
        cur.execute('REPLACE INTO blobs(oid, data) VALUES (?, ?)',
                    (block_id, lzma.compress(payload, preset=1)))

    def replace_string(self, block_id, string_id, value):
        """Overwrite the string at ``(block_id, string_id)`` with *value*."""
        self._check_value(value)
        with self.db as conn:
            cur = conn.cursor()
            # Work on a copy so the cached list is never mutated in place.
            blocklines = list(self._read_block(cur, block_id))
            blocklines[string_id] = value
            self._write_block(cur, block_id, blocklines)

    def insert_strings(self, values):
        """Append *values* to the store, yielding an id pair for each.

        This is a generator: for every value it yields the
        ``(block_id, string_id)`` under which the value will be stored.
        Blocks are only written when they fill up or when the generator
        is exhausted, so the generator must be consumed completely for
        the values to be persisted, and a yielded id is not readable via
        ``get_string`` until its block has been written.
        """
        with self.db as conn:
            cur = conn.cursor()
            # Continue filling the newest block rather than the oldest,
            # which would be rewritten on every call once it is full.
            row = cur.execute(
                'SELECT oid, data FROM blobs ORDER BY oid DESC LIMIT 1').fetchone()
            if row is None:
                block_id, curlines, sz = self._makeblock(), [], 0
            else:
                block_id = row[0]
                sz, curlines = self._unpack(row[1])
            for val in values:
                self._check_value(val)
                if sz > self.max_block_size:
                    self._write_block(cur, block_id, curlines)
                    block_id, curlines, sz = self._makeblock(), [], 0
                yield block_id, len(curlines)
                curlines.append(val)
                # Count encoded bytes so sz stays comparable with the
                # byte length reported by _unpack; +1 for the newline.
                sz += len(val.encode()) + 1
            self._write_block(cur, block_id, curlines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment