-
-
Save jedp/925979 to your computer and use it in GitHub Desktop.
autocomplete.py - redis autocompleter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A redis autocomplete example for multi-word phrases. | |
Based on: | |
Ruby original: http://gist.github.com/574044 | |
Python original: https://gist.github.com/577852 | |
See options below for usage | |
Requires http://github.com/andymccurdy/redis-py/ | |
""" | |
from redis import Redis | |
import sys | |
r = Redis() | |
ZKEY_COMPL = 'compl' | |
SKEY_DOCS_PREFIX = 'docs:' | |
def deleteAll(): | |
"""Clear out the completions db""" | |
return r.zremrangebyrank(ZKEY_COMPL, 0, -1) | |
def addCompletions(text): | |
"""Create the completion sorted set.""" | |
text = text.strip() | |
if not text: | |
return | |
for word in text.split(): | |
word = word.lower() | |
for end_index in range(1, len(word)+1): | |
prefix = word[:end_index] | |
r.zadd(ZKEY_COMPL, prefix, 0) | |
r.zadd(ZKEY_COMPL, word + '*', 0) | |
r.sadd(SKEY_DOCS_PREFIX + word, text) | |
r.zadd(ZKEY_COMPL, text + '*', 0) | |
r.sadd(SKEY_DOCS_PREFIX + text.lower(), text) | |
def addFromFile(filename): | |
"""Create completions for all lines in filename""" | |
print "Adding completions for", filename, "...", | |
sys.stdout.flush() | |
for line in open(filename): | |
addCompletions(line) | |
print "done" | |
def getWordCompletions(r, word, count, rangelen=50): | |
"""Get up to count completions for the given word""" | |
prefix = word.lower().strip() | |
results = set() | |
if not prefix: | |
return results | |
start = r.zrank(ZKEY_COMPL, prefix) | |
if start is None: | |
return results | |
while len(results) <= count: | |
entries = r.zrange(ZKEY_COMPL, start, start + rangelen - 1) | |
start += rangelen | |
if not entries or len(entries) == 0: | |
break | |
for entry in entries: | |
minlen = min((len(entry), len(prefix))) | |
if entry[:minlen] != prefix[:minlen]: | |
return results | |
if entry[-1] == '*' and len(results) <= count: | |
results.add(entry[0:-1]) | |
return results | |
def getPhraseCompletions(r, text, count): | |
""" | |
Get up to @count completions for @text | |
For an input text of N words, uses N+1 redis calls: | |
One ZRANK per word, and one SUNION at the end. | |
""" | |
results = set() | |
for prefix in text.lower().split(): | |
results.update(getWordCompletions(r, prefix, count)) | |
keys = map(lambda k: SKEY_DOCS_PREFIX+k, results) | |
if keys: | |
return sorted(r.sunion( keys ), key=str.lower)[:count] | |
else: | |
return [] | |
if __name__ == '__main__': | |
from optparse import OptionParser | |
parser = OptionParser() | |
parser.add_option("-f", "--file", dest="filename", | |
help="Create completions for lines in FILE", metavar="FILE") | |
parser.add_option("-i", "--insert", dest="text", | |
help="Create completions for TEXT", metavar="TEXT") | |
parser.add_option("-d", "--delete-all", dest="delete", | |
action="store_true", default=False, | |
help="Delete everything in the completions db") | |
(options, args) = parser.parse_args() | |
if options.delete: | |
deleteAll() | |
if options.filename: | |
addFromFile(options.filename) | |
if options.text: | |
addCompletions(options.text) | |
if args: | |
for arg in args: | |
print arg, '==>' | |
print '\n'.join(getPhraseCompletions(r, arg, 10)) | |
Right you are, streeter. Thank you for that catch. Cheers,
j
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note that in
getWordCompletions
on line 57, you'll return early for the first element of the ZSET. That is because ZSETs are indexed from 0. So your test doesn't pass. Change line 57 to read:and you should be good to go.