Last active
August 29, 2015 13:56
-
-
Save mchaput/9192099 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Minimal Whoosh example: index a few in-memory documents and search them.

Builds an on-disk index in the ``index`` directory from a handful of
(filename, text) pairs, reopens it, and prints the stored name of every
document whose ``content`` field matches the query ``earth``.
"""
import os

from whoosh import index, fields
from whoosh import qparser

# Directory that holds the on-disk index.
INDEX_DIR = "index"

# Fake data from external files
files = [
    (u"t1.txt", u"Earth is a beautiful planet.Earth is the third planet from the Sun. It is the densest and fifth-largest of the eight planets in the Solar System. It is also the largest of the Solar System's four terrestrial planets. It is sometimes referred to as the world or the Blue Planet."),
    (u"t2.txt", u"jupiter is the biggest planet."),
    (u"t3.txt", u"earth is known as blue planet."),
    (u"t4.txt", u"earth is the 3rd most planet in the universe."),
    (u"t5.txt", u"Distance between earth and sun is some light years."),
]

# "content" is indexed for full-text search but not stored; "name" is stored
# so it can be read back from each search hit.
schema = fields.Schema(content=fields.TEXT, name=fields.ID(stored=True))

# create_in() expects the target directory to exist already, so create it
# on the first run instead of failing.
if not os.path.exists(INDEX_DIR):
    os.makedirs(INDEX_DIR)
ix = index.create_in(INDEX_DIR, schema)

# The writer is used as a context manager so the added documents are
# finalized when the block exits.
with ix.writer() as w:
    for filename, content in files:
        w.add_document(content=content, name=filename)

# Reopen the index from disk, as a separate search process would.
ix = index.open_dir(INDEX_DIR)
qp = qparser.QueryParser("content", ix.schema)

with ix.searcher() as s:
    q = qp.parse(u"earth")
    results = s.search(q)
    # Hits must be consumed while the searcher is still open.
    for hit in results:
        print(hit["name"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment