Created
March 6, 2012 19:42
-
-
Save avar/1988581 to your computer and use it in GitHub Desktop.
ElasticSearch fuzzy ngram powered search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Blank out the proxy variables for this shell session.
# NOTE(review): presumably so the curl calls to localhost are not routed
# through a proxy — confirm this is still needed in your environment.
export http_proxy=
export https_proxy=

# Delete the "test" index; the next step re-creates it with fresh settings.
curl -XDELETE 'http://localhost:9200/test/'
# Create the "test" index with a mapping for the "member" type plus the
# custom analysis chain used by it.
#
# Mapping: "person_name" and "city_name" are multi_field properties. Each has
#   - a sub-field with the same name as the property, analyzed with
#     "full_name" (standard tokenizer + standard, lowercase, asciifolding), and
#   - a "partial_*" sub-field analyzed with "ngrams_for_cjk"
#     (standard tokenizer + lowercase + nGram filter, 2..10 characters).
# "city_name" additionally has "partial_city_name_prefix", analyzed with
# "prefix_ngrams_for_cjk" (same chain, but an edgeNGram filter anchored at
# the front of each token).
#
# The n-gram analyzers are applied at both index and search time.
echo "Creating the mapping"
curl -XPUT 'http://localhost:9200/test/?pretty=1' -d '
{
  "mappings" : {
    "member" : {
      "properties" : {
        "person_name" : {
          "fields" : {
            "partial_person_name" : {
              "search_analyzer" : "ngrams_for_cjk",
              "index_analyzer" : "ngrams_for_cjk",
              "type" : "string"
            },
            "person_name" : {
              "type" : "string",
              "analyzer" : "full_name"
            }
          },
          "type" : "multi_field"
        },
        "city_name" : {
          "fields" : {
            "partial_city_name" : {
              "search_analyzer" : "ngrams_for_cjk",
              "index_analyzer" : "ngrams_for_cjk",
              "type" : "string"
            },
            "partial_city_name_prefix" : {
              "search_analyzer" : "prefix_ngrams_for_cjk",
              "index_analyzer" : "prefix_ngrams_for_cjk",
              "type" : "string"
            },
            "city_name" : {
              "type" : "string",
              "analyzer" : "full_name"
            }
          },
          "type" : "multi_field"
        }
      }
    }
  },
  "settings" : {
    "analysis" : {
      "filter" : {
        "ngrams_for_every_few_characters" : {
          "type" : "nGram",
          "max_gram": "10",
          "min_gram" : "2"
        },
        "ngrams_for_prefix" : {
          "type" : "edgeNGram",
          "max_gram": "10",
          "min_gram" : "2",
          "side" : "front"
        }
      },
      "analyzer" : {
        "full_name" : {
          "filter" : [
            "standard",
            "lowercase",
            "asciifolding"
          ],
          "type" : "custom",
          "tokenizer" : "standard"
        },
        "ngrams_for_cjk" : {
          "filter" : [
            "lowercase",
            "ngrams_for_every_few_characters"
          ],
          "type" : "custom",
          "tokenizer" : "standard"
        },
        "prefix_ngrams_for_cjk" : {
          "filter" : [
            "lowercase",
            "ngrams_for_prefix"
          ],
          "type" : "custom",
          "tokenizer" : "standard"
        }
      }
    }
  }
}
'; echo
# Index six sample "member" documents into the "test" index with one bulk
# request. The payload is newline-delimited: every document is an action line
# ({"index": ...}) followed by its source line, so keep exactly one JSON
# object per line and do not re-wrap or pretty-print this body.
echo "Indexing"
curl -XPOST 'http://localhost:9200/_bulk?pretty=1' -d '
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "John Smith", "city_name" : "Amsterdam"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Ævar Arnfjörð Bjarmason", "city_name" : "Amsterdam"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Lucy Lue", "city_name" : "London"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Ævar Arnfjörð Bjarmason", "city_name" : "كوالالمبور"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Ævar Johnson", "city_name" : "The city of كوالالمبور"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Juhen Smith", "city_name" : "Amsterdam"}
'; echo
# Force a refresh before searching, so the search that follows does not run
# against an index that has not yet picked up the documents just indexed.
echo "Refreshing"
curl -XPOST 'http://localhost:9200/_refresh?pretty=yes'; echo
# Run the fuzzy search against test/member.
#
# The outer bool query has two "should" clauses and
# "minimum_number_should_match" : 2, so a hit has to satisfy both the
# person-name clause and the city-name clause.
#
# Each clause is a dis_max over two alternatives on the same property:
#   1. a boosted match on the sub-field analyzed with "full_name"
#      (boost 10 for the person, 5 for the city), and
#   2. an unboosted match on the n-gram ("partial_*") sub-field.
#
# NOTE(review): the search terms look deliberately imperfect — "Xvar"
# presumably stands in for a misspelled "Ævar", and the city term is the
# Arabic city name from the sample data with Latin noise ("FUZZY", "WOO")
# spliced in — to show that the n-gram sub-fields still produce matches.
echo "Doing the search"
curl -XGET 'http://localhost:9200/test/member/_search?pretty=1' -d '
{
  "explain" : "false",
  "query" : {
    "bool" : {
      "minimum_number_should_match" : 2,
      "should" : [
        {
          "dis_max" : {
            "queries" : [
              {
                "bool" : {
                  "boost" : 10,
                  "must" : {
                    "text" : {
                      "person_name.person_name" : "Xvar"
                    }
                  }
                }
              },
              {
                "text" : {
                  "person_name.partial_person_name" : "Xvar"
                }
              }
            ]
          }
        },
        {
          "dis_max" : {
            "queries" : [
              {
                "bool" : {
                  "boost" : 5,
                  "must" : {
                    "text" : {
                      "city_name.city_name" : "كوالFUZZYالمبورWOO"
                    }
                  }
                }
              },
              {
                "text" : {
                  "city_name.partial_city_name" : "كوالFUZZYالمبورWOO"
                }
              }
            ]
          }
        }
      ]
    }
  }
}
'; echo
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment