This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
curl -XGET "http://localhost:9200/meetuprsvps/_xpack/_graph/_explore" -d' | |
{ | |
"query": { | |
"term": { | |
"_all": "elasticsearch" | |
} | |
}, | |
"vertices": [ | |
{ | |
"field": "member_id", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
TransportClient esClient = new TransportClient(ImmutableSettings.settingsBuilder().put("cluster.name", clusterName).build()); | |
// ======= Start hack====== | |
Field f = esClient.getClass().getDeclaredField("injector"); | |
f.setAccessible(true); | |
Injector injector = (Injector) f.get(esClient); | |
injector.createChildInjector(new SignificantTermsHeuristicModule()); | |
// ======= End hack====== |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gzip | |
import csv | |
import time | |
import datetime | |
import json | |
# This script reads a data file (car test results) and uses it to create a CSV file of query clauses for benchmarking. | |
# It creates mostly well-behaved queries and randomly throws in "problem" queries of various forms | |
# Each CSV row in the output is a set of clauses (query/agg/filter) and some metadata for reporting purposes. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gzip | |
import csv | |
from elasticsearch import helpers | |
from elasticsearch.client import Elasticsearch | |
import time | |
csv.register_dialect('piper', delimiter='|', quoting=csv.QUOTE_NONE) | |
# See http://postcodepal.com/dbgen/postcode_areas_true-centroids.zip | |
pf=open('/Users/Mark/Documents/work/irdata/MOT/postcode_areas.csv') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//import org.apache.lucene.codecs.bloom.FuzzySet; | |
// Extract the doc source to a field | |
doc = ctx._source; | |
// Convert basic array into map for ease of manipulation | |
tagMap = doc.tags.collectEntries{[it.tag, it]}; | |
// Patch the new tags into the data structure, adding one to a usercount |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"title": "eslogs", | |
"services": { | |
"query": { | |
"list": { | |
"0": { | |
"query": "", | |
"alias": "", | |
"color": "#7EB26D", | |
"id": 0, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//================================ | |
// Here a script for gathering the precision/recall stats for a query (see http://en.wikipedia.org/wiki/Precision_and_recall) | |
// A candidate classifier query's effectiveness is determined by counting hits on pre-classified content | |
// If we compute the F-measure we can potentially use it as the fitness function for a genetic algo that mutates our query | |
// (introducing phrases, minShouldMatch clauses etc) to move us towards our target goal of balancing precision/recall in our classifier. | |
//================================= | |
// Our candidate query for classifying documents in a category | |
var candidateQuery={ "terms": {"body": ["vs", "shr", "cts", "net", "revs", "note", "loss", "mths", "shrs", "avg", "profit"]}}; | |
// Our filter criteria for identifying documents in our target category |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
es_host="http://localhost:9200" | |
curl -XDELETE "$es_host/contests" | |
curl -XPUT "$es_host/contests" -d ' | |
{ | |
"settings": { | |
"index.number_of_replicas": 0, | |
"index.number_of_shards": 1, | |
"index.refresh_interval": -1 | |
}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//This function was used in my test rig to convert elasticsearch results to a KML structure which is | |
// later fed to an iFrame wrapping the GoogleEarth plugin | |
var data=[]; | |
var buckets=inData.aggregations.map.buckets; | |
function addCommas(nStr) | |
{ | |
nStr += ''; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Groups a related set of terms, typically from the results of some auto-expansion, | |
// and provides the average DocFreq of the set in order to avoid Lucene's IDF ranking | |
// favouring the rarest interpretation, which is often a poor choice for auto-expanded | |
// terms e.g. the terms produced by a fuzzy query or trying alternative fields | |
class CommonIDFContext { | |
int commonDf = -1; | |
Map<Term, Integer> balancedDfs; | |
List<Term> commonTerms = new ArrayList<Term>(); | |
public void add(Term unbalancedQueryTerm) { |
NewerOlder