Created
February 26, 2014 11:33
-
-
Save markharwood/9227964 to your computer and use it in GitHub Desktop.
Crime anomalies using significant terms aggregation (coming in 1.1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Body of a test-rig function: converts an elasticsearch aggregation response
// (inData) into a KML structure, which is later fed to an iFrame wrapping the
// GoogleEarth plugin.

// Accumulates the cells that end up being rendered.
var data = [];
// Buckets of the response's "map" aggregation; each bucket key is a geohash.
var buckets = inData["aggregations"]["map"]["buckets"];
/**
 * Formats a number (or numeric string) with commas as thousands separators,
 * e.g. 1234567.89 -> "1,234,567.89".
 *
 * Only the text before the first "." is grouped; the segment after that "."
 * is appended unchanged.
 *
 * @param {number|string} nStr value to format; it is coerced to a string.
 * @returns {string} the formatted text.
 */
function addCommas(nStr)
{
    // All working variables are declared locally so the helper never creates
    // implicit globals (an undeclared assignment throws in strict mode and
    // in ES modules).
    var parts = (nStr + '').split('.');
    var whole = parts[0];
    var fraction = parts.length > 1 ? '.' + parts[1] : '';
    // Each pass splits off the right-most group of three digits that does not
    // yet have a comma in front of it; repeat until nothing is left to split.
    var rgx = /(\d+)(\d{3})/;
    while (rgx.test(whole)) {
        whole = whole.replace(rgx, '$1' + ',' + '$2');
    }
    return whole + fraction;
}
// Turn each geohash bucket into a map cell, keeping only the cells whose
// headline crime type is both a big shift from expectation and backed by
// enough documents.
for (var b = 0; b < buckets.length; b++)
{
    var bucket = buckets[b];
    var p = decodeGeoHash(bucket.key);
    var signifTerms = bucket.weirdCrimes;

    // Without a significant term there is no actual/expected ratio to test,
    // so the cell can never qualify.
    if (signifTerms.buckets.length === 0) {
        continue;
    }

    // The first significant_terms bucket is used as the cell's headline term.
    var mostSignifStat = signifTerms.buckets[0];
    var actualCount = mostSignifStat.doc_count;

    // Scale the term's background frequency to the size of this cell's subset.
    // Math.floor is used instead of parseInt: parseInt stringifies its
    // argument, so a tiny value such as 5e-7 would be read as 5.
    var expectedCount = Math.floor(
        (mostSignifStat.superset_doc_freq / signifTerms.superset_size) * signifTerms.subset_size);

    // Written as negated ">" tests so that a NaN ratio is rejected too.
    if (!((actualCount / expectedCount) > 3)) { // not a big shift
        continue;
    }
    if (!(actualCount > 50)) { // not enough weight of evidence
        continue;
    }

    var cell = {};
    cell.bbox = p;
    cell.height = 5000 * mostSignifStat.significance_score;
    cell.numCrimes = bucket.doc_count;
    cell.numIncidents = bucket.doc_count;
    cell.group = mostSignifStat.key;
    cell.mostSignifStatDocCount = actualCount;
    cell.actualCount = actualCount;
    cell.expectedCount = expectedCount;

    // HTML table listing the document count of every date-histogram bucket.
    var times = bucket.times.buckets;
    var timesTable = "<table border='1' cellpadding='2' cellspacing='0'><tr><th>Month</th><th>Num Incidents</th></tr>";
    for (var t = 0; t < times.length; t++) {
        timesTable += "<tr><td>" + times[t]["key_as_string"] + "</td><td>" + addCommas(times[t]["doc_count"]) + "</td></tr>";
    }
    timesTable += "</table>";
    cell.timesTable = timesTable;

    data.push(cell);
}
//Create KML: | |
var colors=["99bde7ff", "e3d754ff", "8ee684ff", "e7974cff", "e4878dff", "67adabff", "43ebccff", "e4b4eaff", "a1a655ff", | |
"78b36eff", "e68364ff", "5fecf0ff", "bee467ff", "dd8bb5ff", "68b492ff", "6fe5a8ff", "dce37eff", "d6a84aff", | |
"b7e794ff", "86d5c4ff", "d6c26aff", "d99964ff", "93d2e8ff", "8f9fbbff", "53a6bdff", "b8abd9ff", "3ebaa6ff", | |
"4ecee0ff", "ce9bbdff", "7ce9d3ff"]; | |
var kml='<?xml version="1.0" encoding="UTF-8"?>' + | |
'<kml xmlns="http://www.opengis.net/kml/2.2">'+ | |
'<Document>'; | |
var uniqueGroups=0; | |
var groupNameToStyleNumMap={}; | |
for (i=0;i<data.length;i++) { | |
var group= data[i].group; | |
if(!(group in groupNameToStyleNumMap)) | |
{ | |
groupNameToStyleNumMap[group]=uniqueGroups++; | |
} | |
data[i].style=groupNameToStyleNumMap[group]; | |
} | |
var colorNum=0; | |
for (group in groupNameToStyleNumMap) { | |
var color=colors[colorNum++]; | |
if(colorNum>=colors.length){ | |
colorNum=0; | |
} | |
kml+='<Style id="style'+groupNameToStyleNumMap[group]+'">'+ | |
'<LineStyle>'+ | |
'<color>'+color+'</color>'+ | |
'<width>4</width>'+ | |
'</LineStyle> '+ | |
' <PolyStyle>'+ | |
' <colorMode>normal</colorMode>'+ | |
'<color>'+color+'</color>'+ | |
'<fill>1</fill>'+ | |
'<outline>1</outline>'+ | |
'</PolyStyle> '+ | |
'</Style>'; | |
} | |
for (i=0;i<data.length;i++) { | |
var bbox=data[i].bbox; | |
var height=data[i].height; | |
var kmlPlace= | |
' <Placemark>' + | |
'<description><hr/>' | |
+'Has '+data[i].actualCount +" incidents of <b>"+data[i].group+'</b> vs expected ' | |
+data[i].expectedCount+"<br/>"+ | |
data[i].timesTable+ | |
'</description>'+ | |
'<name>'+addCommas(data[i].numCrimes)+' crimes</name>'+ | |
'<styleUrl>#style'+data[i].style+'</styleUrl>'+ | |
' <Polygon>' + | |
' <extrude>1</extrude>' + | |
' <altitudeMode>relativeToGround</altitudeMode>' + | |
' <outerBoundaryIs>' + | |
' <LinearRing>' + | |
' <coordinates>' + | |
' '+bbox.longitude[0]+','+bbox.latitude[0]+','+height + | |
' '+bbox.longitude[0]+','+bbox.latitude[1]+','+height + | |
' '+bbox.longitude[1]+','+bbox.latitude[1]+','+height + | |
' '+bbox.longitude[1]+','+bbox.latitude[0]+','+height + | |
' '+bbox.longitude[0]+','+bbox.latitude[0]+','+height + | |
' </coordinates>' + | |
' </LinearRing>' + | |
' </outerBoundaryIs>' + | |
' </Polygon>' + | |
' </Placemark>'; | |
kml+=kmlPlace; | |
} | |
kml+= '</Document></kml>'; | |
return { | |
"showBorders":true, | |
"showRoads":true, | |
"showBuildings":true, | |
"kml": kml | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
These two scripts were the main pieces required to produce this visualization:
https://twitter.com/elasticmark/status/426717055803330562 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"aggs" : { | |
"map" : { | |
"geohash_grid" : { | |
"field":"location", | |
"precision":5, | |
}, | |
"aggregations":{ | |
"weirdCrimes":{"significant_terms":{"field":"crimeType", "min_doc_count":3}}, | |
"times" : { "date_histogram" : { | |
"field" : "date", | |
"interval" : "month", | |
"format" : "yyyyMM" | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment