Skip to content

Instantly share code, notes, and snippets.

@markharwood
Created February 26, 2014 11:33
Show Gist options
  • Save markharwood/9227964 to your computer and use it in GitHub Desktop.
Save markharwood/9227964 to your computer and use it in GitHub Desktop.
Crime anomalies using significant terms aggregation (coming in 1.1)
//This function was used in my test rig to convert elasticsearch results to a KML structure which is
// later fed to an iFrame wrapping the GoogleEarth plugin
// Cells that pass the anomaly filter in the bucket loop below; each one later becomes a KML placemark.
var data=[];
// Buckets of the "map" aggregation in the search response (inData is supplied from outside this snippet).
var buckets=inData.aggregations.map.buckets;
/**
 * Formats a number (or numeric string) with commas as thousands separators.
 *
 * Only the part before the first '.' is grouped; the first fractional part,
 * if any, is appended unchanged.
 *
 * @param {number|string} nStr value to format, e.g. 1234567 or "1234.5"
 * @returns {string} the formatted value, e.g. "1,234,567" or "1,234.5"
 */
function addCommas(nStr)
{
    // All working variables are declared locally: assigning to undeclared names
    // leaks globals in sloppy mode and throws in strict mode / ES modules.
    var parts = (nStr + '').split('.');
    var whole = parts[0];
    var fraction = parts.length > 1 ? '.' + parts[1] : '';
    var rgx = /(\d+)(\d{3})/;
    // Each pass splits the last three digits off the leading digit run,
    // so the loop inserts commas from right to left until none are missing.
    while (rgx.test(whole)) {
        whole = whole.replace(rgx, '$1' + ',' + '$2');
    }
    return whole + fraction;
}
// Turn every geohash bucket into a "cell" record and keep only the cells whose
// leading significant crime type is both a big shift and backed by enough documents.
for(var b=0;b<buckets.length;b++)
{
    var bucket=buckets[b];
    var weirdCrimes=bucket.weirdCrimes;
    var cell={};
    // decodeGeoHash is defined outside this snippet; its result is read later as
    // bbox.latitude[0..1] / bbox.longitude[0..1] when the polygon is drawn.
    cell.bbox=decodeGeoHash(bucket.key);
    cell.numCrimes=bucket.doc_count;
    cell.numIncidents=bucket.doc_count;
    // Defaults for a cell that has no significant term at all.
    cell.height=50*bucket.doc_count;
    cell.group="Unknown";
    cell.mostSignifStatDocCount=0;
    if(weirdCrimes.buckets.length>0){
        // The first significant_terms bucket is treated as the most significant one.
        var mostSignifStat=weirdCrimes.buckets[0];
        cell.group=mostSignifStat.key;
        cell.height=5000*mostSignifStat.significance_score;
        cell.actualCount=mostSignifStat.doc_count;
        // Expected count = background frequency of the term scaled to this cell's subset size.
        // Math.floor rather than parseInt: parseInt stringifies its argument first, so a tiny
        // value such as 5e-7 would be parsed as 5 instead of 0.
        cell.expectedCount=Math.floor(
            (mostSignifStat.superset_doc_freq/weirdCrimes.superset_size)*weirdCrimes.subset_size);
        cell.mostSignifStatDocCount=mostSignifStat.doc_count;
    }
    // HTML table of incidents per date-histogram bucket, shown in the placemark description.
    var times=bucket.times.buckets;
    var timesTable="<table border='1' cellpadding='2' cellspacing='0'><tr><th>Month</th><th>Num Incidents</th></tr>";
    for(var t=0;t<times.length;t++){
        timesTable+="<tr><td>"+times[t]["key_as_string"]+"</td><td>"+addCommas(times[t]["doc_count"])+"</td></tr>";
    }
    timesTable+="</table>";
    cell.timesTable=timesTable;
    // Keep the cell only if there is a big shift (more than 3x the expected count) AND the
    // corresponding weight of evidence is there (more than 50 matching documents).
    // Cells without a significant term have undefined counts, so the ratio is NaN and they are dropped.
    if((cell.actualCount/cell.expectedCount)>3 && cell.mostSignifStatDocCount>50){
        data.push(cell);
    }
}
//Create KML:
// Palette cycled through when there are more groups than colours.
// NOTE(review): KML reads <color> values as aabbggrr hex — confirm these were authored in that order.
var colors=["99bde7ff", "e3d754ff", "8ee684ff", "e7974cff", "e4878dff", "67adabff", "43ebccff", "e4b4eaff", "a1a655ff",
"78b36eff", "e68364ff", "5fecf0ff", "bee467ff", "dd8bb5ff", "68b492ff", "6fe5a8ff", "dce37eff", "d6a84aff",
"b7e794ff", "86d5c4ff", "d6c26aff", "d99964ff", "93d2e8ff", "8f9fbbff", "53a6bdff", "b8abd9ff", "3ebaa6ff",
"4ecee0ff", "ce9bbdff", "7ce9d3ff"];
var kml='<?xml version="1.0" encoding="UTF-8"?>' +
'<kml xmlns="http://www.opengis.net/kml/2.2">'+
'<Document>';
// Assign each distinct group a style number in first-seen order and tag every cell with it.
// The map has no prototype so a group named e.g. "constructor" or "toString" cannot
// be mistaken for an already-registered group.
var uniqueGroups=0;
var groupNameToStyleNumMap=Object.create(null);
var groupNames=[];
for (var i=0;i<data.length;i++) {
    var group=data[i].group;
    if(!(group in groupNameToStyleNumMap))
    {
        groupNameToStyleNumMap[group]=uniqueGroups++;
        groupNames.push(group);
    }
    data[i].style=groupNameToStyleNumMap[group];
}
// Emit one <Style> per group. groupNames is in style-number order, so the index g
// is the style number itself; colours wrap around when the palette runs out.
for (var g=0;g<groupNames.length;g++) {
    var color=colors[g%colors.length];
    kml+='<Style id="style'+g+'">'+
        '<LineStyle>'+
        '<color>'+color+'</color>'+
        '<width>4</width>'+
        '</LineStyle> '+
        ' <PolyStyle>'+
        ' <colorMode>normal</colorMode>'+
        '<color>'+color+'</color>'+
        '<fill>1</fill>'+
        '<outline>1</outline>'+
        '</PolyStyle> '+
        '</Style>';
}
for (i=0;i<data.length;i++) {
var bbox=data[i].bbox;
var height=data[i].height;
var kmlPlace=
' <Placemark>' +
'<description><hr/>'
+'Has '+data[i].actualCount +" incidents of <b>"+data[i].group+'</b> vs expected '
+data[i].expectedCount+"<br/>"+
data[i].timesTable+
'</description>'+
'<name>'+addCommas(data[i].numCrimes)+' crimes</name>'+
'<styleUrl>#style'+data[i].style+'</styleUrl>'+
' <Polygon>' +
' <extrude>1</extrude>' +
' <altitudeMode>relativeToGround</altitudeMode>' +
' <outerBoundaryIs>' +
' <LinearRing>' +
' <coordinates>' +
' '+bbox.longitude[0]+','+bbox.latitude[0]+','+height +
' '+bbox.longitude[0]+','+bbox.latitude[1]+','+height +
' '+bbox.longitude[1]+','+bbox.latitude[1]+','+height +
' '+bbox.longitude[1]+','+bbox.latitude[0]+','+height +
' '+bbox.longitude[0]+','+bbox.latitude[0]+','+height +
' </coordinates>' +
' </LinearRing>' +
' </outerBoundaryIs>' +
' </Polygon>' +
' </Placemark>';
kml+=kmlPlace;
}
kml+= '</Document></kml>';
return {
"showBorders":true,
"showRoads":true,
"showBuildings":true,
"kml": kml
}
The 2 scripts were the main pieces required to produce this visualization:
https://twitter.com/elasticmark/status/426717055803330562
"aggs" : {
  "map" : {
    "geohash_grid" : {
      "field" : "location",
      "precision" : 5
    },
    "aggregations" : {
      "weirdCrimes" : { "significant_terms" : { "field" : "crimeType", "min_doc_count" : 3 } },
      "times" : {
        "date_histogram" : {
          "field" : "date",
          "interval" : "month",
          "format" : "yyyyMM"
        }
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment