Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jamiewanderi/d3b35b8709299b841563d6913e964a5e to your computer and use it in GitHub Desktop.
Save jamiewanderi/d3b35b8709299b841563d6913e964a5e to your computer and use it in GitHub Desktop.
Script that geocodes address data stored in a PostgreSQL (PostGIS) database using Google.
##
## Mike Tafel and Justin Lewis
## 2010/07/26 : updated Dec. 2012
##
## Script that geocodes address data stored in a PostgreSQL (PostGIS) database using Google.
### Preps the db table, reads address data, builds a request url, returns lat/long/precision valuse to the same table,
### transforms coordinates to 4326 then 2232.
##
## Dependancies:
#### PostGIS database, Google Private Key and Client ID, all the libraries listed in the import below
##
import urllib, urllib2, pg, simplejson as json, time, sys, hashlib, hmac, base64, urlparse
class TablePrep:
def check_columns(self):
print "Checking if required fields are in the base table..."
check_columns_q = "select column_name from information_schema.columns where table_name = '%(table)s'" % {'table' : table }
self.columns = db.query(check_columns_q)
self.fieldnames = self.columns.getresult()
if ('lat',) in self.fieldnames:
print "Table has lat"
clear = "UPDATE %(table)s SET lat = null" % {'table' : table }
db.query(clear)
else:
print "Adding lat column..."
add_column = "ALTER TABLE %(table)s ADD COLUMN lat double precision;" % {'table' : table }
db.query(add_column)
if ('long',) in self.fieldnames:
print "Table has long"
clear = "UPDATE %(table)s SET long = null" % {'table' : table }
db.query(clear)
else:
print "Adding long column..."
add_column = "ALTER TABLE %(table)s ADD COLUMN long double precision;" % {'table' : table }
db.query(add_column)
if ('precision',) in self.fieldnames:
print "Table has precision"
clear = "UPDATE %(table)s SET precision = null" % {'table' : table }
db.query(clear)
else:
print "Adding precision column..."
add_column = "ALTER TABLE %(table)s ADD COLUMN precision character varying (50);" % {'table' : table }
db.query(add_column)
if ('geom_4326',) in self.fieldnames:
print "Table has geom_4326"
clear = "UPDATE %(table)s SET geom_2232 = null" % {'table' : table }
db.query(clear)
else:
print "Adding geom_4326 column..."
add_column = "SELECT AddGeometryColumn('public', '%(table)s', 'geom_4326', '4326', 'POINT', 2);" % {'table' : table }
db.query(add_column)
if ('geom_2232',) in self.fieldnames:
print "Table has geom_2232"
else:
print "Adding geom_2232 column..."
add_column = "SELECT AddGeometryColumn('public', '%(table)s', 'geom_2232', '2232', 'POINT', 2);" % {'table' : table }
db.query(add_column)
#### This works but need to determine what the row # is on creation
## if ('gid',) in self.fieldnames:
## print "Table has gid"
## else:
## print "Adding gid column..."
## add_column = "ALTER TABLE %(table)s ADD COLUMN gid SERIAL;" % {'table' : table }
## db.query(add_column)
class Geocode_Address:
def geocode_single_address_field(self, table, address_row, city_row, zip_row, ojectid_row, oid_name):
#### Selecting data from the geocode table
selectallSql = "SELECT * FROM %(table)s;" % {'table' : table} ## add where clause variable
sel = db.query(selectallSql)
result = sel.getresult()
rows = len(result)
#### Set url variables
base_url = "http://maps.google.com/maps/api/geocode/json?"
privateKey = "YOUR_PRIVATE_KEY="
client_id = "YOUR_CLIENT_ID"
x = 0
while x < rows:
print " "
row = result[x]
address_field = str(row[address_row])
city_field = str(row[city_row])
zip_field = str(row[zip_row])
objectid_field = int(row[ojectid_row])
values = { 'sensor' : 'false','address' :address_field+' '+city_field+', CO '+zip_field, 'client':client_id}
data = urllib.urlencode(values)
print values
#### build request url
request_url = base_url+data
url = urlparse.urlparse(request_url)
urlToSign = url.path + "?"+ url.query
decodedKey = base64.urlsafe_b64decode(privateKey)
signature = hmac.new(decodedKey, urlToSign, hashlib.sha1)
encodedSignature = base64.urlsafe_b64encode(signature.digest())
originalUrl = url.scheme + "://" + url.netloc + url.path + "?" + url.query
geocode_url = originalUrl + "&signature=" + encodedSignature
print geocode_url
#### Send request
start = time.time()
response = urllib2.urlopen(geocode_url)
output = response.read()
#### Read and parse json output.
data = json.loads(output)
status = data["status"]
print status
if status == "OVER_QUERY_LIMIT":
x = rows
print "over the query limit"
elif status == "OK":
long = data["results"][0]["geometry"]["location"]["lng"]
lat = data["results"][0]["geometry"]["location"]["lat"]
precision = data["results"][0]["geometry"]["location_type"]
if long < -106 or long > -104 or lat > 41 or lat < 38:
update_query = "UPDATE %(table)s SET lat = -2, precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'prec': precision, 'table' : table, 'oid_name' : oid_name}
db.query(update_query)
print "out of region"
else:
update_query = "UPDATE %(table)s SET long = '%(long)s', lat = '%(lat)s', precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'long' : long, 'lat' : lat, 'prec': precision, 'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name}
db.query(update_query)
print precision
elif status != "OK":
#### Update the data
update_query = "UPDATE %(table)s SET lat = -1 WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name}
db.query(update_query)
else:
pass
x = x+1
class Geocode_Intersection:
def intersection(self, table, address_row_1, address_row_2, city_row, ojectid_row, oid_name):
#### Selecting data from the geocode table
selectallSql = "SELECT * FROM %(table)s;" % {'table' : table} ## add where clause variable
sel = db.query(selectallSql)
result = sel.getresult()
rows = len(result)
#### Set url variables
base_url = "http://maps.google.com/maps/api/geocode/json?"
privateKey = "0bTz96TLfiOmPIwGLc0SgFl2ZBQ="
client_id = "gme-denverregionalcouncil"
x = 0
while x < rows:
print " "
row = result[x]
address_1 = str(row[address_row_1])
address_2 = str(row[address_row_2])
city_field = str(row[city_row])
## zip_field = str(row[zip_row])
objectid_field = row[ojectid_row]
values = { 'sensor' : 'false','address' :address_1+' & '+address_2+' '+city_field+', CO ', 'client':client_id}
data = urllib.urlencode(values)
print values
#### build request url
request_url = base_url+data
url = urlparse.urlparse(request_url)
urlToSign = url.path + "?"+ url.query
decodedKey = base64.urlsafe_b64decode(privateKey)
signature = hmac.new(decodedKey, urlToSign, hashlib.sha1)
encodedSignature = base64.urlsafe_b64encode(signature.digest())
originalUrl = url.scheme + "://" + url.netloc + url.path + "?" + url.query
geocode_url = originalUrl + "&signature=" + encodedSignature
print geocode_url
#### Send request
start = time.time()
response = urllib2.urlopen(geocode_url)
output = response.read()
#### Read and parse json output.
data = json.loads(output)
status = data["status"]
print status
if status == "OVER_QUERY_LIMIT":
x = rows
print "over the query limit"
elif status == "OK":
long = data["results"][0]["geometry"]["location"]["lng"]
lat = data["results"][0]["geometry"]["location"]["lat"]
precision = data["results"][0]["geometry"]["location_type"]
if long < -106 or long > -104 or lat > 41 or lat < 38:
update_query = "UPDATE %(table)s SET lat = -2, precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'prec': precision, 'table' : table, 'oid_name' : oid_name}
db.query(update_query)
print "out of region"
else:
update_query = "UPDATE %(table)s SET long = '%(long)s', lat = '%(lat)s', precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'long' : long, 'lat' : lat, 'prec': precision, 'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name}
db.query(update_query)
print precision
elif status != "OK":
#### Update the data
update_query = "UPDATE %(table)s SET lat = -1 WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name}
db.query(update_query)
else:
pass
x = x+1
class Build_Geometry:
def populate_geom(self, table):
print "Populating the geom_4326 column..."
popGeom = "UPDATE %(table)s SET geom_4326 = PointFromText('POINT(' || long|| ' ' || lat|| ')', 4326) WHERE lat > 0" % {'table' : table}
db.query(popGeom)
print "Populating the geom_2232 column..."
transformGeom = "UPDATE %(table)s SET geom_2232 = (SELECT ST_Transform(geom_4326, 2232))" % {'table' : table}
db.query(transformGeom)
def drop_geom_4326(self, table):
print "Dropping the geom_4326 column."
popGeom = """ALTER TABLE %(table)s DROP COLUMN geom_4326
DELETE FROM geometry_columns WHERE f_table_name = '%(table)s' AND f_geometry_column = 'geom_4326';""" % {'table' : table}
db.query(popGeom)
def pop_geom_table(self, table):
print "Populating the geometry_columns table..."
popGeomTable = "SELECT Populate_Geometry_Columns('public.%(table)s'::regclass);" % {'table' : table}
db.query(popGeomTable)
if __name__ == '__main__':
#DB connection properties
db = pg.connect(dbname = 'geocode', host= 'postgresql', port= 5432, user = 'USER',passwd= 'PASS')
#### Set this variable to the name of the table to be geocoded.
table = 'geocode_test'
tablePrep = TablePrep()
tablePrep.check_columns()
#### FOR GEOCODING STANDARD ADDRESS DATA
#### REQUIRES: single address field, city, zip, objectid
#### geocode_single_address_field(table name, address row #, city row #, zip row #, ojectid row #)
geocode = Geocode_Address()
geocode.geocode_single_address_field(table, 6, 2, 7, 0, 'gid')
#### FOR GEOCODING INTERSECTION DATA
#### REQUIRES: 1 field for each intersecting road, city, objectid
#### intersection(table name, address row 1 #, address row 2 #, city row #, ojectid row #)
## geocode = Geocode_Intersection()
## geocode.intersection(table, 0, 1, 2, 22, 'gid')
build_geom = Build_Geometry()
build_geom.populate_geom(table)
build_geom.drop_geom_4326(table)
build_geom.pop_geom_table(table)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment