-
-
Save jamiewanderi/d3b35b8709299b841563d6913e964a5e to your computer and use it in GitHub Desktop.
Script that geocodes address data stored in a PostgreSQL (PostGIS) database using Google.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## | |
## Mike Tafel and Justin Lewis | |
## 2010/07/26 : updated Dec. 2012 | |
## | |
## Script that geocodes address data stored in a PostgreSQL (PostGIS) database using Google. | |
### Preps the db table, reads address data, builds a request url, returns lat/long/precision valuse to the same table, | |
### transforms coordinates to 4326 then 2232. | |
## | |
## Dependancies: | |
#### PostGIS database, Google Private Key and Client ID, all the libraries listed in the import below | |
## | |
import urllib, urllib2, pg, simplejson as json, time, sys, hashlib, hmac, base64, urlparse | |
class TablePrep: | |
def check_columns(self): | |
print "Checking if required fields are in the base table..." | |
check_columns_q = "select column_name from information_schema.columns where table_name = '%(table)s'" % {'table' : table } | |
self.columns = db.query(check_columns_q) | |
self.fieldnames = self.columns.getresult() | |
if ('lat',) in self.fieldnames: | |
print "Table has lat" | |
clear = "UPDATE %(table)s SET lat = null" % {'table' : table } | |
db.query(clear) | |
else: | |
print "Adding lat column..." | |
add_column = "ALTER TABLE %(table)s ADD COLUMN lat double precision;" % {'table' : table } | |
db.query(add_column) | |
if ('long',) in self.fieldnames: | |
print "Table has long" | |
clear = "UPDATE %(table)s SET long = null" % {'table' : table } | |
db.query(clear) | |
else: | |
print "Adding long column..." | |
add_column = "ALTER TABLE %(table)s ADD COLUMN long double precision;" % {'table' : table } | |
db.query(add_column) | |
if ('precision',) in self.fieldnames: | |
print "Table has precision" | |
clear = "UPDATE %(table)s SET precision = null" % {'table' : table } | |
db.query(clear) | |
else: | |
print "Adding precision column..." | |
add_column = "ALTER TABLE %(table)s ADD COLUMN precision character varying (50);" % {'table' : table } | |
db.query(add_column) | |
if ('geom_4326',) in self.fieldnames: | |
print "Table has geom_4326" | |
clear = "UPDATE %(table)s SET geom_2232 = null" % {'table' : table } | |
db.query(clear) | |
else: | |
print "Adding geom_4326 column..." | |
add_column = "SELECT AddGeometryColumn('public', '%(table)s', 'geom_4326', '4326', 'POINT', 2);" % {'table' : table } | |
db.query(add_column) | |
if ('geom_2232',) in self.fieldnames: | |
print "Table has geom_2232" | |
else: | |
print "Adding geom_2232 column..." | |
add_column = "SELECT AddGeometryColumn('public', '%(table)s', 'geom_2232', '2232', 'POINT', 2);" % {'table' : table } | |
db.query(add_column) | |
#### This works but need to determine what the row # is on creation | |
## if ('gid',) in self.fieldnames: | |
## print "Table has gid" | |
## else: | |
## print "Adding gid column..." | |
## add_column = "ALTER TABLE %(table)s ADD COLUMN gid SERIAL;" % {'table' : table } | |
## db.query(add_column) | |
class Geocode_Address: | |
def geocode_single_address_field(self, table, address_row, city_row, zip_row, ojectid_row, oid_name): | |
#### Selecting data from the geocode table | |
selectallSql = "SELECT * FROM %(table)s;" % {'table' : table} ## add where clause variable | |
sel = db.query(selectallSql) | |
result = sel.getresult() | |
rows = len(result) | |
#### Set url variables | |
base_url = "http://maps.google.com/maps/api/geocode/json?" | |
privateKey = "YOUR_PRIVATE_KEY=" | |
client_id = "YOUR_CLIENT_ID" | |
x = 0 | |
while x < rows: | |
print " " | |
row = result[x] | |
address_field = str(row[address_row]) | |
city_field = str(row[city_row]) | |
zip_field = str(row[zip_row]) | |
objectid_field = int(row[ojectid_row]) | |
values = { 'sensor' : 'false','address' :address_field+' '+city_field+', CO '+zip_field, 'client':client_id} | |
data = urllib.urlencode(values) | |
print values | |
#### build request url | |
request_url = base_url+data | |
url = urlparse.urlparse(request_url) | |
urlToSign = url.path + "?"+ url.query | |
decodedKey = base64.urlsafe_b64decode(privateKey) | |
signature = hmac.new(decodedKey, urlToSign, hashlib.sha1) | |
encodedSignature = base64.urlsafe_b64encode(signature.digest()) | |
originalUrl = url.scheme + "://" + url.netloc + url.path + "?" + url.query | |
geocode_url = originalUrl + "&signature=" + encodedSignature | |
print geocode_url | |
#### Send request | |
start = time.time() | |
response = urllib2.urlopen(geocode_url) | |
output = response.read() | |
#### Read and parse json output. | |
data = json.loads(output) | |
status = data["status"] | |
print status | |
if status == "OVER_QUERY_LIMIT": | |
x = rows | |
print "over the query limit" | |
elif status == "OK": | |
long = data["results"][0]["geometry"]["location"]["lng"] | |
lat = data["results"][0]["geometry"]["location"]["lat"] | |
precision = data["results"][0]["geometry"]["location_type"] | |
if long < -106 or long > -104 or lat > 41 or lat < 38: | |
update_query = "UPDATE %(table)s SET lat = -2, precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'prec': precision, 'table' : table, 'oid_name' : oid_name} | |
db.query(update_query) | |
print "out of region" | |
else: | |
update_query = "UPDATE %(table)s SET long = '%(long)s', lat = '%(lat)s', precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'long' : long, 'lat' : lat, 'prec': precision, 'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name} | |
db.query(update_query) | |
print precision | |
elif status != "OK": | |
#### Update the data | |
update_query = "UPDATE %(table)s SET lat = -1 WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name} | |
db.query(update_query) | |
else: | |
pass | |
x = x+1 | |
class Geocode_Intersection: | |
def intersection(self, table, address_row_1, address_row_2, city_row, ojectid_row, oid_name): | |
#### Selecting data from the geocode table | |
selectallSql = "SELECT * FROM %(table)s;" % {'table' : table} ## add where clause variable | |
sel = db.query(selectallSql) | |
result = sel.getresult() | |
rows = len(result) | |
#### Set url variables | |
base_url = "http://maps.google.com/maps/api/geocode/json?" | |
privateKey = "0bTz96TLfiOmPIwGLc0SgFl2ZBQ=" | |
client_id = "gme-denverregionalcouncil" | |
x = 0 | |
while x < rows: | |
print " " | |
row = result[x] | |
address_1 = str(row[address_row_1]) | |
address_2 = str(row[address_row_2]) | |
city_field = str(row[city_row]) | |
## zip_field = str(row[zip_row]) | |
objectid_field = row[ojectid_row] | |
values = { 'sensor' : 'false','address' :address_1+' & '+address_2+' '+city_field+', CO ', 'client':client_id} | |
data = urllib.urlencode(values) | |
print values | |
#### build request url | |
request_url = base_url+data | |
url = urlparse.urlparse(request_url) | |
urlToSign = url.path + "?"+ url.query | |
decodedKey = base64.urlsafe_b64decode(privateKey) | |
signature = hmac.new(decodedKey, urlToSign, hashlib.sha1) | |
encodedSignature = base64.urlsafe_b64encode(signature.digest()) | |
originalUrl = url.scheme + "://" + url.netloc + url.path + "?" + url.query | |
geocode_url = originalUrl + "&signature=" + encodedSignature | |
print geocode_url | |
#### Send request | |
start = time.time() | |
response = urllib2.urlopen(geocode_url) | |
output = response.read() | |
#### Read and parse json output. | |
data = json.loads(output) | |
status = data["status"] | |
print status | |
if status == "OVER_QUERY_LIMIT": | |
x = rows | |
print "over the query limit" | |
elif status == "OK": | |
long = data["results"][0]["geometry"]["location"]["lng"] | |
lat = data["results"][0]["geometry"]["location"]["lat"] | |
precision = data["results"][0]["geometry"]["location_type"] | |
if long < -106 or long > -104 or lat > 41 or lat < 38: | |
update_query = "UPDATE %(table)s SET lat = -2, precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'prec': precision, 'table' : table, 'oid_name' : oid_name} | |
db.query(update_query) | |
print "out of region" | |
else: | |
update_query = "UPDATE %(table)s SET long = '%(long)s', lat = '%(lat)s', precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'long' : long, 'lat' : lat, 'prec': precision, 'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name} | |
db.query(update_query) | |
print precision | |
elif status != "OK": | |
#### Update the data | |
update_query = "UPDATE %(table)s SET lat = -1 WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name} | |
db.query(update_query) | |
else: | |
pass | |
x = x+1 | |
class Build_Geometry: | |
def populate_geom(self, table): | |
print "Populating the geom_4326 column..." | |
popGeom = "UPDATE %(table)s SET geom_4326 = PointFromText('POINT(' || long|| ' ' || lat|| ')', 4326) WHERE lat > 0" % {'table' : table} | |
db.query(popGeom) | |
print "Populating the geom_2232 column..." | |
transformGeom = "UPDATE %(table)s SET geom_2232 = (SELECT ST_Transform(geom_4326, 2232))" % {'table' : table} | |
db.query(transformGeom) | |
def drop_geom_4326(self, table): | |
print "Dropping the geom_4326 column." | |
popGeom = """ALTER TABLE %(table)s DROP COLUMN geom_4326 | |
DELETE FROM geometry_columns WHERE f_table_name = '%(table)s' AND f_geometry_column = 'geom_4326';""" % {'table' : table} | |
db.query(popGeom) | |
def pop_geom_table(self, table): | |
print "Populating the geometry_columns table..." | |
popGeomTable = "SELECT Populate_Geometry_Columns('public.%(table)s'::regclass);" % {'table' : table} | |
db.query(popGeomTable) | |
if __name__ == '__main__': | |
#DB connection properties | |
db = pg.connect(dbname = 'geocode', host= 'postgresql', port= 5432, user = 'USER',passwd= 'PASS') | |
#### Set this variable to the name of the table to be geocoded. | |
table = 'geocode_test' | |
tablePrep = TablePrep() | |
tablePrep.check_columns() | |
#### FOR GEOCODING STANDARD ADDRESS DATA | |
#### REQUIRES: single address field, city, zip, objectid | |
#### geocode_single_address_field(table name, address row #, city row #, zip row #, ojectid row #) | |
geocode = Geocode_Address() | |
geocode.geocode_single_address_field(table, 6, 2, 7, 0, 'gid') | |
#### FOR GEOCODING INTERSECTION DATA | |
#### REQUIRES: 1 field for each intersecting road, city, objectid | |
#### intersection(table name, address row 1 #, address row 2 #, city row #, ojectid row #) | |
## geocode = Geocode_Intersection() | |
## geocode.intersection(table, 0, 1, 2, 22, 'gid') | |
build_geom = Build_Geometry() | |
build_geom.populate_geom(table) | |
build_geom.drop_geom_4326(table) | |
build_geom.pop_geom_table(table) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment