Created
September 30, 2014 20:00
-
-
Save KhepryQuixote/4109506fbde3133a71e0 to your computer and use it in GitHub Desktop.
Python scripts to download, extract, and output oil, gas, and/or fracking well information from various states in the United States of America.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
''' | |
This Python script is oriented towards oil, gas, and fracking well | |
information download, extract, and output at the state-level within | |
the United States of America. Not all states are present as not all | |
states either allow fracking or have any sites available for download. | |
This Python script is depended upon by another Python script entitled | |
PyFrackETL.py, which is also available as a Gist under the same name. | |
''' | |
import collections | |
import csv | |
import ogr | |
import os | |
import pyodbc | |
import shutil | |
import tempfile | |
import time | |
import xlrd | |
import zipfile | |
# from dbfpy import dbf | |
class Src2Tgt: | |
cp1252 = { | |
# from http://www.microsoft.com/typography/unicode/1252.htm | |
u"\u20AC": u"\x80", # EURO SIGN | |
u"\u201A": u"\x82", # SINGLE LOW-9 QUOTATION MARK | |
u"\u0192": u"\x83", # LATIN SMALL LETTER F WITH HOOK | |
u"\u201E": u"\x84", # DOUBLE LOW-9 QUOTATION MARK | |
u"\u2026": u"\x85", # HORIZONTAL ELLIPSIS | |
u"\u2020": u"\x86", # DAGGER | |
u"\u2021": u"\x87", # DOUBLE DAGGER | |
u"\u02C6": u"\x88", # MODIFIER LETTER CIRCUMFLEX ACCENT | |
u"\u2030": u"\x89", # PER MILLE SIGN | |
u"\u0160": u"\x8A", # LATIN CAPITAL LETTER S WITH CARON | |
u"\u2039": u"\x8B", # SINGLE LEFT-POINTING ANGLE QUOTATION MARK | |
u"\u0152": u"\x8C", # LATIN CAPITAL LIGATURE OE | |
u"\u017D": u"\x8E", # LATIN CAPITAL LETTER Z WITH CARON | |
u"\u2018": u"\x91", # LEFT SINGLE QUOTATION MARK | |
u"\u2019": u"\x92", # RIGHT SINGLE QUOTATION MARK | |
u"\u201C": u"\x93", # LEFT DOUBLE QUOTATION MARK | |
u"\u201D": u"\x94", # RIGHT DOUBLE QUOTATION MARK | |
u"\u2022": u"\x95", # BULLET | |
u"\u2013": u"\x96", # EN DASH | |
u"\u2014": u"\x97", # EM DASH | |
u"\u02DC": u"\x98", # SMALL TILDE | |
u"\u2122": u"\x99", # TRADE MARK SIGN | |
u"\u0161": u"\x9A", # LATIN SMALL LETTER S WITH CARON | |
u"\u203A": u"\x9B", # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK | |
u"\u0153": u"\x9C", # LATIN SMALL LIGATURE OE | |
u"\u017E": u"\x9E", # LATIN SMALL LETTER Z WITH CARON | |
u"\u0178": u"\x9F", # LATIN CAPITAL LETTER Y WITH DIAERESIS | |
} | |
def __init__(self): | |
return | |
# ------------------------------------------------------------------------- | |
# define the CSV to KML file method | |
# ------------------------------------------------------------------------- | |
def csv2kml(self, csvFile, tgtPath, kmlFile, maxRecords, flushCount): | |
print ("") | |
print ("========================") | |
print ("CSV to KML conversion...") | |
print ("------------------------") | |
ogr.UseExceptions() | |
# expand any leading tilde | |
# to the user's home path | |
csvFile = os.path.expanduser(csvFile) | |
kmlFile = os.path.expanduser(kmlFile) | |
# verify that SHP file exists | |
if not os.path.exists(csvFile): | |
print ("CSV file '%s' does NOT exist!" % csvFile) | |
return | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if kmlFile != "": | |
tgtFldr = os.path.dirname(kmlFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
print ("csvFile = %s" % csvFile) | |
if kmlFile == "": | |
kmlFile = os.path.join(tgtPath, os.path.basename(csvFile).lower().replace('.csv','.kml').replace('.txt','.kml')) | |
if os.path.dirname(kmlFile) == "": | |
kmlFile = os.path.join(tgtPath, kmlFile) | |
print ("kmlFile = %s" % kmlFile) | |
csvreader=csv.reader(open(csvFile,'r')) | |
headers=csvreader.next() | |
kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile) | |
kmlLyr = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0]) | |
for field in headers[:-1]: #skip kmlgeometry (assumed to be in last column) | |
field_def = ogr.FieldDefn(field) | |
kmlLyr.CreateField(field_def) | |
bgnTime = time.time() | |
rows = 0 | |
for rec in csvreader: | |
feat = ogr.Feature(kmlLyr.GetLayerDefn()) | |
for i,field in enumerate(headers[:-1]): #skip kmlgeometry | |
feat.SetField(field, rec[i]) | |
feat.SetGeometry(ogr.CreateGeometryFromGML(rec[-1])) | |
kmlLyr.CreateFeature(feat) | |
rows += 1 | |
if rows % flushCount == 0: | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
if maxRecords > 0 and rows >= maxRecords: | |
break | |
#clean up | |
del kmlLyr,kmlDs | |
print ("-----------------------------------") | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("") | |
return | |
# ------------------------------------------------------------------------- | |
# define the DBF file to CSV file method | |
# ------------------------------------------------------------------------- | |
def dbf2csv(self, dbfFile, tgtPath, csvFile, maxRecords, flushCount, csvDelimiter=','): | |
print("") | |
print("=============================") | |
print("DBF to CSV file conversion...") | |
print("-----------------------------") | |
print("dbfFile '%s'" % dbfFile) | |
print("csvFile '%s'" % csvFile) | |
# expand any leading tilde | |
# to the user's home path | |
dbfFile = os.path.expanduser(dbfFile) | |
csvFile = os.path.expanduser(csvFile) | |
# verify that DBF file exists | |
if not os.path.exists(dbfFile): | |
print ("DBF file '%s' does NOT exist!" % dbfFile) | |
return | |
# make sure the CSV target folder exists, | |
# creating it recursively if it does not | |
if csvFile != "": | |
tgtFldr = os.path.dirname(csvFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(dbfFile).lower().replace('.dbf','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
dbffile = dbf.Dbf(dbfFile) | |
colNames = [] | |
for colName in dbffile.header.fields: | |
colNames.append(colName) | |
fWriter = open(csvFile,'w',newline='') | |
csvwriter=csv.Writer(fWriter, delimiter=csvDelimiter) | |
csvwriter.writerow(colNames) | |
bgnTime = time.time() | |
rows = 0 | |
for row in dbffile: | |
csvwriter.writerow(row) | |
rows += 1 | |
if rows % flushCount == 0: | |
fWriter.flush() | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
if maxRecords > 0 and rows >= maxRecords: | |
break | |
#clean up | |
dbffile.close() | |
fWriter.close() | |
print ("-----------------------------------") | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("") | |
return | |
# ------------------------------------------------------------------------- | |
# define the KML to CSV file method | |
# ------------------------------------------------------------------------- | |
def kml2csv(self, kmlFile, tgtPath, csvFile, maxRecords, flushCount): | |
return | |
# ------------------------------------------------------------------------- | |
# define the MDB to CSV file method | |
# ------------------------------------------------------------------------- | |
def mdb2csv(self, mdbFile, tgtPath, csvFile, tblName, maxRecords, flushCount, csvDelimiter=','): | |
print("") | |
print("=============================") | |
print("MDB to CSV file conversion...") | |
print("-----------------------------") | |
print("mdbFile '%s'" % mdbFile) | |
print("csvFile '%s'" % csvFile) | |
print("tblName '%s'" % tblName) | |
# expand any leading tilde | |
# to the user's home path | |
mdbFile = os.path.expanduser(mdbFile) | |
csvFile = os.path.expanduser(csvFile) | |
# verify that MDB file exists | |
if not os.path.exists(mdbFile): | |
print ("TAB file '%s' does NOT exist!" % mdbFile) | |
return | |
# make sure the CSV target folder exists, | |
# creating it recursively if it does not | |
if csvFile != "": | |
tgtFldr = os.path.dirname(csvFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(mdbFile).lower().replace('.mdb','.csv').replace('.accdb','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
bgnTime = time.time() | |
mdbFile = os.path.abspath(mdbFile) | |
connString = "Driver={Microsoft Access Driver (*.mdb)};DBQ=%s;" % mdbFile | |
print ("pyODBC connString: %s" % connString) | |
conn = pyodbc.connect(connString) | |
cursor = conn.cursor() | |
colNames = collections.OrderedDict() | |
colValues = collections.OrderedDict() | |
for row in cursor.columns(table='%s' % tblName): | |
colNames[row.column_name] = row.column_name | |
fWriter = open(csvFile,'w',newline='') | |
csvwriter=csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=colNames) | |
csvwriter.writeheader() | |
cursor.execute("select * from [%s]" % tblName) | |
rows = 0 | |
for row in cursor: | |
colValues.clear() | |
idx = 0 | |
for colName in colNames: | |
colValues[colName] = row[idx] | |
idx += 1 | |
csvwriter.writerow(colValues) | |
rows += 1 | |
if rows % flushCount == 0: | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
if maxRecords > 0 and rows >= maxRecords: | |
break | |
conn.close() | |
print ("-----------------------------------") | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("") | |
return | |
# ------------------------------------------------------------------------- | |
# define the MDB to CSV file method | |
# ------------------------------------------------------------------------- | |
def pdf2csv(self, pdfFile, tgtPath, csvFile, maxRecords, flushCount): | |
return | |
# ------------------------------------------------------------------------- | |
# define the SHP to CSV file method | |
# ------------------------------------------------------------------------- | |
def shp2csv(self, shpFile, tgtPath, csvFile, maxRecords, flushCount): | |
print ("") | |
print ("========================") | |
print ("SHP to CSV conversion...") | |
print ("------------------------") | |
# expand any leading tilde | |
# to the user's home path | |
shpFile = os.path.expanduser(shpFile) | |
csvFile = os.path.expanduser(csvFile) | |
# verify that SHP file exists | |
if not os.path.exists(shpFile): | |
print ("SHP file '%s' does NOT exist!" % shpFile) | |
return | |
elif not shpFile.lower().endswith(".shp"): | |
print ("SHP file name '%s' does NOT end with '.shp'!" % shpFile) | |
return | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if csvFile != "": | |
tgtFldr = os.path.dirname(csvFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
print ("shpFile = %s" % shpFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# Open files | |
csvfile=open(csvFile,'w', newline='') | |
shpDs=ogr.Open(shpFile) | |
shpLayer=shpDs.GetLayer() | |
# Get field names | |
shpDfn=shpLayer.GetLayerDefn() | |
nfields=shpDfn.GetFieldCount() | |
headers=[] | |
for i in range(nfields): | |
headers.append(shpDfn.GetFieldDefn(i).GetName()) | |
headers.append('kmlgeometry') | |
csvwriter = csv.DictWriter(csvfile, headers) | |
try: | |
csvwriter.writeheader() #python 2.7+ | |
except: | |
csvfile.write(','.join(headers)+'\n') | |
bgnTime = time.time() | |
# Write attributes and kml geometry out to csv | |
rows = 0 | |
for shpFeat in shpLayer: | |
try: | |
attributes=shpFeat.items() | |
shpGeom=shpFeat.GetGeometryRef() | |
try: | |
attributes['kmlgeometry']=shpGeom.ExportToKML() | |
except AttributeError as e: | |
attributes['kmlgeometry']="" | |
print (e) | |
csvwriter.writerow(attributes) | |
except UnicodeEncodeError as e: | |
print (e) | |
rows += 1 | |
if rows % flushCount == 0: | |
csvfile.flush() | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
if maxRecords > 0 and rows >= maxRecords: | |
break | |
# clean up | |
del csvwriter,shpLayer,shpDs | |
csvfile.close() | |
print ("-----------------------------------") | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("") | |
return | |
# ------------------------------------------------------------------------- | |
# define the SHP to KML file method | |
# ------------------------------------------------------------------------- | |
def shp2kml(self, shpFile, tgtPath, kmlFile, maxRecords, flushCount): | |
print ("") | |
print ("========================") | |
print ("SHP to KML conversion...") | |
print ("------------------------") | |
# expand any leading tilde | |
# to the user's home path | |
shpFile = os.path.expanduser(shpFile) | |
kmlFile = os.path.expanduser(kmlFile) | |
# verify that SHP file exists | |
if not os.path.exists(shpFile): | |
print ("SHP file '%s' does NOT exist!" % shpFile) | |
return | |
elif not shpFile.lower().endswith(".shp"): | |
print ("SHP file name '%s' does NOT end with '.shp'!" % shpFile) | |
return | |
# make sure the KML target folder exists, | |
# creating it recursively if it does not | |
if kmlFile != "": | |
tgtFldr = os.path.dirname(kmlFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
print ("shpFile = %s" % shpFile) | |
if kmlFile == "": | |
kmlFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.kml')) | |
if os.path.dirname(kmlFile) == "": | |
kmlFile = os.path.join(tgtPath, kmlFile) | |
print ("kmlFile = %s" % kmlFile) | |
# Open files | |
shpDs=ogr.Open(shpFile) | |
shpLayer=shpDs.GetLayer() | |
kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile) | |
kmlLayer = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0]) | |
# Get field names | |
shpDfn=shpLayer.GetLayerDefn() | |
nfields=shpDfn.GetFieldCount() | |
headers=[] | |
for i in range(nfields): | |
headers.append(shpDfn.GetFieldDefn(i).GetName()) | |
field = shpDfn.GetFieldDefn(i).GetName() | |
field_def = ogr.FieldDefn(field) | |
kmlLayer.CreateField(field_def) | |
headers.append('kmlgeometry') | |
bgnTime = time.time() | |
# Write attributes and kml out to csv | |
rows = 0 | |
for shpFeat in shpLayer: | |
attributes=shpFeat.items() | |
shpGeom=shpFeat.GetGeometryRef() | |
attributes['kmlgeometry']=shpGeom.ExportToKML() | |
# print (attributes) | |
kmlFeat = ogr.Feature(kmlLayer.GetLayerDefn()) | |
for field in headers[:-1]: #skip kmlgeometry (assumed to be in last column) | |
kmlFeat.SetField(field, attributes[field]) | |
kmlFeat.SetGeometry(ogr.CreateGeometryFromGML(attributes['kmlgeometry'])) | |
kmlLayer.CreateFeature(kmlFeat) | |
rows += 1 | |
if rows % flushCount == 0: | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("Rows: {:,}".format(rows)) | |
if maxRecords > 0 and rows >= maxRecords: | |
break | |
# clean up | |
del shpLayer,shpDs,kmlLayer,kmlDs | |
print ("-----------------------------------") | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("") | |
return | |
# ------------------------------------------------------------------------- | |
# define the SHP to KML and CSV files method | |
# ------------------------------------------------------------------------- | |
def shp2kmlcsv(self, shpFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount): | |
print ("") | |
print ("================================") | |
print ("SHP to KML and CSV conversion...") | |
print ("--------------------------------") | |
# expand any leading tilde | |
# to the user's home path | |
shpFile = os.path.expanduser(shpFile) | |
kmlFile = os.path.expanduser(kmlFile) | |
csvFile = os.path.expanduser(csvFile) | |
# verify that SHP file exists | |
if not os.path.exists(shpFile): | |
print ("SHP file '%s' does NOT exist!" % shpFile) | |
return | |
elif not shpFile.lower().endswith(".shp"): | |
print ("SHP file name '%s' does NOT end with '.shp'!" % shpFile) | |
return | |
# make sure the KML target folder exists, | |
# creating it recursively if it does not | |
if kmlFile != "": | |
tgtFldr = os.path.dirname(kmlFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
# make sure the CSV target folder exists, | |
# creating it recursively if it does not | |
if csvFile != "": | |
tgtFldr = os.path.dirname(csvFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
print ("shpFile = %s" % shpFile) | |
if kmlFile == "": | |
kmlFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.kml')) | |
if os.path.dirname(kmlFile) == "": | |
kmlFile = os.path.join(tgtPath, kmlFile) | |
print ("kmlFile = %s" % kmlFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(shpFile).lower().replace('.shp','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# Open files | |
csvfile=open(csvFile,'w', newline='') | |
shpDs=ogr.Open(shpFile) | |
shpLayer=shpDs.GetLayer() | |
kmlDs = ogr.GetDriverByName('KML').CreateDataSource(kmlFile) | |
kmlLayer = kmlDs.CreateLayer(os.path.splitext(os.path.basename(kmlFile))[0]) | |
# Get field names | |
shpDfn=shpLayer.GetLayerDefn() | |
nfields=shpDfn.GetFieldCount() | |
headers=[] | |
for i in range(nfields): | |
headers.append(shpDfn.GetFieldDefn(i).GetName()) | |
field = shpDfn.GetFieldDefn(i).GetName() | |
field_def = ogr.FieldDefn(field) | |
kmlLayer.CreateField(field_def) | |
headers.append('kmlgeometry') | |
csvwriter = csv.DictWriter(csvfile, headers) | |
try: | |
csvwriter.writeheader() #python 2.7+ | |
except: | |
header = ','.join(headers) + '\n' | |
csvfile.write(header.encode('utf-8')) | |
bgnTime = time.time() | |
# Write attributes and kml out to csv | |
rows = 0 | |
for shpFeat in shpLayer: | |
attributes=shpFeat.items() | |
shpGeom=shpFeat.GetGeometryRef() | |
attributes['kmlgeometry']=shpGeom.ExportToKML() | |
csvwriter.writerow(attributes) | |
kmlFeat = ogr.Feature(kmlLayer.GetLayerDefn()) | |
for field in headers[:-1]: #skip kmlgeometry (assumed to be in last column) | |
kmlFeat.SetField(field, attributes[field]) | |
kmlFeat.SetGeometry(ogr.CreateGeometryFromGML(attributes['kmlgeometry'])) | |
kmlLayer.CreateFeature(kmlFeat) | |
rows += 1 | |
if rows % flushCount == 0: | |
csvfile.flush() | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
if maxRecords > 0 and rows >= maxRecords: | |
break | |
# clean up | |
del csvwriter,shpLayer,shpDs,kmlLayer,kmlDs | |
csvfile.close() | |
print ("-----------------------------------") | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("") | |
return | |
# ------------------------------------------------------------------------- | |
# define the TAB-delimited file to CSV file method | |
# ------------------------------------------------------------------------- | |
def tab2csv(self, tabFile, tgtPath, csvFile, maxRecords, flushCount, tabDelimiter='\t', csvDelimiter=','): | |
print("") | |
print("=============================") | |
print("TAB to CSV file conversion...") | |
print("-----------------------------") | |
print("tabFile '%s'" % tabFile) | |
print("csvFile '%s'" % csvFile) | |
# expand any leading tilde | |
# to the user's home path | |
tabFile = os.path.expanduser(tabFile) | |
csvFile = os.path.expanduser(csvFile) | |
# verify that TAB file exists | |
if not os.path.exists(tabFile): | |
print ("TAB file '%s' does NOT exist!" % tabFile) | |
return | |
# make sure the CSV target folder exists, | |
# creating it recursively if it does not | |
if csvFile != "": | |
tgtFldr = os.path.dirname(csvFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(tabFile).lower().replace('.tab','.csv').replace('.txt','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
fReader = open(tabFile,'r') | |
csvreader=csv.DictReader(fReader, delimiter=tabDelimiter) | |
fWriter = open(csvFile,'w',newline='') | |
csvwriter=csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=csvreader.fieldnames) | |
csvwriter.writeheader() | |
bgnTime = time.time() | |
rows = 0 | |
cols = dict() | |
for rowDict in csvreader: | |
cols.clear() | |
for colName in csvreader.fieldnames: | |
cols[colName] = ' '.join(rowDict[colName].strip().split()) | |
csvwriter.writerow(cols) | |
rows += 1 | |
if rows % flushCount == 0: | |
fWriter.flush() | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
if maxRecords > 0 and rows >= maxRecords: | |
break | |
#clean up | |
fReader.close() | |
fWriter.close() | |
print ("-----------------------------------") | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("") | |
return | |
# ------------------------------------------------------------------------- | |
# define the TXT file to CSV file method | |
# ------------------------------------------------------------------------- | |
def txt2csv(self, txtFile, tgtPath, csvFile, maxRecords, flushCount, txtDelimiter=',', csvDelimiter=','): | |
print("") | |
print("=============================") | |
print("TXT to CSV file conversion...") | |
print("-----------------------------") | |
print("txtFile '%s'" % txtFile) | |
print("csvFile '%s'" % csvFile) | |
# expand any leading tilde | |
# to the user's home path | |
txtFile = os.path.expanduser(txtFile) | |
csvFile = os.path.expanduser(csvFile) | |
# verify that TXT file exists | |
if not os.path.exists(txtFile): | |
print ("TXT file '%s' does NOT exist!" % txtFile) | |
return | |
# make sure the CSV target folder exists, | |
# creating it recursively if it does not | |
if csvFile != "": | |
tgtFldr = os.path.dirname(csvFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(txtFile).lower().replace('.txt','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
fReader = open(txtFile,'r') | |
csvreader=csv.DictReader(fReader, delimiter=txtDelimiter) | |
fWriter = open(csvFile,'w',newline='') | |
csvwriter=csv.DictWriter(fWriter, delimiter=csvDelimiter, fieldnames=csvreader.fieldnames) | |
csvwriter.writeheader() | |
bgnTime = time.time() | |
rows = 0 | |
cols = dict() | |
for rowDict in csvreader: | |
cols.clear() | |
for colName in csvreader.fieldnames: | |
try: | |
cols[colName] = ' '.join(rowDict[colName].strip().split()) | |
except AttributeError as e: | |
cols[colName] = '' | |
print (e) | |
csvwriter.writerow(cols) | |
rows += 1 | |
if rows % flushCount == 0: | |
fWriter.flush() | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
if maxRecords > 0 and rows >= maxRecords: | |
break | |
#clean up | |
fReader.close() | |
fWriter.close() | |
print ("-----------------------------------") | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("") | |
return | |
# ------------------------------------------------------------------------- | |
# define the XLS to CSV file method | |
# ------------------------------------------------------------------------- | |
def xls2csv(self, xlsFile, tgtPath, csvFile, maxRecords, flushCount): | |
print("") | |
print("=============================") | |
print("XLS to CSV file conversion...") | |
print("-----------------------------") | |
print("xlsFile '%s'" % xlsFile) | |
print("csvFile '%s'" % csvFile) | |
# expand any leading tilde | |
# to the user's home path | |
xlsFile = os.path.expanduser(xlsFile) | |
csvFile = os.path.expanduser(csvFile) | |
# verify that TAB file exists | |
if not os.path.exists(xlsFile): | |
print ("XLS file '%s' does NOT exist!" % xlsFile) | |
return | |
# make sure the CSV target folder exists, | |
# creating it recursively if it does not | |
if csvFile != "": | |
tgtFldr = os.path.dirname(csvFile) | |
if tgtFldr == "": | |
tgtFldr = tgtPath | |
if not os.path.exists(tgtFldr): | |
os.makedirs(tgtFldr) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(xlsFile).lower().replace('.xls','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
xlsWB = xlrd.open_workbook(xlsFile) | |
xlsSH = xlsWB.sheet_by_index(0) | |
nbrRows = xlsSH.nrows | |
fWriter = open(csvFile,'w',newline='') | |
csvwriter=csv.writer(fWriter, delimiter=',') | |
bgnTime = time.time() | |
rows = 0 | |
values = [] | |
for rowNbr in range(0, nbrRows): | |
values.clear() | |
for entry in xlsSH.row_values(rowNbr): | |
values.append(str(entry).strip()) | |
try: | |
csvwriter.writerow(values) | |
except UnicodeEncodeError as e: | |
print (e) | |
rows += 1 | |
if rows % flushCount == 0: | |
fWriter.flush() | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
if maxRecords > 0 and rows >= maxRecords: | |
break | |
fWriter.close() | |
print ("-----------------------------------") | |
endTime = time.time() | |
rcdsPerSec = rows / (endTime - bgnTime) | |
print ("Rows: {:,} @ {:,.0f} records/second".format(rows, rcdsPerSec)) | |
print ("") | |
return | |
# ------------------------------------------------------------------------- | |
# define the unZIP a file to a target folder method | |
# ------------------------------------------------------------------------- | |
def zip2dir(self, zipFile, tgtPath): | |
print("") | |
print("=================================") | |
print("ZIP to TGT directory expansion...") | |
print("---------------------------------") | |
print("zipFile '%s'" % zipFile) | |
print("tgtPath '%s'" % tgtPath) | |
# expand any leading tilde | |
# to the user's home path | |
zipFile = os.path.expanduser(zipFile) | |
tgtPath = os.path.expanduser(tgtPath) | |
# verify that ZIP file exists | |
if not os.path.exists(zipFile): | |
print ("ZIP file '%s' does NOT exist!" % zipFile) | |
return | |
# create the target folder | |
# recursively if not extant | |
if not os.path.exists(tgtPath): | |
os.makedirs(tgtPath) | |
# extract all of the files/folders | |
# within the zip file to the target folder | |
with zipfile.ZipFile(zipFile, "r") as z: | |
z.extractall(tgtPath) | |
return | |
# ------------------------------------------------------------------------- | |
# define the ZIP file to a TXT file and then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2csv2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, srcDelimiter=',', csvDelimiter=','): | |
# expand any leading tilde | |
# to the user's home path | |
zipFile = os.path.expanduser(zipFile) | |
tgtPath = os.path.expanduser(tgtPath) | |
unzPath = os.path.expanduser(unzPath) | |
# verify that ZIP file exists | |
if not os.path.exists(zipFile): | |
print ("ZIP file '%s' does NOT exist!" % zipFile) | |
return | |
rmvFldr = False | |
# if no target folder specified, | |
if tgtPath == "": | |
# output an error message | |
print ("Target folder was NOT specified!") | |
else: | |
print ("Target folder '%s'was specified." % tgtPath) | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(tgtPath): | |
os.makedirs(tgtPath) | |
# if UNZIP folder was NOT specifed, | |
# generate a temporary UNZIP folder | |
# to be removed after processing | |
if unzPath == "": | |
unzPath = tempfile.mkdtemp() | |
rmvFldr = True | |
print ("Unzip folder '%s' was generated." % unzPath) | |
else: | |
print ("Unzip folder '%s' was specified." % unzPath) | |
# make sure the UNZIP folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(unzPath): | |
os.makedirs(unzPath) | |
# unzip the ZIP file | |
# to the UNZIP folder | |
self.zip2dir(zipFile, unzPath) | |
# find the SHP file in | |
# the specified UNZIP folder | |
tmpFile = "" | |
for aFile in os.listdir(unzPath): | |
if aFile.lower().endswith(".csv"): | |
tmpFile = os.path.join(unzPath, aFile) | |
break | |
# verify that a SHP file was found | |
if tmpFile.strip() == "": | |
print ("Zip file '%s' did NOT contain a CSV file!" % zipFile) | |
return | |
else: | |
print ("zipFile = %s" % zipFile) | |
print ("srcFile = %s" % tmpFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower()) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# create the CSV file from the SRC file | |
self.txt2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, srcDelimiter, csvDelimiter) | |
# if UNZIP folder removal specified, | |
# remove the UNZIP folder in question | |
if rmvFldr: | |
shutil.rmtree(unzPath) | |
return | |
# ------------------------------------------------------------------------- | |
# defin the ZIP file to a DBF file, then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2dbf2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, csvDelimiter=','): | |
# expand any leading tilde | |
# to the user's home path | |
zipFile = os.path.expanduser(zipFile) | |
tgtPath = os.path.expanduser(tgtPath) | |
unzPath = os.path.expanduser(unzPath) | |
# verify that ZIP file exists | |
if not os.path.exists(zipFile): | |
print ("ZIP file '%s' does NOT exist!" % zipFile) | |
return | |
rmvFldr = False | |
# if no target folder specified, | |
if tgtPath == "": | |
# output an error message | |
print ("Target folder was NOT specified!") | |
else: | |
print ("Target folder '%s'was specified." % tgtPath) | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(tgtPath): | |
os.makedirs(tgtPath) | |
# if UNZIP folder was NOT specifed, | |
# generate a temporary UNZIP folder | |
# to be removed after processing | |
if unzPath == "": | |
unzPath = tempfile.mkdtemp() | |
rmvFldr = True | |
print ("Unzip folder '%s' was generated." % unzPath) | |
else: | |
print ("Unzip folder '%s' was specified." % unzPath) | |
# make sure the UNZIP folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(unzPath): | |
os.makedirs(unzPath) | |
# unzip the ZIP file | |
# to the UNZIP folder | |
self.zip2dir(zipFile, unzPath) | |
# find the TAB or TXT file in | |
# the specified UNZIP folder | |
tmpFile = "" | |
for aFile in os.listdir(unzPath): | |
if aFile.lower().endswith(".dbf"): | |
tmpFile = os.path.join(unzPath, aFile) | |
print ("zipFile = %s" % zipFile) | |
print ("dbfFile = %s" % tmpFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.dbf','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# create the CSV file from the DBF file | |
self.dbf2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, csvDelimiter) | |
# verify that a TAB or TXT file was found | |
if tmpFile.strip() == "": | |
print ("Zip file '%s' did NOT contain a DBF file!" % zipFile) | |
# if UNZIP folder removal specified, | |
# remove the UNZIP folder in question | |
if rmvFldr: | |
shutil.rmtree(unzPath) | |
return | |
# ------------------------------------------------------------------------- | |
# defin the ZIP file to a MDB file, then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2mdb2csv(self, zipFile, tgtPath, csvFile, tblName, maxRecords, flushCount, unzPath, csvDelimiter=','): | |
# expand any leading tilde | |
# to the user's home path | |
zipFile = os.path.expanduser(zipFile) | |
tgtPath = os.path.expanduser(tgtPath) | |
unzPath = os.path.expanduser(unzPath) | |
# verify that ZIP file exists | |
if not os.path.exists(zipFile): | |
print ("ZIP file '%s' does NOT exist!" % zipFile) | |
return | |
rmvFldr = False | |
# if no target folder specified, | |
if tgtPath == "": | |
# output an error message | |
print ("Target folder was NOT specified!") | |
else: | |
print ("Target folder '%s'was specified." % tgtPath) | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(tgtPath): | |
os.makedirs(tgtPath) | |
# if UNZIP folder was NOT specifed, | |
# generate a temporary UNZIP folder | |
# to be removed after processing | |
if unzPath == "": | |
unzPath = tempfile.mkdtemp() | |
rmvFldr = True | |
print ("Unzip folder '%s' was generated." % unzPath) | |
else: | |
print ("Unzip folder '%s' was specified." % unzPath) | |
# make sure the UNZIP folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(unzPath): | |
os.makedirs(unzPath) | |
# unzip the ZIP file | |
# to the UNZIP folder | |
self.zip2dir(zipFile, unzPath) | |
# find the TAB or TXT file in | |
# the specified UNZIP folder | |
tmpFile = "" | |
for aFile in os.listdir(unzPath): | |
if aFile.lower().endswith(".mdb") or aFile.lower().endswith(".accdb"): | |
tmpFile = os.path.join(unzPath, aFile) | |
print ("zipFile = %s" % zipFile) | |
print ("mdbFile = %s" % tmpFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.mdb','.csv').replace('.accdb','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# create the CSV files from the TAB file | |
self.mdb2csv(tmpFile, tgtPath, csvFile, tblName, maxRecords, flushCount, csvDelimiter) | |
# verify that a TAB or TXT file was found | |
if tmpFile.strip() == "": | |
print ("Zip file '%s' did NOT contain a MDB or ACCDB file!" % zipFile) | |
# if UNZIP folder removal specified, | |
# remove the UNZIP folder in question | |
if rmvFldr: | |
shutil.rmtree(unzPath) | |
return | |
# ------------------------------------------------------------------------- | |
# define the ZIP file to a SHP file, then to CSV files method | |
# ------------------------------------------------------------------------- | |
def zip2shp2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath): | |
# expand any leading tilde | |
# to the user's home path | |
zipFile = os.path.expanduser(zipFile) | |
tgtPath = os.path.expanduser(tgtPath) | |
unzPath = os.path.expanduser(unzPath) | |
# verify that ZIP file exists | |
if not os.path.exists(zipFile): | |
print ("ZIP file '%s' does NOT exist!" % zipFile) | |
return | |
rmvFldr = False | |
# if no target folder specified, | |
if tgtPath == "": | |
# output an error message | |
print ("Target folder was NOT specified!") | |
else: | |
print ("Target folder '%s'was specified." % tgtPath) | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(tgtPath): | |
os.makedirs(tgtPath) | |
# if UNZIP folder was NOT specifed, | |
# generate a temporary UNZIP folder | |
# to be removed after processing | |
if unzPath == "": | |
unzPath = tempfile.mkdtemp() | |
rmvFldr = True | |
print ("Unzip folder '%s' was generated." % unzPath) | |
else: | |
print ("Unzip folder '%s' was specified." % unzPath) | |
# make sure the UNZIP folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(unzPath): | |
os.makedirs(unzPath) | |
# unzip the ZIP file | |
# to the UNZIP folder | |
self.zip2dir(zipFile, unzPath) | |
# find the SHP file in | |
# the specified UNZIP folder | |
tmpFile = "" | |
for aFile in os.listdir(unzPath): | |
if aFile.lower().endswith(".shp"): | |
tmpFile = os.path.join(unzPath, aFile) | |
break | |
# verify that a SHP file was found | |
if tmpFile.strip() == "": | |
print ("Zip file '%s' did NOT contain a SHP file!" % zipFile) | |
return | |
else: | |
print ("zipFile = %s" % zipFile) | |
print ("shpFile = %s" % tmpFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.shp','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# create the CSV file from the SHP file | |
self.shp2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount) | |
# if UNZIP folder removal specified, | |
# remove the UNZIP folder in question | |
if rmvFldr: | |
shutil.rmtree(unzPath) | |
return | |
# ------------------------------------------------------------------------- | |
# define the ZIP file to a SHP file, then to KML and CSV files method | |
# ------------------------------------------------------------------------- | |
def zip2shp2kmlcsv(self, zipFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount, unzPath): | |
# expand any leading tilde | |
# to the user's home path | |
zipFile = os.path.expanduser(zipFile) | |
tgtPath = os.path.expanduser(tgtPath) | |
unzPath = os.path.expanduser(unzPath) | |
# verify that ZIP file exists | |
if not os.path.exists(zipFile): | |
print ("ZIP file '%s' does NOT exist!" % zipFile) | |
return | |
rmvFldr = False | |
# if no target folder specified, | |
if tgtPath == "": | |
# output an error message | |
print ("Target folder was NOT specified!") | |
else: | |
print ("Target folder '%s'was specified." % tgtPath) | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(tgtPath): | |
os.makedirs(tgtPath) | |
# if UNZIP folder was NOT specifed, | |
# generate a temporary UNZIP folder | |
# to be removed after processing | |
if unzPath == "": | |
unzPath = tempfile.mkdtemp() | |
rmvFldr = True | |
print ("Unzip folder '%s' was generated." % unzPath) | |
else: | |
print ("Unzip folder '%s' was specified." % unzPath) | |
# make sure the UNZIP folder exists, | |
# creating it recursively if it does not | |
if os.path.exists(unzPath): | |
os.makedirs(unzPath) | |
# unzip the ZIP file | |
# to the UNZIP folder | |
self.zip2dir(zipFile, unzPath) | |
# find the SHP file in | |
# the specified UNZIP folder | |
tmpFile = "" | |
for aFile in os.listdir(unzPath): | |
if aFile.lower().endswith(".shp"): | |
tmpFile = os.path.join(unzPath, aFile) | |
break | |
# verify that a SHP file was found | |
if tmpFile.strip() == "": | |
print ("Zip file '%s' did NOT contain a SHP file!" % zipFile) | |
return | |
else: | |
print ("zipFile = %s" % zipFile) | |
print ("shpFile = %s" % tmpFile) | |
if kmlFile == "": | |
kmlFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.shp','.kml')) | |
if os.path.dirname(kmlFile) == "": | |
kmlFile = os.path.join(tgtPath, kmlFile) | |
print ("kmlFile = %s" % kmlFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.shp','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# create the KML and CSV files from the SHP file | |
self.shp2kmlcsv(tmpFile, tgtPath, kmlFile, csvFile, maxRecords, flushCount) | |
# if UNZIP folder removal specified, | |
# remove the UNZIP folder in question | |
if rmvFldr: | |
shutil.rmtree(unzPath) | |
return | |
# ------------------------------------------------------------------------- | |
# defin the ZIP file to a TAB file, then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2tab2csv(self, zipFile, tgtPath, tabFile, csvFile, maxRecords, flushCount, unzPath, tabDelimiter='\t', csvDelimiter=','): | |
# expand any leading tilde | |
# to the user's home path | |
zipFile = os.path.expanduser(zipFile) | |
tgtPath = os.path.expanduser(tgtPath) | |
unzPath = os.path.expanduser(unzPath) | |
# verify that ZIP file exists | |
if not os.path.exists(zipFile): | |
print ("ZIP file '%s' does NOT exist!" % zipFile) | |
return | |
rmvFldr = False | |
# if no target folder specified, | |
if tgtPath == "": | |
# output an error message | |
print ("Target folder was NOT specified!") | |
else: | |
print ("Target folder '%s'was specified." % tgtPath) | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(tgtPath): | |
os.makedirs(tgtPath) | |
# if UNZIP folder was NOT specifed, | |
# generate a temporary UNZIP folder | |
# to be removed after processing | |
if unzPath == "": | |
unzPath = tempfile.mkdtemp() | |
rmvFldr = True | |
print ("Unzip folder '%s' was generated." % unzPath) | |
else: | |
print ("Unzip folder '%s' was specified." % unzPath) | |
# make sure the UNZIP folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(unzPath): | |
os.makedirs(unzPath) | |
# unzip the ZIP file | |
# to the UNZIP folder | |
self.zip2dir(zipFile, unzPath) | |
# find the TAB or TXT file in | |
# the specified UNZIP folder | |
tmpFile = "" | |
for aFile in os.listdir(unzPath): | |
if aFile.lower().endswith(".tab") or aFile.lower().endswith(".txt"): | |
tmpFile = os.path.join(unzPath, aFile) | |
print ("zipFile = %s" % zipFile) | |
print ("tabFile = %s" % tmpFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.tab','.csv').replace('.txt','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# create the CSV files from the TAB file | |
self.tab2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, tabDelimiter, csvDelimiter) | |
# verify that a TAB or TXT file was found | |
if tmpFile.strip() == "": | |
print ("Zip file '%s' did NOT contain a TAB or TXT file!" % zipFile) | |
# if UNZIP folder removal specified, | |
# remove the UNZIP folder in question | |
if rmvFldr: | |
shutil.rmtree(unzPath) | |
return | |
# ------------------------------------------------------------------------- | |
# define the ZIP file to a TXT file and then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2txt2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath, txtDelimiter=',', csvDelimiter=','): | |
# expand any leading tilde | |
# to the user's home path | |
zipFile = os.path.expanduser(zipFile) | |
tgtPath = os.path.expanduser(tgtPath) | |
unzPath = os.path.expanduser(unzPath) | |
# verify that ZIP file exists | |
if not os.path.exists(zipFile): | |
print ("ZIP file '%s' does NOT exist!" % zipFile) | |
return | |
rmvFldr = False | |
# if no target folder specified, | |
if tgtPath == "": | |
# output an error message | |
print ("Target folder was NOT specified!") | |
else: | |
print ("Target folder '%s'was specified." % tgtPath) | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(tgtPath): | |
os.makedirs(tgtPath) | |
# if UNZIP folder was NOT specifed, | |
# generate a temporary UNZIP folder | |
# to be removed after processing | |
if unzPath == "": | |
unzPath = tempfile.mkdtemp() | |
rmvFldr = True | |
print ("Unzip folder '%s' was generated." % unzPath) | |
else: | |
print ("Unzip folder '%s' was specified." % unzPath) | |
# make sure the UNZIP folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(unzPath): | |
os.makedirs(unzPath) | |
# unzip the ZIP file | |
# to the UNZIP folder | |
self.zip2dir(zipFile, unzPath) | |
# find the SHP file in | |
# the specified UNZIP folder | |
tmpFile = "" | |
for aFile in os.listdir(unzPath): | |
if aFile.lower().endswith(".txt"): | |
tmpFile = os.path.join(unzPath, aFile) | |
break | |
# verify that a SHP file was found | |
if tmpFile.strip() == "": | |
print ("Zip file '%s' did NOT contain a TXT file!" % zipFile) | |
return | |
else: | |
print ("zipFile = %s" % zipFile) | |
print ("txtFile = %s" % tmpFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.txt','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# create the CSV file from the TXT file | |
self.txt2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount, txtDelimiter, csvDelimiter) | |
# if UNZIP folder removal specified, | |
# remove the UNZIP folder in question | |
if rmvFldr: | |
shutil.rmtree(unzPath) | |
return | |
# ------------------------------------------------------------------------- | |
# defin the ZIP file to a XLS file, then to a CSV file method | |
# ------------------------------------------------------------------------- | |
def zip2xls2csv(self, zipFile, tgtPath, csvFile, maxRecords, flushCount, unzPath): | |
# expand any leading tilde | |
# to the user's home path | |
zipFile = os.path.expanduser(zipFile) | |
tgtPath = os.path.expanduser(tgtPath) | |
unzPath = os.path.expanduser(unzPath) | |
# verify that ZIP file exists | |
if not os.path.exists(zipFile): | |
print ("ZIP file '%s' does NOT exist!" % zipFile) | |
return | |
rmvFldr = False | |
# if no target folder specified, | |
if tgtPath == "": | |
# output an error message | |
print ("Target folder was NOT specified!") | |
else: | |
print ("Target folder '%s'was specified." % tgtPath) | |
# make sure the target folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(tgtPath): | |
os.makedirs(tgtPath) | |
# if UNZIP folder was NOT specifed, | |
# generate a temporary UNZIP folder | |
# to be removed after processing | |
if unzPath == "": | |
unzPath = tempfile.mkdtemp() | |
rmvFldr = True | |
print ("Unzip folder '%s' was generated." % unzPath) | |
else: | |
print ("Unzip folder '%s' was specified." % unzPath) | |
# make sure the UNZIP folder exists, | |
# creating it recursively if it does not | |
if not os.path.exists(unzPath): | |
os.makedirs(unzPath) | |
# unzip the ZIP file | |
# to the UNZIP folder | |
self.zip2dir(zipFile, unzPath) | |
# find the TAB or TXT file in | |
# the specified UNZIP folder | |
tmpFile = "" | |
for aFile in os.listdir(unzPath): | |
if aFile.lower().endswith(".xls") or aFile.lower().endswith(".xlsx"): | |
tmpFile = os.path.join(unzPath, aFile) | |
print ("zipFile = %s" % zipFile) | |
print ("xlsFile = %s" % tmpFile) | |
if csvFile == "": | |
csvFile = os.path.join(tgtPath, os.path.basename(tmpFile).lower().replace('.xls','.csv').replace('.xlsx','.csv')) | |
if os.path.dirname(csvFile) == "": | |
csvFile = os.path.join(tgtPath, csvFile) | |
print ("csvFile = %s" % csvFile) | |
# create the CSV files from the XLS file | |
self.xls2csv(tmpFile, tgtPath, csvFile, maxRecords, flushCount) | |
# verify that a TAB or TXT file was found | |
if tmpFile.strip() == "": | |
print ("Zip file '%s' did NOT contain a XLS file!" % zipFile) | |
# if UNZIP folder removal specified, | |
# remove the UNZIP folder in question | |
if rmvFldr: | |
shutil.rmtree(unzPath) | |
return |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment