-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgeocode.py
executable file
·123 lines (96 loc) · 4.03 KB
/
geocode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
from __future__ import print_function
from time import sleep
import csv
import argparse
# from geopy.geocoders import Nominatim
from geopy.geocoders import GoogleV3
"""
This program reads an 'input' csv file, geocodes every row whose lat/long
columns are empty, and writes all rows to an 'output' csv file.
"""
__author__ = "Luke Swart"

# Header names of the input csv columns this script reads and fills in.
ADDRESS_COLUMN = "Address / Location name"
LAT_COLUMN = "LOCATION_LATITUDE"
LON_COLUMN = "LOCATION_LONGITUDE"

# Header name of the optional extra column holding the geocoder's result.
# When INCLUDE_GEOCODE_RESULTS_COLUMN is true, that column is inserted
# directly after the longitude column in the output file.
GEOCODE_RESULTS = "GEOCODE_RESULTS"
INCLUDE_GEOCODE_RESULTS_COLUMN = True
def run():
    """Parse the command line and run the selected processing method.

    Example usage: `python geocode.py input.csv output.csv`

    Positional arguments are the input csv (opened for reading) and the
    output csv (opened for writing, which truncates an existing file).
    `-m/--method` selects the processing method; "geocode" is the default
    and the only method this script implements.

    Raises:
        SystemExit: raised by argparse on invalid arguments, including an
            unsupported `--method` value.
    """
    parser = argparse.ArgumentParser(description="Manage various etl tasks")
    parser.add_argument('input', type=argparse.FileType('r'),
                        help='input csv file')
    parser.add_argument('output', type=argparse.FileType('w'),
                        help='output csv file')
    # `choices` makes argparse reject unknown methods up front instead of
    # silently doing nothing and leaving an empty output file behind.
    parser.add_argument('-m', '--method', dest="method",
                        default="geocode", choices=["geocode"],
                        help="Defines which method will process the file")
    args = parser.parse_args()
    print(args)

    if args.method == "geocode":
        print("input:", args.input)
        print("output:", args.output)
        # The context managers close both files once processing finishes.
        with args.input as readFile, args.output as writeFile:
            geocode(readFile, writeFile)
        print("geocoded csv from", args.input, "into", args.output)
def _is_blank(value):
    """Return True when a csv cell is missing, empty or whitespace-only."""
    return value is None or not value.strip()


def geocode(readFile, writeFile, geolocator=None, delay=5,
            lat_column=None, lon_column=None, address_column=None,
            results_column=None, include_results=None, region='WA'):
    """Copy a csv from readFile to writeFile, geocoding rows without lat/lon.

    Each row whose latitude or longitude cell is blank is geocoded from its
    address cell (with `region` appended) and the coordinates are filled
    in. Rows that already have both coordinates are copied unchanged. When
    the geocoder finds no match, the coordinates are left as they were.

    Args:
        readFile: open text file (or file-like object) with the input csv.
        writeFile: open text file (or file-like object) for the output csv.
        geolocator: object with a `geocode(query)` method returning either
            None or an object with `latitude`/`longitude` attributes.
            Defaults to a new `GoogleV3()` instance.
        delay: seconds to sleep after each geocoder call (rate limiting).
        lat_column: latitude header name; defaults to LAT_COLUMN.
        lon_column: longitude header name; defaults to LON_COLUMN.
        address_column: address header name; defaults to ADDRESS_COLUMN.
        results_column: header name of the extra column that stores the
            geocoder result; defaults to GEOCODE_RESULTS.
        include_results: whether to add the results column right after the
            longitude column; defaults to INCLUDE_GEOCODE_RESULTS_COLUMN.
        region: text appended to every address before geocoding.

    Raises:
        ValueError: if the results column is requested but the longitude
            column is not present in the header.
        KeyError: if a row lacks the latitude, longitude or address column.
    """
    # Resolve defaults at call time so the module-level configuration is
    # honoured while callers can still override any of it.
    if lat_column is None:
        lat_column = LAT_COLUMN
    if lon_column is None:
        lon_column = LON_COLUMN
    if address_column is None:
        address_column = ADDRESS_COLUMN
    if results_column is None:
        results_column = GEOCODE_RESULTS
    if include_results is None:
        include_results = INCLUDE_GEOCODE_RESULTS_COLUMN

    reader = csv.DictReader(readFile)
    if reader.fieldnames is None:
        # The input has no header row at all, so there is nothing to copy.
        return

    # Work on a copy: `reader.fieldnames` is the reader's own list, and
    # inserting into it would shift every column after the longitude when
    # the remaining rows are parsed.
    fieldnames = list(reader.fieldnames)
    if include_results and results_column not in fieldnames:
        lon_index = fieldnames.index(lon_column)
        fieldnames.insert(lon_index + 1, results_column)

    writer = csv.DictWriter(writeFile, fieldnames=fieldnames)
    writer.writeheader()

    if geolocator is None:
        geolocator = GoogleV3()

    for row in reader:
        if _is_blank(row[lat_column]) or _is_blank(row[lon_column]):
            # Drop missing parts and literal 'NULL' placeholders before
            # building the query string.
            parts = [row[address_column], region]
            query = ', '.join(p for p in parts if p and p != 'NULL')
            location = geolocator.geocode(query) if query else None
            print("geocoded location:", location)
            if query and delay:
                sleep(delay)
            # The geocoder returns None when it finds no match; keep the
            # row and leave its coordinates untouched in that case.
            if location is not None:
                row[lat_column] = str(location.latitude)
                row[lon_column] = str(location.longitude)
            if include_results:
                row[results_column] = '' if location is None else location
        elif include_results:
            # Keep a result already present in the input (e.g. on a re-run).
            row[results_column] = row.get(results_column) or ''
        writer.writerow(row)
def main():
    """Script entry point: hand control straight to the CLI runner."""
    return run()
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()