-
Notifications
You must be signed in to change notification settings - Fork 1
/
import.py
55 lines (43 loc) · 1.69 KB
/
import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""
Workflow:
-import.py-
1. Obtain a CSV of parcels from the Leon County Appraiser's website.
2. Import CSV file as dataframe.
3. Modify dataframe.
-bot.py-
4. Post a Street View image of the address (along with the street name/number) to the Twitter account.
"""
#import libraries
import pandas as pd
#columns of the appraiser's CSV that this script reads
_SOURCE_COLUMNS = ["PARID","LOCATION","ZIP","HOUSENBR","TAX_DISTRICT"]
#constant address parts appended to every parcel
_CITY = 'Tallahassee'
_STATE = 'Florida'
#a unit designator (APT, BLDG, SUITE/STE, OFC, UNIT) plus everything after it.
#the negative lookahead stops street names that merely START with one of the
#keywords (e.g. " STEWART", " STERLING", " UNITY") from being cut off, which
#the previous chained str.split() calls did.
_UNIT_SUFFIX_PATTERN = r" (?:APT|BLDG|SUITE|STE|OFC|UNIT)(?![A-Za-z]).*$"


def prepare_lots(parcels, random_state=None):
    """Clean the raw parcel table and return the shuffled list of lots.

    Steps, in order:
      1. drop rows with any missing value;
      2. cast ZIP, TAX_DISTRICT and HOUSENBR to integers;
      3. strip a trailing unit designator (APT, BLDG, SUITE/STE, OFC, UNIT)
         from LOCATION;
      4. drop rows whose HOUSENBR is 0 or whose TAX_DISTRICT is 2;
      5. keep only the first row for each distinct LOCATION;
      6. build the full address and shuffle the rows.

    Args:
        parcels: DataFrame holding at least the LOCATION, ZIP, HOUSENBR and
            TAX_DISTRICT columns. It is not modified.
        random_state: optional seed forwarded to ``DataFrame.sample`` so the
            shuffle can be reproduced; ``None`` (the default) keeps the
            shuffle random on every run.

    Returns:
        A DataFrame with columns ``street`` (cleaned LOCATION), ``address``
        ("<LOCATION>, Tallahassee, Florida, <ZIP>") and ``tweeted``
        (initialised to 0), indexed 0..n-1 in shuffled order.
    """
    #remove lots with insufficient information; copy so the column
    #assignments below never write into a view of the caller's frame
    df = parcels.dropna().copy()
    #turn zip code, tax district, and housenumber into integers
    for column in ("ZIP", "TAX_DISTRICT", "HOUSENBR"):
        df[column] = df[column].astype(int)
    #remove APT, BLDG, SUITE/STE, OFC, and UNIT (and what follows) from location
    df["LOCATION"] = df["LOCATION"].astype(str).str.replace(
        _UNIT_SUFFIX_PATTERN, "", regex=True
    )
    #remove parcels with 0 as the house number and parcels in tax district 2
    #NOTE(review): the original comment says district 2 means "outside of the
    #city" - confirm against the appraiser's data dictionary
    in_scope = (df['HOUSENBR'] != 0) & (df['TAX_DISTRICT'] != 2)
    #keep a single parcel per distinct (cleaned) location
    kept = df.loc[in_scope].drop_duplicates(subset='LOCATION', keep='first')
    #assemble the output columns directly: street, full address, tweeted flag
    lots = pd.DataFrame({
        'street': kept['LOCATION'],
        'address': kept['LOCATION'] + ", " + _CITY + ", " + _STATE + ", " + kept['ZIP'].astype(str),
        'tweeted': 0,
    })
    #shuffle list to randomize order of parcels and reindex dataframe
    return lots.sample(frac=1, random_state=random_state).reset_index(drop=True)


if __name__ == "__main__":
    #STEP 2: Importing - import csv file as dataframe
    data = pd.read_csv(r'./Certified_Data.csv')[_SOURCE_COLUMNS]
    #STEP 3: Modifying - the original script read an undefined name `df`
    #here and stopped with a NameError; the loaded frame is now passed on
    df = prepare_lots(data)
    #save dataframe as csv file
    df.to_csv('./lots.csv')