-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfor_shilpi.py
executable file
·109 lines (99 loc) · 3.43 KB
/
for_shilpi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import pylab as P
import matplotlib.pyplot as plt
from scipy import stats
from scipy import optimize
import pyqt_fit.nonparam_regression as smooth
from pyqt_fit import npr_methods
import calendar
import re
import textwrap as tw
from geopy.geocoders import Nominatim
# from geopy.distance import vincenty
import time
import geocoder
from vincenty import vincenty
# Base directories, built from the working directory the script is started in.
# NOTE(review): homedir and weatherdir are assigned the same path, and homedir
# is not referenced by any function visible in this file - confirm before
# removing it.
homedir = os.getcwd() + '/csv_FY/weather/'
# Root of the weather folders used by the functions below
# (weatherinput/by_station, weather_nostr, station_temp, station_dd).
weatherdir = os.getcwd() + '/csv_FY/weather/'
# Text form of a float with an optional negative two-digit exponent
# (e.g. "-12.5", "3", "1.2e-05"). Every part is optional, so the empty
# string matches as well. Compiled once instead of on every call.
_FLOAT_PATTERN = re.compile(r'-?[0-9]{0,3}\.?[0-9]{0,20}(e-[0-9]{2})?$')


def is_string(s):
    """Return True when *s* is a str that does not look like a float.

    Non-str values (floats, NaN, None, ...) always give False. A str gives
    False when it matches the float pattern from its start - optional
    minus sign, digits, optional decimal point, more digits, optional
    ``e-NN`` exponent - and True otherwise.

    Note that the pattern allows at most three digits before a decimal
    point, so text such as "1234.5" is reported as a string, and only
    negative exponents are recognised.
    """
    if not isinstance(s, str):
        return False
    return _FLOAT_PATTERN.match(s) is None
def check_data_single(f):
    """Blank out non-numeric text in one station CSV and save a cleaned copy.

    The file at path *f* is read with pandas. The column whose name equals
    the first four characters of the file name is scanned with
    ``is_string``; every distinct text value found there is printed and
    then replaced by NaN throughout the whole frame. The result is written
    to ``weatherdir + 'weather_nostr/<file name>'`` without the index.

    Indexing fails with KeyError when the file has no column named after
    the first four characters of its file name.
    """
    # os.path.basename also copes with platform-specific separators, unlike
    # searching for the last '/' by hand.
    filename = os.path.basename(f)
    s = filename[:4]
    outfile = weatherdir + 'weather_nostr/{0}'.format(filename)
    df = pd.read_csv(f)
    str_set = set(x for x in df[s].tolist() if is_string(x))
    if str_set:
        # Single-argument print gives the same output on Python 2 and 3.
        print('{0} {1}'.format(s, list(str_set)))
        df.replace(list(str_set), np.nan, inplace=True)
    # NOTE(review): the write sits outside the `if` so that files without
    # any text values are still copied to weather_nostr/ - confirm against
    # the original indentation.
    df.to_csv(outfile, index=False)
def get_mean_temp_single(f):
    """Write monthly means of one cleaned station CSV to station_temp/.

    The file at path *f* is read, its 'Timestamp' column is dropped and the
    rows are indexed by the 'localTime' column parsed as datetimes. Monthly
    means are written to ``weatherdir + 'station_temp/<file name>'``; that
    file is then read back so that 'year' and 'month' columns can be derived
    from the text of the date column, which is renamed to 'timestamp'. The
    final column order is: timestamp, year, month, then everything else.
    """
    filename = os.path.basename(f)
    outfile = weatherdir + 'station_temp/{0}'.format(filename)
    # Single-argument print gives the same output on Python 2 and 3.
    print('write mean temperature to {0}'.format(filename))
    df = pd.read_csv(f)
    df.drop('Timestamp', axis=1, inplace=True)
    df.set_index(pd.DatetimeIndex(df['localTime']), inplace=True)
    # NOTE(review): `how=` is the older pandas resample spelling; newer
    # pandas expects .resample('M').mean(). Confirm the pandas version this
    # script targets before changing it.
    # to_csv returns None when given a path, so its result is not kept.
    df.resample('M', how='mean').to_csv(outfile, index=True)

    # Round-trip through the CSV: the index comes back as a text column
    # labelled 'Unnamed: 0', which is sliced as YYYY-MM-... below.
    df2 = pd.read_csv(outfile)
    df2['year'] = df2['Unnamed: 0'].map(lambda x: x[:4])
    df2['month'] = df2['Unnamed: 0'].map(lambda x: int(x[5:7]))
    df2.rename(columns={'Unnamed: 0': 'timestamp'}, inplace=True)

    # Move year and month directly behind the first (timestamp) column.
    cols = [c for c in df2 if c not in ('year', 'month')]
    cols[1:1] = ['year', 'month']
    df2[cols].to_csv(outfile, index=False)
def get_DD_itg_single(f, baselist, theme):
    """Write monthly degree-day style totals for one station CSV.

    The file at path *f* is read, its 'Timestamp' column is dropped and the
    rows are indexed by 'localTime' parsed as datetimes, then averaged per
    hour. For every ``base`` in *baselist* a column named ``'<base>F'`` is
    added from the column named after the first four characters of the
    file name:

    * ``theme == 'HDD'``: ``base - value`` where the value is below base,
      otherwise 0;
    * any other theme: ``value - base`` where the value is above base,
      otherwise 0.

    Missing values stay missing. The hourly frame is averaged per day, the
    daily frame is summed per month, and the result is written to
    ``weatherdir + 'station_dd/<theme>_<first four chars>.csv'``. The
    elapsed time is printed at the end.

    NOTE(review): 'HDD'/'CDD' presumably mean heating/cooling degree days
    and the 'F' suffix Fahrenheit - confirm with the data owner.
    """
    starttime = time.time()
    df = pd.read_csv(f)
    # os.path.basename also copes with platform-specific separators.
    s = os.path.basename(f)[:4]
    df.drop('Timestamp', axis=1, inplace=True)
    df.set_index(pd.DatetimeIndex(df['localTime']), inplace=True)
    # NOTE(review): `how=` is the older pandas resample spelling; confirm
    # the pandas version this script targets before changing it.
    df_hour = df.resample('H', how='mean')

    readings = df_hour[s]
    for base in baselist:
        # Vectorized form of "0 if on the far side of base, else the gap":
        # the signed gap is clipped at zero, and NaN readings stay NaN.
        if theme == 'HDD':
            gap = base - readings
        else:
            gap = readings - base
        df_hour['{0}F'.format(base)] = gap.clip(lower=0)

    df_day = df_hour.resample('D', how='mean')
    # to_csv returns None when given a path, so its result is not kept.
    df_day.resample('M', how='sum').to_csv(
        weatherdir + 'station_dd/{0}_{1}.csv'.format(theme, s), index=True)
    # Single-argument print gives the same output on Python 2 and 3.
    print('calculating {0} for {1} in {2} s'.format(
        theme, s, round(time.time() - starttime, 2)))
def process_single_stations():
    """Run check_data_single on every CSV under weatherinput/by_station/.

    The position of each file in the glob result is printed before the
    file is processed.
    """
    station_files = glob.glob(weatherdir + 'weatherinput/by_station/*.csv')
    for position, path in enumerate(station_files):
        print(position)
        check_data_single(path)
def process_weatherfile():
    """Build the monthly mean and HDD/CDD outputs for every cleaned file.

    Every CSV under weather_nostr/ gets its monthly means written, followed
    by the 'HDD' and 'CDD' tables for the bases 40 through 80 inclusive.
    The position of each file in the glob result is printed first.
    """
    # The cleaning pass is left disabled here, as in the original:
    # process_single_stations()
    cleaned_files = glob.glob(weatherdir + 'weather_nostr/*.csv')
    bases = range(40, 81)
    for position, path in enumerate(cleaned_files):
        print(position)
        get_mean_temp_single(path)
        for theme in ('HDD', 'CDD'):
            get_DD_itg_single(path, bases, theme)
def main():
    """Entry point: process the cleaned weather files."""
    process_weatherfile()


# Called unconditionally, so the processing also runs when the file is imported.
main()