-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalculate-sub.py
executable file
·155 lines (138 loc) · 4.73 KB
/
calculate-sub.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import pandas as pd
import numpy as np
import os
import glob
# Open question:
# 1. How should EUI be calculated for a year whose data is not yet complete?
## ## ## ## ## ## ## ## ## ## ##
## logging setup: debug output goes through `logger`
import logging
import sys
# Obtain the logger from the logging registry rather than instantiating
# logging.Logger directly, so that logging.getLogger('reading') anywhere else
# returns this same configured object.
logger = logging.getLogger('reading')
logger.setLevel(logging.DEBUG)
# A directly constructed Logger has no parent; disable propagation so records
# are still emitted only by the handler attached below.
logger.propagate = False
# Registry loggers persist for the life of the process: only attach the
# stream handler once so re-executing this module does not duplicate output.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())
def check_unit_type(df):
    """Log the number of rows recorded under each gas and electric unit.

    df must provide the columns 'gas_unit' and 'elec_unit'.  For each of the
    two columns a heading and the per-unit row counts are written at DEBUG
    level through the module-level logger.  Nothing is returned.
    """
    reports = (
        ('Gas consumption unit types', 'gas_unit'),
        ('Electric - Grid consumption unit types', 'elec_unit'),
    )
    for heading, unit_col in reports:
        logger.debug(heading)
        unit_counts = df.groupby(unit_col).size()
        logger.debug(unit_counts)
def check_unit_convert(df):
    """Print a sample of rows for every gas unit, for manual inspection.

    For each distinct value of the 'gas_unit' column, prints that value
    followed by at most the first 10 rows of its group, restricted to the
    columns at positions 14, 15 and 25.  Nothing is returned.

    NOTE(review): the column positions are hard-coded; which fields they
    refer to depends on the layout of the frame passed in -- confirm against
    the caller's data.
    """
    for unit, rows in df.groupby('gas_unit'):
        # print(x) with a single argument behaves identically on Python 2
        # and Python 3, unlike the Python-2-only print statement.
        print(unit)
        # iloc is purely position based (0 .. length-1 on each axis); label
        # based slicing would not work here because rows in a group keep the
        # index labels they had in the original frame.
        print(rows.iloc[:10, [14, 15, 25]])
def check_total_amt(df):
    """Print a sample of rows for every gas unit to check the summed amount.

    For each distinct value of the 'gas_unit' column, prints that value
    followed by at most the first 10 rows of its group, restricted to the
    columns at positions 10, 11, 14, 15, 25 and 26.  Nothing is returned.

    NOTE(review): the column positions are hard-coded; which fields they
    refer to depends on the layout of the frame passed in -- confirm against
    the caller's data.
    """
    for unit, rows in df.groupby('gas_unit'):
        # print(x) with a single argument behaves identically on Python 2
        # and Python 3, unlike the Python-2-only print statement.
        print(unit)
        # Positional selection: first 10 rows of the group, fixed columns.
        print(rows.iloc[:10, [10, 11, 14, 15, 25, 26]])
def calculate():
    """Compute a yearly EUI per building and write it to csv/eui-2.csv.

    Reads csv/testmerge-euas-2.csv (relative to the current working
    directory), adds electric and gas consumption per row, groups the rows
    by year (first four characters of 'End Date') and 'Building ID', and for
    every group divides the summed consumption by the mean 'GSF'.  Groups
    whose EUI is missing or infinite are dropped before the table is
    written.  Diagnostic output goes to stdout and to the module logger.

    Returns None; the result is the file csv/eui-2.csv.
    """
    csv_dir = os.path.join(os.getcwd(), 'csv')

    # read customized table
    df = pd.read_csv(os.path.join(csv_dir, 'testmerge-euas-2.csv'))
    logger.debug('reading file testmerge-euas-2.csv')

    # A plain `elec_amt + gas_amt` is NaN whenever either reading is missing,
    # and the group sum below then skips the row entirely, losing the reading
    # that was present.  fill_value=0 treats a single missing reading as zero;
    # rows with both readings missing stay NaN and are skipped by sum().
    df['total_amt'] = df['elec_amt'].add(df['gas_amt'], fill_value=0)

    # checking added energy consumption
    #check_total_amt(df)

    # Year is the leading four characters of the 'End Date' string.  The
    # .str accessor leaves a missing date as NaN instead of raising.
    df['Year'] = df['End Date'].str[:4]
    df.info()

    id_list = []
    region_list = []
    year_list = []
    eui_list = []
    for _, group in df.groupby(['Year', 'Building ID']):
        # NOTE(review): the building id, region and year are taken from the
        # first row of the group by column position (2, 7 and 25), so they
        # depend on the column layout of the input file -- confirm.
        id_list.append(group.iloc[0, 2])
        region_list.append(group.iloc[0, 7])
        year_list.append(group.iloc[0, 25])
        eui_list.append(group['total_amt'].sum() / group['GSF'].mean())

    print(id_list[:5])
    print(region_list[:5])
    print(year_list[:5])
    print(eui_list[:5])

    df_eui = pd.DataFrame({'Building ID': id_list, 'Region': region_list,
                           'Year': year_list, 'EUI': eui_list})
    df_eui.info()

    logger.debug('Nan EUI counts:')
    logger.debug(df_eui['EUI'].isnull().value_counts())
    # Assign the result back rather than calling replace(inplace=True) on
    # the column selection, which may operate on a temporary object and
    # leave df_eui unchanged.  A zero mean GSF can produce -inf as well as
    # +inf, so both are mapped to NaN.
    df_eui['EUI'] = df_eui['EUI'].replace([np.inf, -np.inf], np.nan)
    logger.debug(df_eui['EUI'].isnull().value_counts())
    df_eui = df_eui.dropna()
    logger.debug(df_eui['EUI'].isnull().value_counts())

    print(df_eui['EUI'].value_counts())
    ck.get_range(df_eui)
    df_eui.to_csv(os.path.join(csv_dir, 'eui-2.csv'), index=False)
import matplotlib.pyplot as plt
import util_check as ck
import pylab as P
def plot():
    """Write the median EUI per region and year and return it as a table.

    Reads csv/eui-2.csv (relative to the current working directory),
    computes the median 'EUI' for every ('Region', 'Year') pair, and writes
    the result to csv/eui-median.csv with 'Region' and 'Year' as leading
    columns.

    No figure is produced at present: the plotting code that used to live
    here is disabled and kept below as comments for reference.

    Returns:
        A DataFrame indexed by 'Year' with one column per 'Region' holding
        the median EUI.
    """
    csv_dir = os.path.join(os.getcwd(), 'csv')

    # read eui table
    df = pd.read_csv(os.path.join(csv_dir, 'eui-2.csv'))
    logger.debug('finished reading file eui-2.csv')

    # ---- disabled plotting code, kept for reference ----------------------
    # NOTE(review): loop-body indentation below is reconstructed.
    #
    # df.boxplot(column='EUI', by='Region')
    # plt.ylabel('EUI')
    # plt.xlabel('Region')
    # plt.title('EUI by Region')
    # P.savefig(os.getcwd() + '/plot2/EUIbyRegion.png')
    # plt.close()
    #
    # # plot histogram
    # import seaborn as sns
    # grouped = df.groupby('Region')
    # for name, group in grouped:
    #     sns.distplot(group['EUI'])
    #     plt.xlabel('EUI')
    #     plt.title('EUI Distribution')
    #     P.savefig(os.getcwd() + '/plot2/Region-' + str(name) + '-EUIdistribution.png')
    #     plt.close()
    #
    # df.sort(columns='Region', inplace=True)
    # sns.violinplot(x = 'Region', y = 'EUI', data = df)
    # plt.ylabel('EUI')
    # plt.xlabel('Region')
    # plt.title('EUI by Region Violin Plot')
    # P.savefig(os.getcwd() + '/plot2/EUIbyRegionViolin.png')
    # plt.close()
    #
    # # without region 11
    # df = df[df['Building ID'] != 'DC0001ZZ']
    # df.boxplot(column='EUI', by='Region')
    # plt.ylabel('EUI')
    # plt.xlabel('Region')
    # plt.title('EUI by Region')
    # P.savefig(os.getcwd() + '/plot2/EUIbyRegion-noDC0001ZZ.png')
    # plt.close()
    #
    # sns.violinplot(x = 'Region', y = 'EUI', data = df)
    # plt.ylabel('EUI')
    # plt.xlabel('Region')
    # plt.title('EUI by Region Violin Plot')
    # P.savefig(os.getcwd() + '/plot2/EUIbyRegionViolin-noDC0001ZZ.png')
    # plt.close()
    # -----------------------------------------------------------------------

    # Select 'EUI' explicitly: 'Building ID' holds strings (e.g. 'DC0001ZZ'),
    # and taking the median over a non-numeric column raises in recent
    # pandas.  The double brackets keep the result a DataFrame so the CSV
    # gets a proper header row.
    df_med = df.groupby(['Region', 'Year'])[['EUI']].median()
    #print df_med.head()
    df_med.to_csv(os.path.join(csv_dir, 'eui-median.csv'), index=True)

    # unstack() is a Series/DataFrame method, not a GroupBy method, so it is
    # applied to the aggregated medians directly.  level=0 moves 'Region'
    # from the row index into the columns, leaving 'Year' as the row index.
    med = df_med['EUI'].unstack(level=0)

    # ---- disabled per-region plotting code, kept for reference -----------
    # NOTE(review): `group_r` is not defined anywhere in this function;
    # loop-body indentation is reconstructed.
    #
    # for name,group in group_r:
    #     print
    #     print name
    #     group_ry = group.groupby('Year')
    #     med = group_ry.median()
    #     med.info()
    #     #print med.head()
    #     med.plot(columns = ['EUI'])
    #     plt.title('EUI of Region {0}'.format(name))
    #     plt.ylabel('EUI')
    #     plt.xlabel('Year')
    #     P.savefig(os.getcwd() + '/plot2/eui-region-{0}.png'.format(name))
    #     #plt.plot(med['EUI'])
    # -----------------------------------------------------------------------
    return med
def main():
    """Script entry point: run plot().

    The calculate() step is currently commented out, so plot() relies on a
    csv/eui-2.csv file that already exists in the working directory.
    """
    #calculate()
    plot()


# Run only when executed as a script, so the functions above can be loaded
# (e.g. for inspection or testing) without triggering file I/O.
if __name__ == '__main__':
    main()