-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmerge.py
37 lines (32 loc) · 1.36 KB
/
merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import pandas as pd
import numpy as np
from matplotlib.pyplot import plot, show
ans1 = pd.read_csv('temp/aqi_xgb.csv') # .75
ans2 = pd.read_csv('temp/aqi_nn.csv') # .79
ans3 = pd.read_csv('temp/aqi_lr.csv') # .80
ans4 = pd.read_csv('temp/iprc.csv') #
ans = ans4.copy()
ans['AQI'] = ans1['AQI'] * 0.4 + ans2['AQI'] * 0.4 + ans3['AQI'] * 0.2
ans.loc[ans['AQI'] > 300, 'IPRC'] = ans.loc[ans['AQI'] > 300, 'IPRC'] + 0.002
########################################################################
train = pd.read_csv('temp/train.csv')
test = pd.read_csv('temp/test.csv')
feats = ['PM2_5', 'PM10', 'SO2', 'NO2', 'CO', 'O3',
'AQI_SO2', 'AQI_CO', 'AQI_NO2', 'AQI_O3', 'AQI_PM2_5', 'AQI_PM10', 'AQI_max_1', 'AQI_max_2']
c = []
for i in test.index:
c.append(np.corrcoef(train.loc[487, feats].values.astype('float'), test.loc[i, feats].values.astype('float'))[0, 1])
test['cor'] = c
print(test.sort_values('cor', ascending=False)[:5][['date', 'cor']])
# 138,139,153,196,154
idxs = test.sort_values('cor', ascending=False).index[:5]
c = [15, 8, -4, 4, -4]
for i in range(5):
ans.loc[idxs[i], 'AQI'] = ans.loc[idxs[i], 'AQI'] + c[i]
########################################################################
# plot(ans['IPRC'], '-x')
# plot(ans['IPRC'], '-x')
# show()
# ans['IPRC'] = 0
# ans['AQI'] = 0
ans.to_csv('ans/sub.csv', index=False)