forked from morarez/Pandemic-Insights
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
34 lines (29 loc) · 1.24 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import data_cleaning as dc
from sklearn.preprocessing import MinMaxScaler
import data_smoothing as ds
import pandas as pd
from plot_distance import plot_distance
from tslearn.metrics import dtw
import numpy as np
# Pipeline: load the cleaned daily case counts, transform / scale / smooth
# every country's series, then compare all pairs of countries with DTW and
# plot the resulting distance matrix.
data = dc.cleaning()
daily_cases = pd.DataFrame(data)

# Square-root transform applied before scaling.
daily_cases_trans = daily_cases ** 0.5
# MinMaxScaler rescales each column (one column per country) independently.
daily_cases_norm = MinMaxScaler().fit_transform(daily_cases_trans.values)
# The scaler was fed a bare array, so the country labels are re-attached
# after smoothing (ds.smoothing lives in data_smoothing, not shown here).
daily_cases_smoothed = pd.DataFrame(ds.smoothing(daily_cases_norm))
daily_cases_smoothed.columns = daily_cases.columns

# plotting different countries patterns and their dtw score
# NOTE(review): enabling this needs `import matplotlib.pyplot as plt`, and the
# hard-coded 300 presumably matches the series length -- confirm first.
# dtw_score = dtw(daily_cases_smoothed["Italy"], daily_cases_smoothed["China"])
# x = np.linspace(1, 300, 300)
# plt.plot(x, daily_cases_smoothed["China"], label="China")
# plt.plot(x, daily_cases_smoothed["Italy"], color='red', label="Italy")
# plt.legend()
# dtw_str = "DTW Score = " + str(dtw_score)
# plt.title(dtw_str)
# plt.show()

# calculate the distance matrix
# Transposed once, outside the loops: one row per country.
country_series = daily_cases_smoothed.values.T
num_countries = len(daily_cases_smoothed.columns)
distance_matrix = np.zeros((num_countries, num_countries))
for index_i, series_i in enumerate(country_series):
    # DTW is symmetric, so each unordered pair is computed once and mirrored.
    # The DTW of a series with itself is 0, so the diagonal keeps the zeros
    # it was initialised with.
    for index_j in range(index_i + 1, num_countries):
        score = dtw(series_i, country_series[index_j])
        distance_matrix[index_i, index_j] = score
        distance_matrix[index_j, index_i] = score

plot_distance(distance_matrix, daily_cases.columns)