"""hyper-parameter-tuning.py

Streamlit app for interactively tuning the hyper-parameters of a
DecisionTreeClassifier on the heart-disease dataset (heart_v2.csv).
"""
import streamlit as st
import numpy as np
import pandas as pd
from io import StringIO  # replaces sklearn.externals.six, removed in scikit-learn 0.23
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.metrics import accuracy_score, confusion_matrix
import pydotplus  # PNG rendering requires the Graphviz binaries on the system

st.title('Decision Trees - Hyper Parameter Tuning')
st.write("### Sample Data")
df = pd.read_csv('./heart_v2.csv')
st.write(df.head())
st.write("-"*60)
y = df.pop('heart disease')  # target: 0 = no disease, 1 = disease
X = df  # remaining columns are the features
# 70/30 train/test split with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=42)
# st.write('### Shapes of Training & Test sets')
# st.write(X_train.shape, X_test.shape)
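# Optional (a sketch, not in the original): if the target classes are
# imbalanced, passing stratify=y keeps the class ratio equal in both splits.
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, train_size=0.7, random_state=42, stratify=y)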
# Sidebar controls for the tree's hyper-parameters
max_depth = st.sidebar.slider(
    'Max Depth', min_value=1, max_value=25, step=1, value=3)
max_leaf_nodes = st.sidebar.slider(
    'Max Leaves', min_value=2, max_value=100, step=1, value=100)
min_samples_split = st.sidebar.slider(
    'Min Samples Before Split', min_value=2, max_value=200, step=1, value=5)
min_samples_leaf = st.sidebar.slider(
    'Min Samples In Each Leaf', min_value=1, max_value=200, step=1, value=5)
criterion = st.sidebar.selectbox('Splitting Criterion', ['gini', 'entropy'])
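# A further knob one could expose (a sketch; ccp_alpha needs scikit-learn
# >= 0.22): cost-complexity pruning collapses weak branches after fitting.
# ccp_alpha = st.sidebar.slider(
#     'Pruning Strength (ccp_alpha)', min_value=0.0, max_value=0.05,
#     step=0.001, value=0.0)
# ...then pass ccp_alpha=ccp_alpha through to DecisionTreeClassifier below.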
@st.cache(allow_output_mutation=True)  # skip output hashing for the fitted model
def classify(max_depth, max_leaf_nodes=None, min_samples_split=2,
             min_samples_leaf=1, criterion='gini'):
    """Fit a decision tree on the training split with the chosen settings."""
    dt = DecisionTreeClassifier(
        max_depth=max_depth, max_leaf_nodes=max_leaf_nodes,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf, criterion=criterion)
    return dt.fit(X_train, y_train)
@st.cache
def get_dt_graph(dt_classifier):
    """Export the fitted tree to DOT format and build a pydotplus graph."""
    dot_data = StringIO()
    export_graphviz(dt_classifier, out_file=dot_data, filled=True, rounded=True,
                    feature_names=X.columns, class_names=['No Disease', 'Disease'])
    return pydotplus.graph_from_dot_data(dot_data.getvalue())
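# Graphviz-free alternative (a sketch, not in the original):
# sklearn.tree.plot_tree draws the same tree with matplotlib, avoiding the
# Graphviz system dependency. Given a fitted classifier `dt`:
# import matplotlib.pyplot as plt
# from sklearn.tree import plot_tree
# fig, ax = plt.subplots(figsize=(16, 8))
# plot_tree(dt, feature_names=list(X.columns),
#           class_names=['No Disease', 'Disease'], filled=True, ax=ax)
# st.pyplot(fig)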
# Model-evaluation helpers
def report_performance(y_true, y_pred, title):
    """Write accuracy, confusion matrix, and derived rates for one split."""
    st.write(f'### {title}')
    st.write('Accuracy : ', 100 * np.round(accuracy_score(y_true, y_pred), 3))
    st.write('#### Confusion Matrix')
    confusion = confusion_matrix(y_true, y_pred)
    st.write(confusion)
    # Binary layout: rows = actual, columns = predicted
    TN, FP, FN, TP = confusion.ravel()
    sensitivity = TP / (FN + TP)
    specificity = TN / (FP + TN)
    falsePositiveRate = FP / (FP + TN)
    positivePredictivePower = TP / (TP + FP)
    negativePredictivePower = TN / (TN + FN)
    st.write('Sensitivity / Recall : ', round(100 * sensitivity, 3), '%')
    st.write('Specificity : ', round(100 * specificity, 3), '%')
    st.write('False Positive Rate : ', round(100 * falsePositiveRate, 3), '%')
    st.write('Precision / Positive Predictive Power : ',
             round(100 * positivePredictivePower, 3), '%')
    st.write('Negative Predictive Power : ',
             round(100 * negativePredictivePower, 3), '%')


def evaluate_model(dt_classifier):
    """Report train- and test-set performance for a fitted tree."""
    report_performance(y_train, dt_classifier.predict(X_train),
                       'Train Set Performance')
    st.write("-" * 60)
    report_performance(y_test, dt_classifier.predict(X_test),
                       'Test Set Performance')
# Fit with the current sidebar settings, draw the tree, then evaluate
dt = classify(max_depth, max_leaf_nodes, min_samples_split,
              min_samples_leaf, criterion)
graph = get_dt_graph(dt)
st.write('### Decision Tree')
st.image(graph.create_png(), width=800)
st.write("-" * 60)
evaluate_model(dt)
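# Optional cross-check (a sketch, not in the original): sklearn's
# classification_report derives per-class precision/recall from the same
# predictions and should agree with the manual metrics above.
# from sklearn.metrics import classification_report
# st.text(classification_report(y_test, dt.predict(X_test),
#                               target_names=['No Disease', 'Disease']))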