forked from zygmuntz/time-series-classification
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_and_evaluate.py
54 lines (38 loc) · 1.44 KB
/
train_and_evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python
"train a binary classifier on extracted features, predict, evaluate"
import pandas as pd
from pprint import pprint
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression as LR
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.metrics import roc_auc_score as AUC, accuracy_score as accuracy
#
from config import train_file, test_file
# Read the train and test tables from the paths supplied by config.
train = pd.read_csv(train_file)
test = pd.read_csv(test_file)

# Column 'y' is the target; all remaining columns form the feature matrix.
# .values hands plain NumPy arrays to the estimators.
x_train, y_train = train.drop(columns=['y']).values, train['y'].values
x_test, y_test = test.drop(columns=['y']).values, test['y'].values
# Candidate models, evaluated in this order. Disabled alternatives are kept
# as comments so they can be switched back on for comparison.
classifiers = [
    # -- logistic regression on raw features (disabled) --
    # LR(C=10),
    # LR(C=1),
    # LR(C=0.1),

    # -- logistic regression on standardized features --
    make_pipeline(StandardScaler(), LR()),
    # make_pipeline(StandardScaler(), LR(C=10)),
    # make_pipeline(StandardScaler(), LR(C=30)),

    # -- logistic regression on min-max scaled features --
    make_pipeline(MinMaxScaler(), LR()),
    # make_pipeline(MinMaxScaler(), LR(C=10)),
    # make_pipeline(MinMaxScaler(), LR(C=30)),

    # -- linear discriminant analysis (disabled) --
    # LDA(),

    # -- random forest --
    RF(n_estimators=100, min_samples_leaf=5),
]
# Fit each candidate on the training split, score it on the test split, and
# print AUC and accuracy followed by the estimator's repr.
report = "AUC: {:.2%}, accuracy: {:.2%} \n\n{}\n\n"
for model in classifiers:
    model.fit(x_train, y_train)

    # AUC needs a continuous score: take column 1 of predict_proba.
    # NOTE(review): this assumes the positive label is the second entry of
    # the estimator's classes_ -- confirm against the label encoding of 'y'.
    scores = model.predict_proba(x_test)[:, 1]

    # Accuracy is computed on the hard class predictions.
    labels = model.predict(x_test)

    print(report.format(AUC(y_test, scores), accuracy(y_test, labels), model))