ml.py
import pickle

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

df = pd.read_csv('./final_dataset.csv')
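# Pipeline overview: encode the categorical student attributes as integers, split
# the rows 80/20 into train/test, standardise the features, train a linear SVM,
# pickle the fitted model, and report misclassifications on the held-out split.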
# Features
gender = list(df['gender'])
for i in range(len(gender)):
    if gender[i] == 'M':
        gender[i] = 0
    else:
        gender[i] = 1
# Male - 0, Female - 1
school_level = list(df['StageID'])
for i in range(len(school_level)):
    if school_level[i] == "lowerlevel":
        school_level[i] = 0
    elif school_level[i] == "MiddleSchool":
        school_level[i] = 1
    else:
        school_level[i] = 2
# Classes 0-4 - 0, 5-8 - 1, 9-12 - 2
doubts_asked = list(df['raisedhands'])
discussion = list(df['Discussion'])
parent = list(df['ParentschoolSatisfaction'])
for i in range(len(parent)):
    if parent[i] == 'Good':
        parent[i] = 1
    else:
        parent[i] = 0
# Good - 1, Bad - 0
absent = list(df['StudentAbsenceDays'])
for i in range(len(absent)):
    if absent[i] == 'Under-7':
        absent[i] = 0
    else:
        absent[i] = 1
# Under-7 absence days - 0, Above-7 - 1
result = list(df['Class'])
for i in range(len(result)):
    if result[i] == 'L':
        result[i] = 0
    elif result[i] == 'M':
        result[i] = 1
    else:
        result[i] = 2
# L - 0, M - 1, H - 2
labels = result
features = list()
for i in range(len(doubts_asked)):
    features.append([school_level[i], doubts_asked[i], discussion[i], parent[i], absent[i]])
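# Each feature vector is [school_level, doubts_asked, discussion, parent, absent];
# the encoded gender column is computed above but not included in the features.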
# Fraction Of Training Data
fac = 0.8
features_train = features[:int(fac*len(features))]
features_test = features[int(fac*len(features)):]
labels_train = labels[:int(fac*len(labels))]
labels_test = labels[int(fac*len(labels)):]
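# Plain 80/20 split in row order (no shuffling): the test set is the last 20% of the CSV.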
sc = StandardScaler()
sc.fit(features_train)
features_train_std = sc.transform(features_train)
features_test_std = sc.transform(features_test)
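# The scaler is fitted on the training split only, so the test set does not
# influence the standardisation parameters.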
svm = SVC(kernel='linear', C=2.0, random_state=0)
svm.fit(features_train_std, labels_train)
pickle.dump(svm,open('trained_model.sav','wb'))
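# Reload the pickled model from disk and evaluate it on the held-out split.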
loaded_model = pickle.load(open('trained_model.sav','rb'))
labels_pred = loaded_model.predict(features_test_std)
print('Misclassified samples: %d' % (np.asarray(labels_test) != labels_pred).sum())
print('Test samples: %d' % len(features_test))
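
# Illustrative only, not part of the original script: one way the saved model and
# fitted scaler could be reused for a single new student record. The example
# values below are assumptions for demonstration.
# new_sample = sc.transform([[1, 50, 40, 1, 0]])  # [school_level, doubts_asked, discussion, parent, absent]
# print(loaded_model.predict(new_sample))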