-
Notifications
You must be signed in to change notification settings - Fork 0
/
support_vector_machine.py
112 lines (98 loc) · 3.59 KB
/
support_vector_machine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import numpy as np
import math
import cvxopt
# Silence cvxopt's per-iteration progress printout for every qp() call below.
cvxopt.solvers.options['show_progress'] = False
class SVM:
    '''
    Hard-margin Support Vector Machine for binary classification of
    linearly separable datasets.

    Fields:
        int     dim          Dimensionality of the input vectors
        ndarray weights      Array (dim+1,) of weights [b, w1, ..., wdim]
        list    data         List of tuples (x, y): x a length-dim vector, y = f(x) in {-1, +1}
        list    suppvectors  Support vectors found by solve()
    '''
    def __init__(self, dim, data=None):
        '''
        dim:  dimensionality of the input space
        data: optional list of (x, y) training tuples
        '''
        # NOTE: the original signature used a mutable default (data=[]),
        # which is shared across all instances; None sentinel avoids that.
        self.dim = dim
        self.reset([] if data is None else data)

    def reset(self, data):
        '''
        Reset the weights to zero and install a validated data sample.

        Raises:
            ValueError: if any x in data does not have length self.dim.
        '''
        self.weights = np.zeros(self.dim + 1)
        for t in data:
            if len(t[0]) != self.dim:
                raise ValueError('Wrong data dimensionality')
        # Defensive copy: callers mutating their list must not alias ours.
        self.data = list(data)
        self.suppvectors = []

    def solve(self):
        '''
        Compute the weights and support vectors by solving the hard-margin
        dual QP with cvxopt:

            minimize (1/2) a^T P a - 1^T a
            s.t.     a_i >= 0  and  y^T a = 0

        with P[i,j] = y_i y_j <x_i, x_j>.  The equality constraint is
        encoded as the inequality pair y^T a <= 0 and -y^T a <= 0.

        Raises:
            ValueError: if no data has been supplied.
        '''
        n = len(self.data)
        if n == 0:
            raise ValueError('Cannot solve: no training data supplied')
        X = np.asarray([point[0] for point in self.data], dtype=float)  # (n, dim)
        y = np.asarray([point[1] for point in self.data], dtype=float)  # (n,)
        # Label-scaled Gram matrix, vectorized (was an O(n^2) Python loop).
        P = cvxopt.matrix(np.outer(y, y) * (X @ X.T))
        q = cvxopt.matrix(np.full(n, -1.0))
        # Rows: y^T a <= 0, -y^T a <= 0, then -a_i <= 0 for each i.
        G = cvxopt.matrix(np.vstack((y, -y, -np.identity(n))))
        h = cvxopt.matrix(np.zeros(n + 2))
        sol = cvxopt.solvers.qp(P, q, G, h)
        alpha = sol['x']
        # Numerical cutoff: multipliers below 1/n^2 are treated as zero;
        # the rest mark support vectors.
        supp = []
        for i in range(n):
            if alpha[i] < 1 / (n ** 2):
                alpha[i] = 0
            else:
                supp.append(self.data[i])
        self.suppvectors = supp
        # w = sum_i alpha_i y_i x_i
        w = np.zeros(self.dim)
        for i in range(n):
            w += alpha[i] * y[i] * X[i]
        # Bias from one support vector (any works in exact arithmetic; keep
        # the original's choice of the largest-norm SV, first on ties).
        maxsv = self.data[0]
        if self.suppvectors:
            maxsv = max(self.suppvectors, key=lambda sv: np.linalg.norm(sv[0]))
        b = maxsv[1] - np.dot(w, maxsv[0])
        self.weights = np.append([b], w, axis=0)

    def hypothesis(self, x):
        '''
        Take a dim-dimensional data vector x and compute h(x) = sign(w . [1, x])
        using the current weights; returns +1.0, -1.0, or 0.0 exactly on the
        separating hyperplane.

        Generalized to accept any sequence (list, tuple, ndarray); the
        original list-concatenation only accepted Python lists.
        '''
        # Prepend the constant 1 so weights[0] acts as the bias term b.
        x_adj = np.concatenate(([1.0], np.asarray(x, dtype=float)))
        weighted_sum = np.dot(self.weights, x_adj)
        if weighted_sum == 0.0:
            return 0.0
        return math.copysign(1.0, weighted_sum)  # sign function

    def classify(self, point):
        '''
        Take as "point" a tuple (x, y) with x a vector and y = f(x), and
        classify it: return True if h(x) == f(x), False otherwise.
        '''
        return self.hypothesis(point[0]) == point[1]

    def classification_error(self, data):
        '''
        Return the fraction of points in data misclassified by the current
        hypothesis: Ein when given self.data, Eout when given a fresh
        dataset generated by f(x).  data must be non-empty.
        '''
        g_misclass_points = sum(1 for point in data if not self.classify(point))
        return g_misclass_points / len(data)