-
Notifications
You must be signed in to change notification settings - Fork 0
/
simple_perceptron.py
93 lines (86 loc) · 2.77 KB
/
simple_perceptron.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import math
import random
import operator
import numpy as np
class Perceptron:
    '''
    Implements the Perceptron Learning Algorithm (PLA).
    fields:
        int  dim         Dimensionality of the data
        List weights     Array (dim+1 x 1) of the weights; index 0 is the bias
        List data        Array (N x 1) of tuples (x, y) composed of vectors x
                         and results y = f(x) in {-1, +1}
        int  iterations  Number of PLA update steps undergone
    '''
    def __init__(self, dim, data=None):
        '''
        Create a perceptron for dim-dimensional inputs, optionally feeding
        an initial data sample (defaults to an empty sample).
        '''
        self.dim = dim
        # NOTE: the original used a mutable default `data=[]`, which is
        # shared across all calls; a None sentinel avoids that pitfall.
        self.reset([] if data is None else data)
    def reset(self, data, weights=None):
        '''
        Reset weights and iterations and feed a data sample.

        weights: optional initial weight vector of length dim+1.  Omit it
        (or pass [0], kept for backward compatibility with the original
        sentinel) to start from all zeros.
        Raises ValueError on a weight/data dimensionality mismatch or on
        non-binary (not +/-1) labels.
        '''
        # `weights is None` is the new sentinel; np.array_equal(weights, [0])
        # preserves the old `[0]` sentinel without fragile list==array tricks.
        if weights is None or np.array_equal(weights, [0]):
            self.weights = [0.0] * (self.dim + 1)
        elif len(weights) != (self.dim + 1):
            raise ValueError('Wrong initial weights dimensionality')
        else:
            self.weights = weights
        # Validate every sample before accepting the dataset.
        for t in data:
            if len(t[0]) != self.dim:
                raise ValueError('Wrong data dimensionality')
            elif t[1] != 1 and t[1] != -1:
                raise ValueError('Function output is not binary')
        self.data = data
        self.iterations = 0
    def hypothesis(self, x):
        '''
        Takes d-dimensional data vector x and computes h(x) = sign(w . [1, x])
        using the current weights.

        Returns +1.0 or -1.0; returns 0.0 when the weighted sum is exactly
        zero (e.g. with all-zero weights), so such points count as
        misclassified by classify().
        '''
        x_adj = [1.0] + x  # adjusted to include the constant-1 bias coordinate
        weighted_sum = sum(map(operator.mul, self.weights, x_adj))  # dot product of w and x
        if weighted_sum == 0.0:
            return 0.0
        return math.copysign(1.0, weighted_sum)  # sign function
    def classify(self, point):
        '''
        Takes as "point" a tuple (x, y) with x a vector and y = f(x)
        and classifies it, returning True if h(x) = f(x) and False if not.
        '''
        return self.hypothesis(point[0]) == point[1]
    def train(self):
        '''
        Trains the perceptron with the data using the PLA: repeatedly pick a
        misclassified point at random and apply w <- w + y*[1, x] until no
        misclassification remains.  Terminates only when the data are
        linearly separable.
        '''
        misclass = True
        # Iterate until there is no more misclassification.
        while misclass:
            # Indexes of currently misclassified points.  enumerate()
            # replaces the original self.data.index(point) call, which was
            # O(n) per point (O(n^2) per pass) and ambiguous on duplicates.
            misclass_points = [i for i, point in enumerate(self.data)
                               if not self.classify(point)]
            if misclass_points:
                # Choose one misclassified point uniformly at random.
                p = self.data[random.choice(misclass_points)]
                x_adj = [1.0] + p[0]
                # PLA update rule: w <- w + y*x for the chosen point (x, y).
                self.weights = [w + p[1] * xi
                                for w, xi in zip(self.weights, x_adj)]
                # Increment number of iterations.
                self.iterations += 1
            else:
                misclass = False
    def f_disagreement(self, new_data):
        '''
        When given a sufficiently big new dataset new_data with the same
        format as self.data, returns the disagreement fraction between the
        trained function g and the original f, i.e. an estimate of
        P[f(x) != g(x)].

        Raises ZeroDivisionError when new_data is empty.
        '''
        # Count the points of new_data misclassified by g.
        g_misclass = sum(1 for point in new_data if not self.classify(point))
        # Return the fraction as an estimate of P.
        return g_misclass / len(new_data)