-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathactiveknn.py
117 lines (104 loc) · 2.2 KB
/
activeknn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def _overlap_scores(reviews, tokens):
    """Return one token-overlap score per review, sorted largest first.

    A review's score is the number of entries of ``tokens`` found in it
    (``t in review``), divided by ``len(review) * len(tokens)`` when both
    lengths are non-zero; otherwise the raw hit count is kept as a float.
    """
    token_count = len(tokens)
    scores = []
    for review in reviews:
        hits = sum(1 for t in tokens if t in review)
        size = len(review)
        score = float(hits)
        if size > 0 and token_count > 0:
            score = hits / (size * token_count * 1.0)
        scores.append(score)
    scores.sort(reverse=True)
    return scores


def knnclassifier(posknn, negknn, token, k):
    """Score ``token`` against positive and negative reviews with a k-NN vote.

    Every review in ``posknn`` and ``negknn`` gets a normalised overlap score
    with ``token``.  The ``k`` highest-scoring reviews across both classes are
    then consumed greedily and their scores accumulated per class.

    Parameters:
        posknn: iterable of positive reviews (each supports ``len`` and ``in``).
        negknn: iterable of negative reviews (same requirements).
        token:  sequence of tokens of the item being classified.
        k:      maximum number of neighbours to accumulate.

    Returns:
        ``[neg_score, pos_score, matched]`` where ``matched`` is ``1`` when at
        least one token occurred in at least one review and ``0`` otherwise
        (in which case both scores are ``0.0``).
    """
    pos_scores = _overlap_scores(posknn, token)
    neg_scores = _overlap_scores(negknn, token)
    prob = [0.0, 0.0]

    # Lists are sorted descending, so the head is the maximum of each class.
    matched = any(score > 0 for score in pos_scores[:1] + neg_scores[:1])
    if not matched:
        prob.append(0)
        return prob

    pos_i = 0
    neg_i = 0
    taken = 0
    while taken < k:
        pos_left = pos_i < len(pos_scores)
        neg_left = neg_i < len(neg_scores)
        if not (pos_left or neg_left):
            # Fewer than k neighbours exist in total; nothing more to add.
            break
        # Ties go to the negative class, as in the original comparison.
        if pos_left and (not neg_left or pos_scores[pos_i] > neg_scores[neg_i]):
            prob[1] += pos_scores[pos_i]
            pos_i += 1
        else:
            # Use the negative cursor, not the iteration counter, so the
            # next-best unused negative neighbour is the one accumulated.
            prob[0] += neg_scores[neg_i]
            neg_i += 1
        taken += 1
    prob.append(1)
    return prob
def insert(l, number):
    """Insert ``number`` into the fixed-size ascending list ``l`` in place.

    ``l`` holds the largest values seen so far, smallest first.  When
    ``number`` is greater than the current minimum ``l[0]``, that minimum is
    dropped and ``number`` is placed at its sorted position; the length of
    ``l`` never changes.  Otherwise ``l`` is left untouched.  An empty list
    is a no-op.

    Parameters:
        l:      list sorted in ascending order, modified in place.
        number: candidate value.

    Returns:
        None.
    """
    if len(l) == 0 or not number > l[0]:
        return
    last = len(l) - 1
    j = 0
    # Shift smaller elements one slot left; compare against the *next*
    # element so the loop stops before overwriting a value >= number.
    while j < last and number > l[j + 1]:
        l[j] = l[j + 1]
        j += 1
    l[j] = number
def _top_k_similarity(reviews, tokens, k):
    """Return the sum of the ``k`` largest non-zero similarities to ``tokens``.

    A review's similarity is the number of entries of ``tokens`` found in it
    divided by ``len(review) * len(tokens)``.  Empty reviews and reviews
    sharing no token are ignored.  ``tokens`` must be non-empty.
    """
    token_count = len(tokens)
    similarities = []
    for review in reviews:
        if len(review) == 0:
            continue
        hits = sum(1 for w in tokens if w in review)
        if hits > 0:
            similarities.append((hits * 1.0) / (len(review) * token_count))
    similarities.sort(reverse=True)
    # max() guards against a negative k turning the slice into "all but last".
    return sum(similarities[:max(k, 0)], 0.0)


def knnclassifier2(posknn, negknn, tokens, k):
    """Classify ``tokens`` as positive or negative with a k-NN similarity vote.

    For each class, the ``k`` reviews most similar to ``tokens`` are selected
    and their similarities summed; the class with the larger sum wins.

    Parameters:
        posknn: iterable of positive reviews (each supports ``len`` and ``in``).
        negknn: iterable of negative reviews (same requirements).
        tokens: sequence of tokens of the item being classified.
        k:      number of nearest neighbours considered per class.

    Returns:
        ``1`` if the positive sum is larger, ``0`` if the negative sum is
        larger, and ``-1`` when ``tokens`` is empty or the sums are equal
        (including the case where nothing matched).
    """
    if len(tokens) == 0:
        return -1
    pos_total = _top_k_similarity(posknn, tokens, k)
    neg_total = _top_k_similarity(negknn, tokens, k)
    if pos_total > neg_total:
        return 1
    if neg_total > pos_total:
        return 0
    return -1