-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReasoning.py
177 lines (153 loc) · 6.33 KB
/
Reasoning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import numpy as np
import math
import pandas as pd
# @author Liang-Peng & Wang-Yijie
from enum import Enum
from os import path
'''
class Type(Enum):
DISCRETE = 'discrete'
LINEAR = 'linear'
BELL = 'bell'
'''
def findUsedTerrainEnvs(envNames, envClasses, casesEnvs, caseIds):
    """Return the class-"R" environment names used by at least one case.

    Args:
        envNames: Sequence of environment variable names.
        envClasses: Sequence of class codes, parallel to ``envNames``.
        casesEnvs: Indexable collection; ``casesEnvs[j]`` holds the
            environment names recorded for the j-th case.
        caseIds: Sequence of case identifiers; only its length is used.

    Returns:
        A list with each qualifying name once, in ``envNames`` order.
    """
    used_names = []
    for pos, env_class in enumerate(envClasses):
        # Only variables whose class code is "R" are considered.
        if env_class != "R":
            continue
        # A single occurrence in any case is enough to keep the name.
        if any(envNames[pos] in casesEnvs[row] for row in range(len(caseIds))):
            used_names.append(envNames[pos])
    return used_names
def case_Envtable(envNames, envClasses, x_ids, casesEnvs, caseIds):
    """Build a case-by-environment indicator table for class-"R" variables.

    Args:
        envNames: Sequence of environment variable names.
        envClasses: Sequence of class codes, parallel to ``envNames``.
        x_ids: Case identifiers, one per output row.
        casesEnvs: Array indexed by row position; each row lists the
            environment names recorded for one case.
        caseIds: Array of case identifiers aligned with ``casesEnvs`` rows
            (compared element-wise with ``==``, so a NumPy array is expected).

    Returns:
        A tuple ``(table, terrain_names)``. ``table`` has one row per entry of
        ``x_ids``; column 0 holds the case id and column ``k + 1`` is 1 when
        ``terrain_names[k]`` appears among that case's environments, else 0.
    """
    terrain_names = [
        envNames[pos]
        for pos, env_class in enumerate(envClasses)
        if env_class == "R"
    ]

    # One extra leading column records the case id.
    table = np.zeros([len(x_ids), len(terrain_names) + 1])
    table[:, 0] = x_ids

    for row, case_id in enumerate(x_ids):
        matching_rows = np.argwhere(caseIds == case_id)
        envs_of_case = casesEnvs[matching_rows]
        for col, name in enumerate(terrain_names, start=1):
            if name in envs_of_case:
                table[row, col] = 1
    return table, terrain_names
def calSimilarity(x_test, x_case, type, params=None):
    """Dispatch to the similarity function selected by ``type``.

    Args:
        x_test: Attribute value of the case being evaluated.
        x_case: Sequence of the same attribute's values for the stored cases.
        type: One of ``'discrete'``, ``'linear'`` or ``'bell'``.
        params: Extra argument forwarded to the linear and bell functions;
            ignored for ``'discrete'``.

    Returns:
        The list produced by the selected function, or ``None`` when ``type``
        matches none of the three known names.
    """
    if type == 'discrete':
        return calDiscrete(x_test, x_case)
    if type == 'linear':
        return calLinear(x_test, x_case, params)
    if type == 'bell':
        return calBell(x_test, x_case, params)
    # Unknown similarity type: no value is produced.
    return None
def calBell(x_test, x_case, params):
    """Compute bell-shaped similarities on a log10 scale.

    For each stored value the similarity is
    ``2 ** -sqrt(params * |log10(x_test) - log10(value)|)``, where a zero
    operand contributes 0 in place of its logarithm. Two zeros give exactly 1.

    Args:
        x_test: Attribute value of the case being evaluated.
        x_case: Sequence of stored attribute values.
        params: Multiplier applied to the log-scale gap before the square root.

    Returns:
        A list of similarities, one per element of ``x_case``.
    """
    scores = []
    for value in x_case:
        if x_test == 0 and value == 0:
            # Identical zero values are a perfect match.
            scores.append(1)
            continue
        # Zero has no logarithm; it stands in for itself on the log scale.
        log_test = math.log10(x_test) if x_test != 0 else x_test
        log_case = math.log10(value) if value != 0 else 0
        gap = abs(log_test - log_case)
        exponent = -((params * gap) ** 0.5)
        scores.append(2 ** exponent)
    return scores
def calLinear(x_test, x_case, valMax):
    """Compute linear similarities ``1 - |x_test - value| / span``.

    ``span`` is ``max(valMax - x_test, x_test)``. When ``span`` is zero the
    ratio is undefined (the previous implementation raised
    ``ZeroDivisionError`` for plain Python numbers), so an exact match scores
    1.0 and anything else scores 0.0.

    Args:
        x_test: Attribute value of the case being evaluated.
        x_case: Sequence of stored attribute values.
        valMax: Upper reference value used to normalise the difference
            (presumably the attribute's maximum -- confirm with the
            formalization table).

    Returns:
        A list of similarities, one per element of ``x_case``.
    """
    if len(x_case) == 0:
        # Nothing to compare; also avoids touching valMax needlessly.
        return []

    # The normaliser does not depend on the stored value, so compute it once.
    span = max(valMax - x_test, x_test)

    similarities = []
    for value in x_case:
        gap = abs(x_test - value)
        if span == 0:
            # Degenerate range: only an exact match counts as similar.
            similarities.append(1.0 if gap == 0 else 0.0)
        else:
            similarities.append(1 - gap / span)
    return similarities
def calDiscrete(x_test, x_case):
    """Compute exact-match similarities for a discrete attribute.

    Args:
        x_test: Attribute value of the case being evaluated.
        x_case: Sequence of stored attribute values.

    Returns:
        A list with 1 where ``x_test - value`` equals zero and 0 elsewhere.
    """
    # The comparison is done on the difference, so operands must support "-".
    return [1 if (x_test - value) == 0 else 0 for value in x_case]
def _covariates_for_case(env_row, env_names):
    """Return the names whose indicator flag in ``env_row`` equals 1.

    Column 0 of ``env_row`` is the case id; column ``k + 1`` is the flag for
    ``env_names[k]``.
    """
    return [env_names[col - 1] for col in range(1, len(env_row)) if env_row[col] == 1]


def generateSimilarity(newCases, caseData = None, envData = None):
    """Rank stored cases by similarity to the new cases and report covariates.

    Reads the sheets "cases", "formalization" and "Envs" from ``caseData`` and
    the sheet "class" from ``envData``. For every new case, each attribute is
    scored with ``calSimilarity`` and the minimum across attributes is taken as
    the case similarity. Up to five best-matching stored cases are kept.

    Args:
        newCases: Sequence of objects exposing ``get_parameter(column_name)``.
        caseData: Path of the case-base workbook.
        envData: Path of the workbook describing environment variable classes.

    Returns:
        ``None`` (after printing ``'no database found'``) when ``caseData`` is
        missing or does not exist. Otherwise a dict with the keys
        ``'most_similiar_case'``, ``'case_formalization'`` and
        ``'similiar_cases'``. The dict is overwritten on every iteration, so it
        describes the last element of ``newCases``; it is empty when
        ``newCases`` is empty.
    """
    if caseData is None or not path.exists(caseData):
        print('no database found')
        return None

    table = pd.read_excel(caseData, sheet_name="cases")
    storedCases = table.values[:, 2:]
    caseIndicesIds = table.values[:, 0]
    formtable = pd.read_excel(caseData, sheet_name="formalization")
    colNames = formtable.columns.values[1:]
    types = formtable.values[0, 1:]
    params = formtable.values[1, 1:]

    # Compute the similarity between each new case and every stored case.
    # Rows are indexed by new case (not by stored case), and the number of
    # retained neighbours cannot exceed the size of the case base.
    topK = min(5, len(storedCases))
    distances = np.zeros((len(newCases), topK))
    indices = np.zeros((len(newCases), topK))
    for j, newCase in enumerate(newCases):
        similaritys = np.zeros(np.shape(storedCases))
        for m, colName in enumerate(colNames):
            similaritys[:, m] = calSimilarity(
                newCase.get_parameter(colName), storedCases[:, m], types[m], params[m])
        # A case is only as similar as its least similar attribute.
        caseSimilarity = np.min(similaritys, axis=1)
        # Sorting the negated values yields descending similarity; distances
        # therefore stores negated similarities, as before.
        negated = -caseSimilarity
        order = np.argsort(negated)
        distances[j, :] = negated[order[:topK]]
        indices[j, :] = order[:topK]

    # Look up the environment variables recorded for each stored case.
    casesTable = pd.read_excel(caseData, sheet_name="Envs")
    caseEnvs = casesTable.values[:, 1:]
    caseIds = casesTable.values[:, 0]
    envsTable = pd.read_excel(envData, sheet_name="class")
    envNames = envsTable.values[:, 0]
    envClassNames = envsTable.values[:, 1]
    y, envNames_terrain = case_Envtable(envNames, envClassNames, caseIndicesIds, caseEnvs, caseIds)

    dic = {}
    # Store the recommended environment variables in the result dict.
    for i in range(len(newCases)):
        caseIndex = int(indices[i][0])
        result = _covariates_for_case(y[caseIndex], envNames_terrain)
        recCase = {'caseID': str(int(y[caseIndex][0])), 'covariates': result}
        dic['most_similiar_case'] = recCase

        # Describe the case currently being processed (previously this always
        # read newCases[0], which mismatched the rest of the dict).
        dic['case_formalization'] = {
            colName: format(newCases[i].get_parameter(colName), '.0f')
            for colName in colNames
        }

        simicase = []
        simi = -distances[i][0]
        num = len(result)
        for k in range(topK):
            similarity = -distances[i][k]
            if not similarity > 0:
                # Cases without positive similarity are not reported.
                continue
            caseIndex = int(indices[i][k])
            result = _covariates_for_case(y[caseIndex], envNames_terrain)
            simicase.append({
                'rank': k + 1,
                'caseID': str(int(y[caseIndex][0])),
                'covariates': result,
                'similarity': format(similarity, '.3f'),
            })
            # Among cases tied with the best similarity, recommend the one
            # that lists the most covariates.
            if similarity == simi and len(result) > num:
                recCase['caseID'] = str(int(y[caseIndex][0]))
                recCase['covariates'] = result
                dic['most_similiar_case'] = recCase
                num = len(result)
        dic['similiar_cases'] = simicase
    return dic