-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathFRIQ_get_best_action.m
144 lines (117 loc) · 5.09 KB
/
FRIQ_get_best_action.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
function [ action ] = FRIQ_get_best_action(state, actionset)
%FRIQ_get_best_action: return the proposed best action for the current state
%
% FRIQ-learning framework v0.70
% https://github.com/szaguldo-kamaz/
%
% Author: David Vincze <david.vincze@uni-miskolc.hu>
% Copyright (c) 2013-2022 by David Vincze
%
% Inputs:
%   state     - observation for the state antecedents; it is passed to
%               FIVEVagDist_fixres and concatenated as [state, action], so it
%               is presumably a row vector with rulewidth-2 elements
%               (TODO confirm against callers)
%   actionset - candidate action values; elements 1..numofactions are evaluated
%
% Output:
%   action    - index (1..numofactions) of the candidate whose conclusion, as
%               returned by FIVEVagConcl_FRIQ_bestact, is the largest
%               (max() returns the first index on ties)
%
% Globals read:    U, VE, R, numofactions, measure_rb_usage_state
% Globals updated: Rusage (only when measure_rb_usage_state == 1)
%
% Errors:
%   'The points are out of range!' when a non-NaN action value (candidate or
%   rule) lies outside the action universe.

global U VE R numofactions
global measure_rb_usage_state Rusage

actionconclusions = zeros(1, numofactions);

% Column layout of R used here: 1..rulewidth-2 are the state antecedents and
% column rulewidth-1 is the action antecedent.
[numofrules, rulewidth] = size(R);
numofstatedims = rulewidth - 2;
actiondim = rulewidth - 1;

% calculate distances from rules for the 'state' antecedents excluding 'action'
Ustates = U(1:numofstatedims, :);
VEstates = VE(1:numofstatedims, :);
[nU, mU] = size(Ustates);
Ronlystates = R(:, 1:numofstatedims);

% This is from FIVERuleDist!
% current state distance from each rule (independent of the candidate action,
% so it is computed only once)
vagdist_states = zeros(numofrules, numofstatedims);
for ruleno = 1:numofrules
    vagdist_states(ruleno, :) = FIVEVagDist_fixres(Ustates, nU, mU, VEstates, Ronlystates(ruleno, :), state);
end

% now calculate distance for possible actions:
% for 'action' values no alignment needed, because they must hit exactly in the action dimension
VEk = VE(actiondim, :); % VE of action dimension
Uk = U(actiondim, :);   % U of action dimension
Uk_n = size(Uk, 2);

% The universe index of each rule's action value does not depend on the
% candidate action, so look it up once here instead of once per action.
% NaN marks an indifferent (not existing) rule antecedent.
ruleactidx = zeros(1, numofrules);
for curruleno = 1:numofrules
    ruleactidx(curruleno) = gba_nearest_universe_index(R(curruleno, actiondim), Uk, Uk_n);
end

% distance vector buffer: state distances followed by the action distance
dmlen = rulewidth - 1;
dm = zeros(1, dmlen);

for actno = 1:numofactions

    FRIQ_check_universes('actionconclusions', state, actionset(actno));

    % universe index of the candidate action (NaN when the action is NaN)
    actidx = gba_nearest_universe_index(actionset(actno), Uk, Uk_n);
    action_is_nan = isnan(actidx);

    RD = zeros(1, numofrules);

    for curruleno = 1:numofrules % Check all rules

        ruleidx = ruleactidx(curruleno);

        % This is from FIVEVagDist
        if action_is_nan || isnan(ruleidx)
            % not a valid scaled distance (D=0): if either point is NaN the
            % action antecedent is indifferent. Previously a NaN candidate
            % action left its index unset while it was still used here.
            Da = 0;
        else
            Da = abs(VEk(actidx) - VEk(ruleidx)); % the scaled distance of the two points
        end

        % dm = [vagdist_states(curruleno, :) Da];
        % This is much faster:
        for dmindex = 1:numofstatedims
            dm(dmindex) = vagdist_states(curruleno, dmindex);
        end
        dm(dmlen) = Da;

        %- This is from FIVERuleDist!
        % Use a mask instead of overwriting dm, so the preallocated buffer
        % keeps its size across iterations.
        infmask = dm < 0;
        if any(infmask) % there are inf elements (denoted by <0 value)
            RD(curruleno) = -norm(dm(infmask), 2); % RD<0 denotes that RD=inf
        else % there are no inf elements
            % Euclidean distance of the observation from the rule antecedents
            RD(curruleno) = norm(dm, 2);
        end

    end

    actionconclusions(actno) = FIVEVagConcl_FRIQ_bestact(U, VE, R, RD);

    % measure usage - collect weights/distances - accumulate contribution of each rule
    if measure_rb_usage_state == 1
        weights = FIVEVagConclWeight_fixres(U, VE, R, [state, actionset(actno)]);
        Rusage = Rusage + weights';
    end

end

[~, action] = max(actionconclusions);

% TODO: think:
%- calculate distance once instead of 'numofactions'
%- for 'action' values no alignment needed, because they must exactly hit in the action dimension
%- what if it is missing? - no problem because it is aligned to the universe not to existing rules
%- ?


function idx = gba_nearest_universe_index(P, Uk, Uk_n)
%gba_nearest_universe_index: index of the sample of Uk that is nearest to P
%
% This is from FIVEVagDist_FixRes !
%
% Returns NaN when P is NaN. Raises 'The points are out of range!' when P is
% outside [Uk(1), Uk(Uk_n)]. The lookup computes a candidate cell from the
% relative position of P in the universe and then picks the closer of the two
% samples bounding that cell (the lower one on ties).

if isnan(P)
    idx = NaN;
    return;
end

if (P < Uk(1)) || (P > Uk(Uk_n))
    error('The points are out of range!');
end

% upper bound is handled separately: there is no sample above it to compare with
if P == Uk(Uk_n)
    idx = Uk_n;
    return;
end

Uk_size = Uk_n - 1;
Uk_domain = Uk(Uk_n) - Uk(1);
tempP = (P - Uk(1)) / Uk_domain;
where = floor(tempP * Uk_size) + 1;

if where > 0
    % Rounding can push a point just below the upper bound into cell Uk_n,
    % which would make Uk(where + 1) index past the end; clamp to the last cell.
    where = min(where, Uk_size);
    if abs(Uk(where) - P) <= abs(Uk(where + 1) - P)
        idx = where;
    else
        idx = where + 1;
    end
else
    idx = 1;
end