-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarkov.erl
74 lines (63 loc) · 2.06 KB
/
markov.erl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
-module(markov).
-include_lib("records.hrl").
-compile(export_all).
%% @doc Tokenise String into the word list that genTable/1 walks.
%%
%% The text is split on ASCII whitespace (space, tab, newline, vertical tab,
%% form feed, carriage return). Splitting on the space character alone would
%% leave tabs and line breaks inside a token, where filterPrintableAscii/1
%% then strips them and fuses the neighbouring words ("a\nb" -> "ab").
%%
%% Words that contain no printable ASCII at all come back from the filter as
%% "" and are dropped, so the table never contains an empty word.
%%
%% The result is framed by two " " entries at the front and the
%% "<<<undefined>>>" entry at the end; genTable/1 uses the leading pair as the
%% prefix of the first real word and the trailing entry as the last suffix.
splitString(String) ->
    Words = string:tokens(String, " \t\n\v\f\r"),
    Tokens = [Token || Token <- filterPrintableAscii(Words), Token =/= []],
    [" ", " " | Tokens] ++ ["<<<undefined>>>"].
%% @doc Strip every character outside the printable ASCII range (space to
%% tilde) from each token in TokenList.
%%
%% Returns a list of the same length and order; a token made up only of
%% stripped characters becomes "".
%%
%% The `unicode' option matters: without it re:replace/4 requires the subject
%% to be iodata, so a token containing any code point above 255 (for example
%% an em dash) raised badarg instead of having that character removed.
filterPrintableAscii(TokenList) ->
    [re:replace(Token, "[^ -~]", "", [global, unicode, {return, list}])
     || Token <- TokenList].
%% @doc Build the reduced chain table for String.
%%
%% The text is tokenised with splitString/1 and handed to genTable/4 as three
%% views of the same list: the full list, the list from its second element and
%% the list from its third element. Walking them in lock step pairs every two
%% consecutive tokens with the token that follows them.
genTable(String) ->
    [_ | FromSecond] = Tokens = splitString(String),
    [_ | FromThird] = FromSecond,
    genTable(Tokens, FromSecond, FromThird, []).
%% @doc Walk three token lists in lock step, build one chain per element of
%% L3, and return the reduced table.
%%
%% For each step the heads of L1 and L2 are joined with a single space to form
%% the prefix and the head of L3 is the suffix. The walk stops when L3 is
%% exhausted. Chains already present in Acc stay in front of the newly built
%% ones, and the combined list is passed to reduceTable/1.
%%
%% New chains are collected by consing and reversed once, instead of appending
%% to the accumulator on every step, which was quadratic in the token count.
genTable(_, _, [], Acc) ->
    reduceTable(Acc);
genTable(L1, L2, L3, Acc) ->
    reduceTable(Acc ++ collectChains(L1, L2, L3, [])).

%% Builds the chains for genTable/4 in input order. Rev holds the chains
%% built so far, newest first.
%% NOTE(review): factories:chainFactory/2 is defined outside this file; it is
%% presumably the constructor for #chain records - confirm.
collectChains(_, _, [], Rev) ->
    lists:reverse(Rev);
collectChains([First | Rest1], [Second | Rest2], [Suffix | Rest3], Rev) ->
    Prefix = string:join([First, Second], " "),
    Chain = factories:chainFactory(Prefix, Suffix),
    collectChains(Rest1, Rest2, Rest3, [Chain | Rev]).
%% @doc Tell whether the second argument is a #chain record whose prefix field
%% is exactly equal to the first argument.
%%
%% Anything else - a chain with a different prefix, or a term that is not a
%% #chain record at all - yields false.
hasPrefix(Wanted, #chain{prefix = Actual}) ->
    Actual =:= Wanted;
hasPrefix(_Wanted, _Other) ->
    false.
%% @doc Tell whether the second argument is a #chain record whose suffix field
%% is exactly equal to the first argument.
%%
%% Anything else - a chain with a different suffix, or a term that is not a
%% #chain record at all - yields false.
hasSuffix(Wanted, #chain{suffix = Actual}) ->
    Actual =:= Wanted;
hasSuffix(_Wanted, _Other) ->
    false.
%% @doc Return the elements of Table for which hasPrefix/2 holds for Prefix,
%% in their original order.
gatherChainsWithPrefix(Prefix, Table) ->
    [Chain || Chain <- Table, hasPrefix(Prefix, Chain)].
%% @doc Return the elements of Table for which hasSuffix/2 holds for Suffix,
%% in their original order.
gatherChainsWithSuffix(Suffix, Table) ->
    [Chain || Chain <- Table, hasSuffix(Suffix, Chain)].
%% @doc Count how many elements of Chains satisfy hasSuffix/2 for Suffix.
%% Returns 0 for an empty list or when nothing matches.
countSameSuffix(Suffix, Chains) ->
    length([Chain || Chain <- Chains, hasSuffix(Suffix, Chain)]).
%% @doc Summarise Chains as one entry per distinct suffix.
%% Entry point for genSuffixes/2, started with no entries collected yet.
genSuffixes(Chains) ->
    NoEntriesYet = [],
    genSuffixes(Chains, NoEntriesYet).
%% @doc Append to Acc one entry per distinct suffix found in Chains.
%%
%% Entries appear in the order in which each suffix first occurs in Chains.
%% Each entry is built by factories:suffixFactory(Suffix, Count), where Count
%% is the number of chains in Chains carrying that suffix.
%%
%% Each group is split off with a single lists:partition/2 pass and entries
%% are consed and reversed once. The previous version filtered, counted and
%% subtracted (--) the same list separately and appended to the accumulator
%% on every step.
genSuffixes([], Acc) ->
    Acc;
genSuffixes(Chains, Acc) ->
    Acc ++ tallySuffixes(Chains, []).

%% Peels off, one group at a time, every chain sharing the suffix of the
%% first remaining chain. Rev holds the entries built so far, newest first.
%% NOTE(review): factories:suffixFactory/2 is defined outside this file; the
%% shape of the value it returns is not visible here.
tallySuffixes([], Rev) ->
    lists:reverse(Rev);
tallySuffixes([#chain{suffix = Suffix} | _] = Chains, Rev) ->
    {Same, Others} =
        lists:partition(fun(Chain) -> hasSuffix(Suffix, Chain) end, Chains),
    Entry = factories:suffixFactory(Suffix, length(Same)),
    tallySuffixes(Others, [Entry | Rev]).
%% @doc Collapse Table so that every distinct prefix is represented once.
%% Entry point for reduceTable/2, started with nothing collected yet.
reduceTable(Table) ->
    NothingReducedYet = [],
    reduceTable(Table, NothingReducedYet).
%% @doc Append to Acc one reduced chain per distinct prefix found in Table.
%%
%% Reduced chains appear in the order in which each prefix first occurs in
%% Table. Each one is built by factories:reducedChainFactory(Prefix, Suffixes),
%% where Suffixes is genSuffixes/1 applied to the chains sharing that prefix.
%%
%% Each group is split off with a single lists:partition/2 pass and results
%% are consed and reversed once. The previous version filtered and then
%% subtracted (--) the same list and appended to the accumulator on every
%% step.
reduceTable([], Acc) ->
    Acc;
reduceTable(Table, Acc) ->
    Acc ++ groupByPrefix(Table, []).

%% Peels off, one group at a time, every chain sharing the prefix of the
%% first remaining chain. Rev holds the reduced chains built so far, newest
%% first.
%% NOTE(review): factories:reducedChainFactory/2 is defined outside this
%% file; the shape of the value it returns is not visible here.
groupByPrefix([], Rev) ->
    lists:reverse(Rev);
groupByPrefix([#chain{prefix = Prefix} | _] = Table, Rev) ->
    {SamePrefix, Others} =
        lists:partition(fun(Chain) -> hasPrefix(Prefix, Chain) end, Table),
    Reduced = factories:reducedChainFactory(Prefix, genSuffixes(SamePrefix)),
    groupByPrefix(Others, [Reduced | Rev]).