-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.rb
82 lines (70 loc) · 1.7 KB
/
main.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Global multiplier described by its author as a hashing parameter.
# NOTE(review): nothing in the visible code reads this global — confirm it is still needed.
$MULTIPLIER = 0.42 # this number must be chosen carefully to enable a uniform distribution of hashes
# A list of words held together with its length.
class Prefix
  # @return [Integer] number of words in the prefix
  attr_reader :nPref
  # @return [Array] the words making up the prefix
  attr_reader :pref

  # Builds a prefix either by copying an existing word list or by repeating
  # a single word.
  #
  # @param args [Hash] options keyed by string:
  #   "prefix" - existing array of words to copy (used whenever it is truthy);
  #   "nPref"  - number of slots to create when "prefix" is absent;
  #   "pref"   - word placed in every slot when "prefix" is absent.
  def initialize(args)
    if args["prefix"]
      @pref = args["prefix"].clone
      # Keep the advertised length in step with the copied words; this branch
      # used to leave nPref unset (nil).
      @nPref = @pref.size
    else
      @nPref = args["nPref"]
      # Every slot refers to the same "pref" object, exactly as the former
      # element-by-element fill did.
      @pref = Array.new(@nPref, args["pref"])
    end
  end
end
# Markov-chain text generator: maps each run of @NPREF consecutive words to
# the words seen right after it, then walks that table to print random text.
class Chain
  # Starts with an empty state table and a current prefix filled with the
  # non-word marker.
  def initialize
    @NPREF = 2
    @NONWORD = "<endoftext>" # the "word" that can't appear
    @statetab = {}
    @prefix = Prefix.new({"nPref" => @NPREF, "pref" => @NONWORD}) # initial prefix
  end

  # Records +word+ as a successor of the current prefix, then slides the
  # prefix forward by one word.
  #
  # @param word [String] next token of the input
  def add(word)
    # Key on a copy: the live prefix array is mutated just below, and a Hash
    # key must not change after it has been inserted.
    key = @prefix.pref.clone
    (@statetab[key] ||= []).push(word)
    @prefix.pref.delete_at(0)
    @prefix.pref.push(word)
  end

  # Feeds the whitespace-separated tokens of +inputText+ into the chain,
  # stopping right after the first token equal to the non-word marker.
  #
  # @param inputText [String] training text
  def build(inputText)
    inputText.split(" ").each do |token|
      add(token)
      break if token == @NONWORD
    end
  end

  # Prints up to +nWords+ randomly chosen words, each followed by a space,
  # and then a newline. Stops early when the non-word marker is drawn or when
  # the current prefix has no recorded successors.
  #
  # @param nWords [Integer] maximum number of words to print
  def generate(nWords)
    prefix = Prefix.new({"nPref" => @NPREF, "pref" => @NONWORD})
    (0...nWords).each do
      states = @statetab[prefix.pref]
      # Unknown prefix (nothing was built, or the input never ended with the
      # marker token): stop cleanly instead of calling a method on nil.
      break unless states

      suf = states[rand(states.size)]
      break if suf == @NONWORD

      print "#{suf} "
      prefix.pref.delete_at(0)
      prefix.pref.push(suf)
    end
    print "\n"
  end
end
$MAXGEN = 1000 # upper bound on the number of words to generate

# Corpus path: first command-line argument, or the default sample file.
inputFile = ARGV[0] || "markov_chains.txt"
# File.read closes the handle itself; the path is used as given so that
# absolute paths work as well as relative ones.
inputText = File.read(inputFile)
# Chain#build splits on any whitespace, so newlines need no special handling
# (the earlier non-mutating gsub call had no effect and is dropped). The
# marker is appended with a leading space so it always forms its own token,
# even when the file does not end in whitespace.
inputText << " <endoftext>"
chain = Chain.new
chain.build(inputText)
chain.generate($MAXGEN)