-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarkov_chain.py
54 lines (46 loc) · 2.07 KB
/
markov_chain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import random
class MarkovChain:
    """Character-level, first-order Markov chain built from a text file.

    The chain records, for every character in the text, how often each other
    character immediately follows it.  Those counts are stored in ``table`` as
    integer weights scaled to ``ACCURACY`` and are used by :meth:`get` to
    produce random text with the same character-to-character statistics.

    Typical use::

        chain = MarkovChain("corpus.txt")
        chain.generate_table()
        text = chain.get(200)
    """

    # Scale of the integer weights kept in ``table``: a follower that always
    # comes next gets weight ACCURACY, one that follows half the time gets
    # roughly ACCURACY / 2, and so on.
    ACCURACY = 1000

    def __init__(self, fn):
        """Load the text to learn from.

        Args:
            fn: Path of the file to read.  The file is decoded as UTF-8 and
                read into memory in one go.

        The transition table starts out empty; call :meth:`generate_table`
        to fill it.
        """
        self.table = {}
        print("[*] reading file")
        with open(fn, "r", encoding="utf8") as file:
            # Kept as a list of single characters, one entry per character.
            self.data = list(file.read())

    def generate_table(self):
        """(Re)build ``self.table`` from ``self.data``.

        After the call, ``self.table[c][d]`` is the integer weight of
        character ``d`` following character ``c``, scaled so that the weights
        for one ``c`` sum to approximately ``ACCURACY``.

        The table is rebuilt from scratch on every call, so calling this
        method more than once is safe and always yields the same result for
        the same data.
        """
        print("[*] generating table")

        # Count every adjacent pair (current character, next character).
        counts = {}
        for char_, next_char in zip(self.data, self.data[1:]):
            followers = counts.setdefault(char_, {})
            followers[next_char] = followers.get(next_char, 0) + 1

        # convert total amounts to relative values
        table = {}
        for key_char, followers in counts.items():
            total = sum(followers.values())
            table[key_char] = {
                # Clamp to 1 so that a transition which was actually seen in
                # the text never rounds down to an impossible weight of 0.
                key_next_char: max(1, round(absolute / total * self.ACCURACY))
                for key_next_char, absolute in followers.items()
            }

        # Swap in the finished table only once it is complete.
        self.table = table

    def print_table(self):
        """Print every character with its followers, most likely first."""
        print("[*] printing table")
        width = len(str(self.ACCURACY))
        for key_char, followers in self.table.items():
            print(f"{repr(key_char)}:")
            # sorted() is stable, so equally likely followers keep the order
            # in which they first appeared in the text.
            ranked = sorted(followers.items(), key=lambda item: -item[1])
            for key_next_char, probability in ranked:
                print(f"\t{repr(key_next_char)}:\t{str(probability).zfill(width)}/{self.ACCURACY}")
            print()

    def get(self, length):
        """Generate random text by walking the transition table.

        The first character is picked uniformly from the characters that have
        at least one recorded follower; every later character is drawn from
        the followers of the previous one, weighted by ``self.table``.  When
        the walk reaches a character with no usable followers, it restarts
        from a uniformly chosen character instead of failing.

        Args:
            length: Number of characters to produce.

        Returns:
            A string of exactly ``length`` characters, or ``""`` when
            ``length`` is zero or negative.

        Raises:
            ValueError: If the table is empty (``generate_table`` has not been
                called, or the text had fewer than two characters).
        """
        print(f"[*] getting {length} chars from table")
        if length <= 0:
            return ""
        if not self.table:
            raise ValueError(
                "transition table is empty; call generate_table() on a text "
                "with at least two characters first"
            )

        states = list(self.table)
        result = [random.choice(states)]
        while len(result) < length:
            followers = self.table.get(result[-1], {})
            weights = list(followers.values())
            if sum(weights) > 0:
                # Weighted draw; equivalent to choosing from a list in which
                # each follower is repeated ``weight`` times.
                next_char = random.choices(list(followers), weights=weights)[0]
            else:
                # Dead end (e.g. the final character of the text that never
                # occurs anywhere else): restart the walk.
                next_char = random.choice(states)
            result.append(next_char)
        return "".join(result)
if __name__ == "__main__":
    # Interactive entry point: ask for the corpus file, build the transition
    # table from it, then ask how much text to generate and print the result.
    source_path = input("file to open: ")
    chain = MarkovChain(source_path)
    chain.generate_table()

    requested_length = int(input("characters to generate: "))
    generated_text = chain.get(requested_length)
    print(generated_text)