forked from archangelic/pinhook-tilde
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build_tildetalk.py
executable file
·106 lines (88 loc) · 2.86 KB
/
build_tildetalk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
import json
from os import path, listdir
import re
import shlex
import string
import subprocess
import traceback
import markovify
import nltk
# ASCII letters (lowercase, then uppercase) followed by the decimal digits,
# stored as a tuple of single-character strings.
valid_chars = tuple(string.ascii_letters + string.digits)
class POSifiedText(markovify.NewlineText):
    """NewlineText variant whose tokens carry an NLTK tag as ``word::TAG``."""

    def word_split(self, sentence):
        """Split *sentence* into tokens and suffix each one with its tag.

        The sentence is split with ``self.word_split_pattern``, empty pieces
        are dropped, and every item returned by ``nltk.pos_tag`` is joined
        with ``"::"``.
        """
        pieces = re.split(self.word_split_pattern, sentence)
        tokens = list(filter(len, pieces))
        return ["::".join(tagged) for tagged in nltk.pos_tag(tokens)]

    def word_join(self, words):
        """Rebuild plain text, keeping only what precedes ``"::"`` in each token."""
        return " ".join(token.partition("::")[0] for token in words)
def make_sentence(word_list, user=None):
    """Turn the message words of one log line into a cleaned-up sentence.

    Args:
        word_list: Message tokens (strings). The list is not modified.
        user: Nick the message belongs to. When omitted, the module-level
            ``user`` set by the log-parsing loop is used if it exists, which
            is how this function has always been called.

    Returns:
        The words joined by single spaces, with any word starting with
        ``http`` left out. Returns ``''`` when *word_list* is empty or when
        its first word starts with ``!``. A first word ending in ``:`` is
        dropped. For the nick ``cosnok`` the characters ``"``, ``[`` and
        ``]`` are additionally stripped from both ends.
    """
    if user is None:
        # Backward compatibility: callers that pass only the words rely on
        # the global ``user`` assigned just before the call.
        user = globals().get('user')

    if not word_list:
        return ''

    first = word_list[0]
    if first.endswith(':'):
        # Slice instead of pop(0) so the caller's list is left untouched.
        words = word_list[1:]
    elif first.startswith('!'):
        return ''
    else:
        words = word_list

    sentence = ' '.join(w for w in words if not w.startswith('http')).strip()
    if user == 'cosnok':
        sentence = sentence.strip('"[]')
    return sentence
def make_user_file(user, sentences):
    """Write *sentences* to ``users/<user>``, one per line.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    """
    target = path.join('users', user)
    with open(target, 'w') as handle:
        handle.writelines(line + '\n' for line in sentences)
def convert_to_json(user):
    """Build a POSifiedText model from ``users/<user>`` and save it as ``json/<user>``."""
    source = path.join('users', user)
    with open(source) as corpus_file:
        corpus = corpus_file.read()

    serialized = POSifiedText(corpus).to_json()

    # The value returned by to_json() is handed to json.dump unchanged.
    # NOTE(review): presumably that value is already a JSON string, so the
    # file holds a JSON-encoded string -- confirm against whatever reads it.
    destination = path.join('json', user)
    with open(destination, 'w') as out_file:
        json.dump(serialized, out_file)
def user_changed(user, sentences):
    """Report whether *user*'s stored corpus differs from *sentences*.

    Args:
        user: File name to look for inside the ``users`` directory.
        sentences: Iterable of strings; the expected file content is each
            sentence followed by a newline.

    Returns:
        ``True`` when ``users/<user>`` does not exist yet or its content
        differs from the expected text. ``False`` when the content matches,
        when *user* is empty, or when the directory or file cannot be read
        (best effort: such a user is treated as unchanged).
    """
    expected = ''.join(s + '\n' for s in sentences)
    if not user:
        return False
    try:
        if user not in listdir('users'):
            return True
        with open(path.join('users', user)) as f:
            return f.read() != expected
    except (OSError, ValueError):
        # OSError: missing/unreadable directory or file.
        # ValueError: undecodable content (UnicodeError) or an invalid path.
        # Anything else, including KeyboardInterrupt, now propagates.
        return False
# Control bytes to remove from the raw log; 0x03 may be followed by one or two
# digits and an optional ",<digits>" part (these look like IRC formatting and
# colour codes -- confirm). A raw bytes literal is used so that ``\d`` reaches
# the regex engine instead of being an invalid string escape.
regex = re.compile(rb"\x01|\x1f|\x02|\x12|\x0f|\x16|\x03(?:\d{1,2}(?:,\d{1,2})?)?")

with open('log', 'rb') as log_file:
    log = regex.sub(b'', log_file.read())
# Undecodable bytes become U+FFFD rather than aborting the run.
log = log.decode('UTF-8', errors='replace')
# Drops every occurrence of the literal text "ACTION" from the log.
log = log.replace('ACTION', '')

# Lookup table consulted by nick later in the script to replace a nick with
# the value stored for it.
with open('nick_merge.json') as c:
    canon = json.load(c)
# Group the sentences found in the log by speaker. The second
# whitespace-separated field of a line is taken as the nick and everything
# after it as the message.
text_dict = {}
for line in log.split('\n'):
    fields = line.split()
    try:
        # ``user`` has to remain a module-level name: make_sentence() reads it.
        user = fields[1]
        if user in canon:
            user = canon[user]
        sentence = make_sentence(fields[2:])
    except IndexError:
        # Line too short to hold a nick and a message.
        continue
    if sentence:
        text_dict.setdefault(user, []).append(sentence)
# Rewrite the corpus file of every speaker whose sentences changed.
# The name becomes a file name under users/, so only names made entirely of
# word characters and hyphens are accepted. fullmatch() also rejects a
# trailing newline, which ``$`` would have let through.
valid_name = re.compile(r'[\w-]+')
changed_users = []
for entry, entry_sentences in text_dict.items():
    if valid_name.fullmatch(entry) and user_changed(entry, entry_sentences):
        changed_users.append(entry)
        make_user_file(entry, entry_sentences)
# Regenerate the JSON model for each changed speaker that has a file in users/.
for user in listdir('users'):
    if user not in changed_users:
        continue
    try:
        convert_to_json(user)
    except Exception:
        # Best effort: report the failing user and carry on with the rest.
        # KeyboardInterrupt/SystemExit are deliberately not caught, so the
        # run can still be aborted.
        print(user)
        traceback.print_exc()