# bot.py
import torch
from torch.utils.data import Dataset

class CharDataset(Dataset):

    def __init__(self, data, block_size):
        chars = sorted(list(set(data)))
        data_size, vocab_size = len(data), len(chars)
        print('data has %d characters, %d unique.' % (data_size, vocab_size))

        self.stoi = { ch:i for i,ch in enumerate(chars) }
        self.itos = { i:ch for i,ch in enumerate(chars) }
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.data = data

    def __len__(self):
        return len(self.data) - self.block_size

    def __getitem__(self, idx):
        # grab a chunk of (block_size + 1) characters from the data
        chunk = self.data[idx:idx + self.block_size + 1]
        # encode every character to an integer
        dix = [self.stoi[s] for s in chunk]
        """
        arrange data and targets so that the first i elements of x
        will be asked to predict the i-th element of y. Notice that
        the eventual language model will actually make block_size
        individual predictions at the same time based on this data,
        so we are being clever and amortizing the cost of the forward
        pass of the network. So for example if block_size is 4, then
        we could e.g. sample a chunk of text "hello", the integers in
        x will correspond to "hell" and in y will be "ello". This will
        then actually "multitask" 4 separate examples at the same time
        in the language model:
        - given just "h", please predict "e" as next
        - given "he" please predict "l" next
        - given "hel" predict "l" next
        - given "hell" predict "o" next

        In addition, because the DataLoader will create batches of examples,
        every forward/backward pass during training will simultaneously train
        a LOT of predictions, amortizing a lot of computation. In particular,
        for a batched input of integers X (B, T) where B is batch size and
        T is block_size and Y (B, T), the network will during training be
        simultaneously training to make B*T predictions, all at once! Of course,
        at test time we can parallelize across batch B, but unlike during training
        we cannot parallelize across the time dimension T - we have to run
        a forward pass of the network to recover the next single character of the
        sequence along each batch dimension, and repeatedly always feed in a next
        character to get the next one.

        So yes there is a big asymmetry between train/test time of autoregressive
        models. During training we can go B*T at a time with every forward pass,
        but during test time we can only go B at a time, T times, with T forward
        passes.
        """
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y
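
# A quick sanity check of the shift-by-one layout described in the docstring,
# on its toy "hello" corpus (illustrative only; prints one stats line):
_demo = CharDataset("hello", block_size=4)
_x, _y = _demo[0]
assert ''.join(_demo.itos[int(i)] for i in _x) == "hell"
assert ''.join(_demo.itos[int(i)] for i in _y) == "ello"  # x shifted left by one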

block_size = 128  # spatial extent of the model for its context
text = open('chat/all.txt', 'r').read()  # don't worry we won't run out of file handles
train_dataset = CharDataset(text, block_size)  # every 128-character window of the chat log is one sample

# Load model
from mingpt.model import GPT, GPTConfig

mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf).cuda()

# Load weights (the checkpoint must match the config above)
model.load_state_dict(torch.load("model.pth"))
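# Note: .cuda() and torch.load() assume a CUDA-capable GPU is available; a
# CPU-only fallback would look like this (untested sketch):
#   model = GPT(mconf)
#   model.load_state_dict(torch.load("model.pth", map_location="cpu"))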

from mingpt.utils import sample

def run(context):
    # encode the prompt; characters never seen in training would raise a KeyError here
    x = torch.tensor([train_dataset.stoi[s] for s in context], dtype=torch.long)[None, ...].cuda()
    # sample 500 characters of continuation from the model
    y = sample(model, x, 500, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    # print(completion)
    return completion
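
# Usage sketch: prompts follow the "{sender}:\t<text>" convention used by the
# chat() handler below ("Alice" is a made-up example name):
#   reply = run("{Alice}:\thello there")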

# Bot
import telebot
import configparser

config = configparser.ConfigParser()
config.read('config.ini')
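
# Expected config.ini layout, inferred from the key read below (the token
# value is a placeholder for your own bot token):
#   [TELEGRAM]
#   secret = <telegram-bot-api-token>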
bot = telebot.TeleBot(__name__)
bot.config['api_key'] = config['TELEGRAM']['secret']

@bot.route('/start ?(.*)')
def example_command(message, cmd):
    chat_dest = message['chat']['id']
    msg = "Welcome to IUT GPT bot"
    bot.send_message(chat_dest, msg)
    # bot.register_next_step_handler(msg, chat)
    print(message['chat']['first_name'])

@bot.route('(?!/).+')  # any message that does not start with '/'
def chat(message):
    # print(message['chat'])
    completion = run("{" + message['chat']['first_name'] + "}:\t" + message['text'])
    bot.send_message(message['chat']['id'], completion)

print("Bot Started")
bot.poll(debug=True)