-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathletter_by_letter.py
46 lines (37 loc) · 1.52 KB
/
letter_by_letter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import numpy as np
from keras import Sequential
from keras.layers import LSTM, Dense, Activation
from keras.optimizers import RMSprop
from pickle import dump
text = open("republic.txt", encoding='utf-8').read().lower()
text = text.replace('--', ' ')
text = text.replace('\n', ' ')
chars = sorted(list(set(text)))
print('total chars:', len(chars))
print(chars)
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))
SEQUENCE_LENGTH = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - SEQUENCE_LENGTH, step):
sentences.append(text[i: i + SEQUENCE_LENGTH])
next_chars.append(text[i + SEQUENCE_LENGTH])
print(f'num training examples: {len(sentences)}')
X = np.zeros((len(sentences), SEQUENCE_LENGTH, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
for t, char in enumerate(sentence):
X[i, t, char_indices[char]] = 1
y[i, char_indices[next_chars[i]]] = 1
print(sentences[100], next_chars[100])
print(X.shape, y.shape)
model = Sequential()
model.add(LSTM(128, input_shape=(SEQUENCE_LENGTH, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=0.01), metrics=['accuracy'])
history = model.fit(X, y, validation_split=0.05, batch_size=128, epochs=20, shuffle=True).history
model.save('lbl_40.h5')
dump(history, open("history.pkl", "wb"))