# hyperparameter_tuning.py
print('==== start importing packages ====')
from distutils.version import LooseVersion
import warnings
import tensorflow as tf
# # Check TensorFlow Version
# assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use Tensorflow version 1.0 or newer. You are using {}'.format(tf.__version__)
# print('Tensorflow Version: {}'.format(tf.__version__))
# # Check for GPU
# if not tf.test.gpu_device_name():
# warnings.warn('No GPU found. Please ensure you have installed TensorFlow correctly')
# else:
# print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Restrict TensorFlow to only use the first GPU
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
# %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import math
import pickle
import os
from collections import Counter
import time
from datetime import datetime
from gensim.models import KeyedVectors
from itertools import chain
import random, sys
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
# import DAR_model_new
import final_new_models2
import data_generators
import keras
from keras import backend as K
from keras.preprocessing.sequence import pad_sequences
from keras import Sequential
from keras.layers import Lambda, merge, Input, Reshape, Embedding, Dropout, Bidirectional, LSTM, GlobalMaxPooling1D, Dense, Flatten, TimeDistributed,Concatenate
from keras.optimizers import RMSprop
from keras.models import Model, load_model
from keras.layers import Conv1D, ThresholdedReLU, MaxPooling1D, Activation, concatenate # TimeDistributedDense
from keras.initializers import RandomUniform
from keras_contrib.layers import CRF
from keras.optimizers import RMSprop, Adam, Adadelta
from keras.callbacks import EarlyStopping
from keras.engine.topology import Layer
from keras_bert import load_trained_model_from_checkpoint
from models.mimic_raheja import RahejaModel
from models.pool_crf import PoolCRF
from models.att_crf import AttCRF
from models.mha_pool_crf import MHAPoolCRF
from models.mha_att_crf import MHAAttCRF
from models.mha_csa_crf import MHACsaCRF
from models.pool_2rnn import Pool2RNN
from models.att_2rnn import Att2RNN
from models.csa_2rnn import CSA2RNN
from models.bert_crf import BertCRF
from models.bert_2rnn import Bert2RNN
# specify corpus and model
data = 'MRDA' #['SwDA', 'MRDA']
model_name = 'pool-crf' #['raheja_tetreault', 'pool-crf', 'att-crf', 'mha-pool-crf', 'mha-att-crf', 'mha-csa-crf', 'pool-2rnn', 'att-2rnn', 'csa_2rnn', 'bert-crf', 'bert-2rnn']
if data == 'MRDA':
    data_dir = "C://Users//31642//anaconda3//envs//thesis_dar//KimThesis//processed_data/MRDA/"
    embedding_dir = "C://Users//31642//anaconda3//envs//thesis_dar//KimThesis//embeddings/MRDA/"
elif data == 'SwDA':
    data_dir = "C://Users//31642//anaconda3//envs//thesis_dar//KimThesis//processed_data/SwDA/"
    embedding_dir = "C://Users//31642//anaconda3//envs//thesis_dar//KimThesis//embeddings/SwDA/"
print('======LOAD DATA =====')
# metadata
metadata = pickle.load(open(data_dir + "metadata.pkl", "rb"))
# embeddings
embd_matrix_google = pickle.load(open(embedding_dir + 'embd_matrix_pre_train_Google_300D.pkl', 'rb'))
embd_matrix_train200 = pickle.load(open(embedding_dir + 'embd_matrix_train_200D.pkl', 'rb'))
embd_matrix_train300 = pickle.load(open(embedding_dir + 'embd_matrix_train_300D.pkl', 'rb'))
# load training, test and validation sets
train_x = pickle.load(open(data_dir + "train_x.pkl", "rb"))
train_y = pickle.load(open(data_dir + "train_y.pkl", "rb"))
test_x = pickle.load(open(data_dir + "test_x.pkl", "rb"))
test_y = pickle.load(open(data_dir + "test_y.pkl", "rb"))
valid_x = pickle.load(open(data_dir + "valid_x.pkl", "rb"))
valid_y = pickle.load(open(data_dir + "valid_y.pkl", "rb"))
# load training, test and validation sets made for character embeddings
train_char_x = pickle.load(open(data_dir + "train_char_x.pkl", "rb"))
test_char_x = pickle.load(open(data_dir + "test_char_x.pkl", "rb"))
valid_char_x = pickle.load(open(data_dir + "valid_char_x.pkl", "rb"))
#load training, test and validation sets made for BERT embeddings
train_bert_tokens = pickle.load(open(data_dir + "train_bert_tokens.pkl", "rb"))
train_bert_segments = pickle.load(open(data_dir + "train_bert_segments.pkl", "rb"))
valid_bert_tokens = pickle.load(open(data_dir + "valid_bert_tokens.pkl", "rb"))
valid_bert_segments = pickle.load(open(data_dir + "valid_bert_segments.pkl", "rb"))
test_bert_tokens = pickle.load(open(data_dir + "test_bert_tokens.pkl", "rb"))
test_bert_segments = pickle.load(open(data_dir + "test_bert_segments.pkl", "rb"))
print('==== set hyperparameter space ====')
hidden_layers_utterance = Integer(low=20, high=200, name='hidden_layers_utterance')
utterance_dropout_rate = Real(low=0.0, high=0.6, name='utterance_dropout_rate')
utterance_recurrent_dropout = Real(low=0.0, high=0.6, name='utterance_recurrent_dropout')
hidden_layers_dialogue = Integer(low=20, high=300, name='hidden_layers_dialogue')
dialogue_dropout_rate = Real(low=0.0, high=0.6, name='dialogue_dropout_rate')
dialogue_recurrent_dropout = Real(low=0.0, high=0.6, name='dialogue_recurrent_dropout')
hidden_layers_labels = Integer(low=20, high=300, name='hidden_layers_labels')
labels_dropout_rate = Real(low=0.0, high=0.6, name='labels_dropout_rate')
labels_recurrent_dropout = Real(low=0.0, high=0.6, name='labels_recurrent_dropout')
word_embd = Categorical(['google', 'train200', 'train300'], name='word_embd')
cnn_dropout = Real(low=0.1, high=0.6, name='cnn_dropout')
kernel_size = Categorical([3, 4, 5, 6, 7], name='kernel_size')
filters = Integer(low=15, high=100, name='filters')
heads_utterance = Categorical([4, 6, 8, 10, 12], name='heads_utterance')
heads_dialogue = Categorical([4, 6, 8, 10, 12], name='heads_dialogue')
attention_dim = Integer(low=50, high=200, name='attention_dim')
optimizer = Categorical(['adam', 'RMSprop'], name='optimizer')
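# Note: only a subset of these dimensions is searched below for the pool-crf model;
# the labels_*, heads_*, and attention_dim dimensions are defined here but are not
# included in `dimensions` for this run.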
print('========== hyperparameter tuning =======================')
r = 30        # number of gp_minimize evaluations (n_calls)
epochs = 3    # training epochs per evaluated configuration
dimensions = [
    word_embd,
    cnn_dropout,
    kernel_size,
    filters,
    hidden_layers_utterance,
    utterance_dropout_rate,
    utterance_recurrent_dropout,
    hidden_layers_dialogue,
    dialogue_dropout_rate,
    dialogue_recurrent_dropout,
    optimizer
]
default_parameters = ['google', 0.1, 3, 30, 128, 0.1, 0.0, 128, 0.1, 0.0, 'adam']
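# default_parameters is passed to gp_minimize as x0 (the first point evaluated) and
# must list one value per entry of `dimensions`, in the same order.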
@use_named_args(dimensions=dimensions)
def fitness(word_embd, cnn_dropout, kernel_size, filters,
            hidden_layers_utterance, utterance_dropout_rate,
            utterance_recurrent_dropout, hidden_layers_dialogue,
            dialogue_dropout_rate, dialogue_recurrent_dropout,
            optimizer):
    # Print the hyper-parameters.
    print('word_embd:', word_embd)
    print('cnn_dropout:', cnn_dropout)
    print('kernel_size:', kernel_size)
    print('filters:', filters)
    print('hidden_layers_utterance:', hidden_layers_utterance)
    print('utterance_dropout_rate:', utterance_dropout_rate)
    print('utterance_recurrent_dropout:', utterance_recurrent_dropout)
    print('hidden_layers_dialogue:', hidden_layers_dialogue)
    print('dialogue_dropout_rate:', dialogue_dropout_rate)
    print('dialogue_recurrent_dropout:', dialogue_recurrent_dropout)
    print('optimizer:', optimizer)
    print()
    # Create the neural network with these hyper-parameters.
    model = PoolCRF(metadata, word_embd, word_embd_matrix_google=embd_matrix_google,
                    word_embd_matrix_train200=embd_matrix_train200, word_embd_matrix_train300=embd_matrix_train300,
                    cnn_dropout=cnn_dropout, kernel_size=kernel_size, filters=filters,
                    hidden_layers_utterance=hidden_layers_utterance, utterance_dropout_rate=utterance_dropout_rate,
                    utterance_recurrent_dropout=utterance_recurrent_dropout, hidden_layers_dialogue=hidden_layers_dialogue,
                    dialogue_dropout_rate=dialogue_dropout_rate, dialogue_recurrent_dropout=dialogue_recurrent_dropout,
                    optimizer=optimizer).create_model()
    # Use Keras to train the model.
    callback = keras.callbacks.EarlyStopping(monitor='acc', patience=epochs)
    steps_per_epoch = len(train_x)
    validation_steps = len(valid_x)
    history = model.fit_generator(
        generator=data_generators.DataGenerators(embedding='word_char', data_y=train_y, data_x=train_x,
                                                 data_char_x=train_char_x, data_x_segments=None,
                                                 data_x_tokens=None).data_generator(),
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        verbose=1,
        callbacks=[callback],
        validation_data=data_generators.DataGenerators(embedding='word_char', data_y=valid_y, data_x=valid_x,
                                                       data_char_x=valid_char_x, data_x_segments=None,
                                                       data_x_tokens=None).data_generator(),
        validation_steps=validation_steps,
        class_weight=None,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=False,
        shuffle=True)
    # Get the classification accuracy on the validation set
    # after the last training epoch.
    accuracy = history.history['val_acc'][-1]
    # Print the classification accuracy.
    print()
    print("Accuracy: {0:.2%}".format(accuracy))
    print()
    global best_accuracy
    # Delete the Keras model with these hyper-parameters from memory.
    del model
    # Clear the Keras session.
    K.clear_session()
    tf.compat.v1.reset_default_graph()
    return -accuracy
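# gp_minimize minimizes its objective, so fitness returns the negative validation
# accuracy; the best configuration is the one with the lowest objective value.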
print('==== start tuning ====')
start_time = datetime.now()
search_result = gp_minimize(func=fitness,
                            dimensions=dimensions,
                            acq_func='EI',
                            n_calls=r,
                            x0=default_parameters)
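# search_result.x holds the best hyper-parameter values found, search_result.x_iters
# the evaluated configurations, and search_result.func_vals the corresponding
# (negated) validation accuracies; all three are pickled for later analysis.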
important_data = [search_result.x, search_result.x_iters, search_result.func_vals]
pickle.dump(important_data, open(results_save_dir + model_name, 'wb'))
end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))