import numpy as np
from keras.models import Model
from keras import backend as K
from keras.callbacks import ModelCheckpoint
import tensorflow as tf


class NetworkManager:
    '''
    Helper class to manage the generation of subnetwork training given a dataset
    '''
    def __init__(self, dataset, epochs=5, child_batchsize=128, acc_beta=0.8, clip_rewards=0.0):
        '''
        Manager which is tasked with creating subnetworks, training them on a dataset,
        and retrieving rewards in terms of accuracy, which are passed to the controller RNN.

        Args:
            dataset: a tuple of 4 arrays (X_train, y_train, X_val, y_val)
            epochs: number of epochs to train the subnetworks
            child_batchsize: batch size used when training the subnetworks
            acc_beta: exponential weight for the moving average of the accuracy
            clip_rewards: float - clip rewards in [-range, range] to prevent
                large weight updates. Use when training is highly unstable.
        '''
        self.dataset = dataset
        self.epochs = epochs
        self.batchsize = child_batchsize
        self.clip_rewards = clip_rewards

        self.beta = acc_beta
        self.beta_bias = acc_beta
        self.moving_acc = 0.0

    def get_rewards(self, model_fn, actions):
        '''
        Creates a subnetwork given the actions predicted by the controller RNN,
        trains it on the provided dataset, and then returns a reward.

        Args:
            model_fn: a function which accepts one argument, a list of
                parsed actions, obtained via an inverse mapping from the
                StateSpace.
            actions: a list of parsed actions obtained via an inverse mapping
                from the StateSpace. It is in a specific order, as given below:

                Consider 4 states were added to the StateSpace via the `add_state`
                method. Then the `actions` array will be of length 4, with the
                values of those states in the order that they were added.

                If the number of layers is greater than one, then the `actions` array
                will be of length `4 * number of layers` (in the above scenario).
                The indices [0:4] will be for layer 0, [4:8] for layer 1, and so on
                for the remaining layers.

                These action values are for direct use in the construction of models.

        Returns:
            a reward for training a model with the given actions
        '''
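        # Illustrative example (hypothetical state values, not from the source):
        # for a StateSpace with 2 states per layer (kernel size, filter count) and
        # 2 layers, `actions` might look like [3, 32, 5, 64], where actions[0:2]
        # configure layer 0 and actions[2:4] configure layer 1.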
        with tf.Session(graph=tf.Graph()) as network_sess:
            K.set_session(network_sess)

            # generate a submodel given predicted actions
            model = model_fn(actions)  # type: Model
            model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

            # unpack the dataset
            X_train, y_train, X_val, y_val = self.dataset

            # train the model using Keras methods
            model.fit(X_train, y_train, batch_size=self.batchsize, epochs=self.epochs,
                      verbose=1, validation_data=(X_val, y_val),
                      callbacks=[ModelCheckpoint('weights/temp_network.h5',
                                                 monitor='val_acc', verbose=1,
                                                 save_best_only=True,
                                                 save_weights_only=True)])

            # load best performance epoch in this training session
            model.load_weights('weights/temp_network.h5')

            # evaluate the model
            loss, acc = model.evaluate(X_val, y_val, batch_size=self.batchsize)

            # compute the reward
            reward = (acc - self.moving_acc)

            # if rewards are clipped, clip them in the range -0.05 to 0.05
            if self.clip_rewards:
                reward = np.clip(reward, -0.05, 0.05)

            # update moving accuracy with bias correction for 1st update
            if self.beta > 0.0 and self.beta < 1.0:
                self.moving_acc = self.beta * self.moving_acc + (1 - self.beta) * acc
                self.moving_acc = self.moving_acc / (1 - self.beta_bias)
                self.beta_bias = 0

                reward = np.clip(reward, -0.1, 0.1)

            print()
            print("Manager: EWA Accuracy = ", self.moving_acc)

        # clean up resources and GPU memory
        network_sess.close()

        return reward, acc
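

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). The MNIST dataset,
# the `build_model` stub, and the hyperparameters below are illustrative
# assumptions; in the actual project a separate training script supplies the
# dataset and the controller RNN supplies the `actions` passed to `model_fn`.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import os

    from keras.datasets import mnist
    from keras.layers import Dense, Flatten, Input
    from keras.utils import to_categorical

    # the ModelCheckpoint callback above writes to `weights/`, so ensure it exists
    os.makedirs('weights', exist_ok=True)

    # prepare a small dataset tuple in the (X_train, y_train, X_val, y_val) format
    (X_train, y_train), (X_val, y_val) = mnist.load_data()
    X_train = X_train.reshape((-1, 28, 28, 1)).astype('float32') / 255.0
    X_val = X_val.reshape((-1, 28, 28, 1)).astype('float32') / 255.0
    y_train = to_categorical(y_train, 10)
    y_val = to_categorical(y_val, 10)

    def build_model(actions):
        # hypothetical `model_fn`: a real one would translate `actions` into layer
        # hyperparameters; this stub ignores them and builds a tiny classifier
        ip = Input(shape=(28, 28, 1))
        x = Flatten()(ip)
        x = Dense(64, activation='relu')(x)
        x = Dense(10, activation='softmax')(x)
        return Model(ip, x)

    manager = NetworkManager((X_train, y_train, X_val, y_val),
                             epochs=1, child_batchsize=128)
    reward, acc = manager.get_rewards(build_model, actions=[])
    print("Reward:", reward, "Accuracy:", acc)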