centralized_gradients.py
import tensorflow as tf
from tensorflow.keras import backend as K


def get_centralized_gradients(optimizer, loss, params):
"""Compute the centralized gradients.
This function is ideally not meant to be used directly unless you are building a custom optimizer, in which case you
could point `get_gradients` to this function. This is a modified version of
`tf.keras.optimizers.Optimizer.get_gradients`.
# Arguments:
optimizer: a `tf.keras.optimizers.Optimizer object`. The optimizer you are using.
loss: Scalar tensor to minimize.
params: List of variables.
# Returns:
A gradients tensor.
# Reference:
[Yong et al., 2020](https://arxiv.org/abs/2004.01461)
"""
    # We only provide a modified get_gradients() function here, since at this stage we just need to compute the
    # centralized gradients; they can then be consumed by other optimizers.
    raw_grads = K.gradients(loss, params)
    if None in raw_grads:
        raise ValueError('An operation has `None` for gradient. '
                         'Please make sure that all of your ops have a '
                         'gradient defined (i.e. are differentiable). '
                         'Common ops without gradient: '
                         'K.argmax, K.round, K.eval.')

    grads = []
    for grad in raw_grads:
        rank = len(grad.shape)
        if rank > 1:
            # Centralize: subtract the mean over every axis except the last (the output dimension),
            # so each slice of the weight gradient along the last axis has zero mean.
            axis = list(range(rank - 1))
            grad -= tf.reduce_mean(grad, axis=axis, keepdims=True)
        grads.append(grad)

    if hasattr(optimizer, 'clipnorm') and optimizer.clipnorm:
        # Global-norm clipping, equivalent to the original Keras clip_norm helper.
        grads, _ = tf.clip_by_global_norm(grads, optimizer.clipnorm)
    if hasattr(optimizer, 'clipvalue') and optimizer.clipvalue:
        grads = [K.clip(g, -optimizer.clipvalue, optimizer.clipvalue)
                 for g in grads]
    return grads
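
# Illustrative sketch (not part of the library API): for a rank-2 gradient such as a Dense kernel's
# gradient of shape (fan_in, units), the reduction above averages over axis 0, so each column is
# shifted to zero mean. For example:
#
#   g = tf.constant([[1.0, 4.0],
#                    [3.0, 8.0]])
#   g - tf.reduce_mean(g, axis=[0], keepdims=True)
#   # -> [[-1.0, -2.0],
#   #     [ 1.0,  2.0]]
#
# Rank-1 gradients (e.g. biases) are left untouched, as in the reference paper.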


def centralized_gradients_for_optimizer(optimizer):
    """Create a centralized gradients function for a specified optimizer.

    # Arguments:
        optimizer: a `tf.keras.optimizers.Optimizer` object. The optimizer you are using.

    # Usage:
    ```py
    >>> opt = tf.keras.optimizers.Adam(learning_rate=0.1)
    >>> opt.get_gradients = gctf.centralized_gradients_for_optimizer(opt)
    >>> model.compile(optimizer=opt, ...)
    ```
    """

    def get_centralized_gradients_for_optimizer(loss, params):
        return get_centralized_gradients(optimizer, loss, params)

    return get_centralized_gradients_for_optimizer
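

# A minimal, hedged usage sketch mirroring the docstring above. It assumes the package is importable
# as `gctf` and that a Keras `model`, `x_train`, and `y_train` already exist; none of these are
# defined in this module, and the loss/epoch settings are illustrative only.
#
#   import gctf  # assumed import name, matching the docstring usage above
#
#   opt = tf.keras.optimizers.Adam(learning_rate=0.1)
#   opt.get_gradients = gctf.centralized_gradients_for_optimizer(opt)
#   model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
#   model.fit(x_train, y_train, epochs=5)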