# batch_norm1D.py
""" Batch normalization (also called "batch scale") is a technique used to normalize the activations of the layers in a deep learning model,
specifically the mean and variance of the activations over a mini-batch of training examples.
(ref) https://youtu.be/4gal2zIjm3M
(ref) https://gaussian37.github.io/dl-concept-batchnorm/
"""
import matplotlib.pyplot as plt
import torch
import torch.nn as nn


class BatchNorm1D(nn.Module):
    """ Batch normalization over a (batch, num_features) input.

    Args:
        num_features: number of features (feature dimension) per sample
        eps: small constant added to the variance for numerical stability
        momentum: update rate for the running mean/variance buffers
    """
    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        # Learnable affine parameters (scale and shift)
        self.gamma = nn.Parameter(torch.ones(num_features))
        self.beta = nn.Parameter(torch.zeros(num_features))
        # Running statistics used at inference time (buffers, not learned parameters)
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))

    def forward(self, x):
        if self.training:
            # Compute the per-feature mean and (biased) variance of the mini-batch
            mean = x.mean(dim=0)
            var = ((x - mean) ** 2).mean(dim=0)
            # Update the running statistics; no gradient should flow through this step
            with torch.no_grad():
                self.running_mean.mul_(1 - self.momentum).add_(self.momentum * mean)
                self.running_var.mul_(1 - self.momentum).add_(self.momentum * var)
        else:
            # At inference time, normalize with the accumulated running statistics
            mean = self.running_mean
            var = self.running_var
        # Normalize, then apply the learnable scale (gamma) and shift (beta)
        return self.gamma * (x - mean) / (var + self.eps).sqrt() + self.beta


if __name__ == "__main__":
    x = torch.randn(64, 512) + 3
    batch_norm = BatchNorm1D(512)
    out = batch_norm(x)
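    # Minimal sanity check (a sketch, assuming the defaults of torch.nn.BatchNorm1d —
    # gamma=1, beta=0, eps=1e-5 — line up with this implementation): in training mode
    # both normalize with the biased batch statistics, so the outputs should match.
    with torch.no_grad():
        ref = nn.BatchNorm1d(512)(x)
    print("matches nn.BatchNorm1d:", torch.allclose(out, ref, atol=1e-5))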
    # == Vis. == #
    plt.hist(x.flatten().detach().numpy(), bins=100, label="Input")
    plt.hist(out.flatten().detach().numpy(), bins=100, label="Batch Norm Output")
    plt.legend(loc='best')
    plt.show()
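
    # A minimal eval-mode sketch: after the single training pass above, switch to eval()
    # so the layer normalizes a new batch with its running statistics rather than the
    # statistics of the incoming batch.
    batch_norm.eval()
    with torch.no_grad():
        out_eval = batch_norm(torch.randn(64, 512) + 3)
    print("eval-mode output mean:", out_eval.mean().item())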