import torch
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10
# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. Each Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
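# Optional sanity check (an addition, not part of the original exercise): the
# Sequential module registers the weight and bias Tensors of each Linear layer
# and exposes them through model.parameters(); this is the same iterable the
# optimizer below will receive.
assert [tuple(p.shape) for p in model.parameters()] == [
    (H, D_in), (H,),       # first Linear: weight and bias
    (D_out, H), (D_out,),  # second Linear: weight and bias (ReLU has no parameters)
]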
# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')
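# Note (an addition, not part of the original exercise): reduction='sum' adds up
# the squared errors over all N * D_out elements instead of averaging them, so
# the printed loss values are larger than with the default reduction='mean' and
# a given learning rate takes correspondingly larger effective steps.
assert torch.isclose(
    loss_fn(torch.zeros_like(y), y),                                    # sum of squared errors
    torch.nn.MSELoss(reduction='mean')(torch.zeros_like(y), y) * y.numel(),
    rtol=1e-4,
)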
# Without optim: train with a manual gradient-descent update.
# learning_rate = 1e-4
# for t in range(500):
#     # Forward pass: compute predicted y by passing x to the model. Module objects
#     # override the __call__ operator so you can call them like functions. When
#     # doing so you pass a Tensor of input data to the Module and it produces
#     # a Tensor of output data.
#     y_pred = model(x)
#     # Compute and print loss. We pass Tensors containing the predicted and true
#     # values of y, and the loss function returns a Tensor containing the loss.
#     loss = loss_fn(y_pred, y)
#     if t % 100 == 99:
#         print(t, loss.item())
#     # Zero the gradients before running the backward pass.
#     model.zero_grad()
#     '''
#     In PyTorch, we need to set the gradients to zero before starting
#     backpropagation, because PyTorch accumulates the gradients on subsequent
#     backward passes. This is convenient while training RNNs, so the default
#     behavior is to accumulate (i.e. sum) the gradients on every
#     loss.backward() call. Because of this, you should zero out the gradients
#     at the start of each training iteration so that the parameter update is
#     done correctly; otherwise the gradient would point in some direction other
#     than the intended direction towards the minimum (or the maximum, in case
#     of maximization objectives). See the small standalone demo after this
#     commented-out block.
#     '''
#     # Backward pass: compute the gradient of the loss with respect to all the
#     # learnable parameters of the model. Internally, the parameters of each
#     # Module are stored in Tensors with requires_grad=True, so this call will
#     # compute gradients for all learnable parameters in the model.
#     loss.backward()
#     # Update the weights using gradient descent. Each parameter is a Tensor, so
#     # we can access its gradients as we did before.
#     with torch.no_grad():
#         # torch.no_grad() disables gradient tracking for the operations inside the block.
#         '''
#         To prevent tracking history (and using memory), you can also wrap the
#         code block in torch.no_grad(). This can be particularly helpful when
#         evaluating a model, because the model may have trainable parameters
#         with requires_grad=True for which we don't need the gradients.
#         '''
#         for param in model.parameters():
#             param -= learning_rate * param.grad
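# A minimal standalone demo (an addition, not part of the original exercise) of
# the two points discussed in the commented-out block above: gradients are
# accumulated (summed) across .backward() calls until they are explicitly
# zeroed, and torch.no_grad() suppresses graph building for the computations
# inside the block.
w = torch.tensor(1.0, requires_grad=True)
(2 * w).backward()
(2 * w).backward()
assert w.grad.item() == 4.0   # the two gradients (2 and 2) were summed
w.grad.zero_()                # this is what zero_grad() does for each parameter
(2 * w).backward()
assert w.grad.item() == 2.0   # after zeroing, only the latest gradient remains
with torch.no_grad():
    v = 2 * w                 # computed without recording history
assert not v.requires_grad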
# With optim: let the optim package update the weights for us.
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x)
    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable
    # weights of the model). This is because, by default, gradients are
    # accumulated in buffers (i.e. not overwritten) whenever .backward()
    # is called. Check out the docs of torch.autograd.backward for more details.
    optimizer.zero_grad()
    # Backward pass: compute the gradient of the loss with respect to the model
    # parameters.
    loss.backward()
    # Calling step() on an Optimizer updates the parameters it holds.
    optimizer.step()
# Where are the gradients computed by loss.backward() stored?
# How can optimizer.step() access them?
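# A short answer sketch (an addition, not part of the original exercise):
# loss.backward() writes each gradient into the .grad attribute of the
# corresponding parameter Tensor, and the optimizer keeps references to those
# same Tensors (the ones passed in via model.parameters() at construction), so
# optimizer.step() simply reads p.grad for every parameter p it holds.
for p in model.parameters():
    assert p.grad is not None and p.grad.shape == p.shape
# The optimizer's param_groups contain the very same Tensor objects as the model:
assert all(
    p_model is p_opt
    for p_model, p_opt in zip(model.parameters(), optimizer.param_groups[0]['params'])
)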