-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcifar_singlerun.py
118 lines (98 loc) · 3.61 KB
/
cifar_singlerun.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import torch
from torch import optim
import time
from models import *
from datasets import *
from loss import *
# ---- Run configuration -------------------------------------------------
batch_size = 500    # samples per training step
v_batch_size = 100  # samples per validation step
epoch = 22          # number of passes over the training set

# Prefer the first CUDA device; fall back to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
torch.backends.cudnn.benchmark = True

# ---- Data --------------------------------------------------------------
# build_dataset is project-defined; the target device is passed to it.
train_imgs, train_lbls, val_imgs, val_lbls = build_dataset(device=device)
n_train, n_val = len(train_lbls), len(val_lbls)

# ---- Model -------------------------------------------------------------
# The whole network is moved to the device and cast to half precision ...
net = build_network()
net.to(device).half()

# ... and then every BatchNorm2d layer is cast back to float, has its scale
# (when it has one) reset to 1.0, and gets explicit eps / momentum values.
for layer in net.modules():
    if not isinstance(layer, nn.BatchNorm2d):
        continue
    layer.float()
    if getattr(layer, 'weight', None) is not None:
        layer.weight.data.fill_(1.0)
    layer.eps = 1e-5
    layer.momentum = 0.1

# ---- Objective and optimizer -------------------------------------------
criterion = nn.CrossEntropyLoss()
# Project-defined loss; presumably label-smoothed cross-entropy -- confirm
# against loss.py.
criterion2 = CrossEntropyLabelSmooth(num_classes=10, epsilon=0.2)

optimizer = optim.SGD(
    net.parameters(),
    lr=0.2,
    momentum=0.9,
    nesterov=True,
    weight_decay=0.001,
)
def lr(e, total_epochs=22, warmup_epochs=4):
    """Return the learning-rate multiplier for epoch ``e``.

    The schedule is piecewise linear: it ramps from 0.01 at epoch 0 up to
    0.51 at epoch ``warmup_epochs - 1``, then decays linearly back to 0.01
    at epoch ``total_epochs``.  With the defaults this reproduces the
    original hard-coded 22-epoch schedule exactly.

    Args:
        e: Zero-based epoch index.
        total_epochs: Epoch at which the decay reaches its floor.
        warmup_epochs: Number of epochs (starting at 0) that use the
            warm-up ramp; the peak is reached on the last of them.

    Returns:
        The multiplier as a float.

    Raises:
        ValueError: If ``warmup_epochs < 1`` or
            ``total_epochs < warmup_epochs``.
    """
    if warmup_epochs < 1 or total_epochs < warmup_epochs:
        raise ValueError(
            "expected 1 <= warmup_epochs <= total_epochs, got "
            "warmup_epochs={}, total_epochs={}".format(warmup_epochs, total_epochs)
        )
    peak_epoch = warmup_epochs - 1
    # With a single warm-up epoch there is no ramp (and nothing to divide
    # by); the decay branch already yields the peak value at epoch 0.
    if peak_epoch > 0 and e < warmup_epochs:
        return 0.5*e/peak_epoch + 0.01
    return 0.5*(total_epochs - e)/(total_epochs - peak_epoch) + 0.01
# LambdaLR scales the optimizer's base learning rate by lr(e) each epoch.
sched = optim.lr_scheduler.LambdaLR(optimizer, lr)

# Augmentation pipeline (project-defined), moved to the device in half precision.
augment = Augment()
augment.to(device).half()

# Only whole batches are processed. Sizing the shuffle by the number of
# samples actually augmented keeps the permutation in range even when
# n_train is not a multiple of batch_size.
steps_per_epoch = n_train // batch_size
n_used = steps_per_epoch * batch_size

t_start = time.time()
for e in range(epoch):
    start = time.time()

    # Re-augment the training images for this epoch, one chunk at a time.
    a_train = []
    for i in range(steps_per_epoch):
        inputs = train_imgs[i*batch_size:(i+1)*batch_size, ...]
        a_train.append(augment(inputs.to(device).half()))
    a_train_imgs = torch.cat(a_train)

    # Shuffle images and labels with the same permutation.
    perm = torch.randperm(n_used)
    a_train_imgs = a_train_imgs[perm, ...].contiguous()
    a_train_lbls = train_lbls[perm].contiguous()

    # ---- training pass ----
    net.train()
    batch_losses = []
    for i in range(steps_per_epoch):
        inputs = a_train_imgs[i*batch_size:(i+1)*batch_size, ...]
        labels = a_train_lbls[i*batch_size:(i+1)*batch_size]

        optimizer.zero_grad()
        outputs = net(inputs)
        # Objective: criterion plus twice criterion2.
        loss = criterion(outputs, labels) + 2*criterion2(outputs, labels)
        loss.backward()
        optimizer.step()

        # Store a detached tensor: the list must not hold on to autograd
        # history, and converting to a Python float is deferred to one
        # .item() call per epoch below.
        batch_losses.append(loss.detach())
    running_loss = torch.stack(batch_losses).mean().item()

    # ---- validation pass (epochs 0, 1, 6, 11, 16, 21, ...) ----
    if e == 0 or e % 5 == 1:
        net.eval()
        val_loss = []
        val_acc = []
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for i in range(n_val // v_batch_size):
                inputs = val_imgs[i*v_batch_size:(i+1)*v_batch_size, ...]
                labels = val_lbls[i*v_batch_size:(i+1)*v_batch_size]
                outputs = net(inputs)
                val_loss.append(criterion(outputs, labels))
                # Fraction of correct predictions, computed in floating
                # point so it never degrades to integer division.
                val_acc.append((outputs.argmax(dim=1) == labels).float().mean())
        # Wall-clock time since the start of this epoch (augmentation,
        # training and validation together).
        v_stop = time.time()
        print('{} train loss {:5.02f} val loss {:5.02f} val acc {:5.02f} time v:{:5.03f}'.format(
            e, running_loss, torch.stack(val_loss).mean(), 100.*torch.stack(val_acc).mean(), (v_stop - start)))

    # Advance the schedule once per epoch, after that epoch's optimizer steps.
    sched.step()

print('Finished Training in {:5.03f}'.format(time.time()-t_start))