-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathkfoldcv.py
171 lines (130 loc) · 4.72 KB
/
kfoldcv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import os
import torch
from torch import nn
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import transforms
from sklearn.model_selection import KFold
def reset_weights(m):
'''
Try resetting model weights to avoid
weight leakage.
'''
for layer in m.children():
if hasattr(layer, 'reset_parameters'):
print(f'Reset trainable parameters of layer = {layer}')
layer.reset_parameters()
class SimpleConvNet(nn.Module):
'''
Simple Convolutional Neural Network
'''
def __init__(self):
super().__init__()
self.layers = nn.Sequential(
nn.Conv2d(1, 10, kernel_size=3),
nn.ReLU(),
nn.Flatten(),
nn.Linear(26 * 26 * 10, 50),
nn.ReLU(),
nn.Linear(50, 20),
nn.ReLU(),
nn.Linear(20, 10)
)
def forward(self, x):
'''Forward pass'''
return self.layers(x)
if __name__ == '__main__':
# Configuration options
k_folds = 5
num_epochs = 1
loss_function = nn.CrossEntropyLoss()
# For fold results
results = {}
# Set fixed random number seed
torch.manual_seed(42)
# Prepare MNIST dataset by concatenating Train/Test part; we split later.
dataset_train_part = MNIST(os.getcwd(), download=True, transform=transforms.ToTensor(), train=True)
dataset_test_part = MNIST(os.getcwd(), download=True, transform=transforms.ToTensor(), train=False)
dataset = ConcatDataset([dataset_train_part, dataset_test_part])
# Define the K-fold Cross Validator
kfold = KFold(n_splits=k_folds, shuffle=True)
# Start print
print('--------------------------------')
# K-fold Cross Validation model evaluation
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
# Print
print(f'FOLD {fold}')
print('--------------------------------')
# Sample elements randomly from a given list of ids, no replacement.
train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
# Define data loaders for training and testing data in this fold
trainloader = torch.utils.data.DataLoader(
dataset,
batch_size=10, sampler=train_subsampler)
testloader = torch.utils.data.DataLoader(
dataset,
batch_size=10, sampler=test_subsampler)
# Init the neural network
network = SimpleConvNet()
network.apply(reset_weights)
# Initialize optimizer
optimizer = torch.optim.Adam(network.parameters(), lr=1e-4)
# Run the training loop for defined number of epochs
for epoch in range(0, num_epochs):
# Print epoch
print(f'Starting epoch {epoch+1}')
# Set current loss value
current_loss = 0.0
# Iterate over the DataLoader for training data
for i, data in enumerate(trainloader, 0):
# Get inputs
inputs, targets = data
# Zero the gradients
optimizer.zero_grad()
# Perform forward pass
outputs = network(inputs)
# Compute loss
loss = loss_function(outputs, targets)
# Perform backward pass
loss.backward()
# Perform optimization
optimizer.step()
# Print statistics
current_loss += loss.item()
if i % 500 == 499:
print('Loss after mini-batch %5d: %.3f' %
(i + 1, current_loss / 500))
current_loss = 0.0
# Process is complete.
print('Training process has finished. Saving trained model.')
# Print about testing
print('Starting testing')
# Saving the model
save_path = f'./model-fold-{fold}.pth'
torch.save(network.state_dict(), save_path)
# Evaluationfor this fold
correct, total = 0, 0
with torch.no_grad():
# Iterate over the test data and generate predictions
for i, data in enumerate(testloader, 0):
# Get inputs
inputs, targets = data
# Generate outputs
outputs = network(inputs)
# Set total and correct
_, predicted = torch.max(outputs.data, 1)
total += targets.size(0)
correct += (predicted == targets).sum().item()
# Print accuracy
print('Accuracy for fold %d: %d %%' % (fold, 100.0 * correct / total))
print('--------------------------------')
results[fold] = 100.0 * (correct / total)
# Print fold results
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')
print('--------------------------------')
sum = 0.0
for key, value in results.items():
print(f'Fold {key}: {value} %')
sum += value
print(f'Average: {sum/len(results.items())} %')