XX_LightningClasses.py
# Used cars kicks classification - NN implementation in PyTorch Lightning
# Data source:
# https://www.openml.org/search?type=data&sort=runs&id=41162&status=active
# https://www.kaggle.com/competitions/DontGetKicked/overview
import torch, torchvision, torchmetrics
import lightning.pytorch as pl
# import pytorch_lightning as pl
# Careful: Optuna still uses the "pytorch_lightning" namespace to import the pruning
# integration with Lightning, but Lightning itself is now imported as lightning.pytorch
from optuna.integration.pytorch_lightning import PyTorchLightningPruningCallback
# Define TrainDataset class: Takes in preprocessed features & targets
class TrainDataset(torch.utils.data.Dataset):

    # Store preprocessed features & targets
    def __init__(self, x_train, y_train):
        self.x = torch.tensor(x_train, dtype = torch.float32)  # Store features
        self.y = torch.tensor(y_train.values, dtype = torch.float32).unsqueeze(1)  # Store targets

    # Return data length
    def __len__(self):
        return len(self.x)

    # Return a pair of features & target
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

# Define TestDataset class: Takes in preprocessed features only
class TestDataset(torch.utils.data.Dataset):

    # Store preprocessed features
    def __init__(self, x_test):
        self.x = torch.tensor(x_test, dtype = torch.float32)  # Store features

    # Return data length
    def __len__(self):
        return len(self.x)

    # Return one set of features
    def __getitem__(self, idx):
        return self.x[idx]
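
# Illustrative usage sketch (not part of the original module): the Dataset classes
# above are meant to be wrapped in torch DataLoaders before being passed to the
# Lightning Trainer. The variable names (x_train, y_train, x_test) and the batch
# size below are placeholder assumptions for demonstration only.
# train_loader = torch.utils.data.DataLoader(
#     TrainDataset(x_train, y_train), batch_size = 1024, shuffle = True)
# test_loader = torch.utils.data.DataLoader(
#     TestDataset(x_test), batch_size = 1024, shuffle = False)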

# Define Lightning module with Focal loss
class SeluDropoutModel(pl.LightningModule):

    # Initialize model
    def __init__(self, hyperparams_dict):

        # Delegate function to parent class
        super().__init__()

        # Save external hyperparameters so they are available when loading saved models
        self.save_hyperparameters(logger = False)

        # Initialize validation average precision metric (since focal loss has tunable
        # hyperparameters, using it for validation would be leakage)
        self.val_avg_precision = torchmetrics.classification.AveragePrecision(
            task = "binary")

        # Define hyperparameters
        self.n_hidden_layers = hyperparams_dict["n_hidden_layers"]
        self.input_size = hyperparams_dict["input_size"]
        self.hidden_size = hyperparams_dict["hidden_size"]
        self.learning_rate = hyperparams_dict["learning_rate"]
        self.l2 = hyperparams_dict["l2"]
        self.dropout = hyperparams_dict["dropout"]
        self.loss_alpha = hyperparams_dict["loss_alpha"]
        self.loss_gamma = hyperparams_dict["loss_gamma"]

        # Define architecture
        # Initialize layers list with first hidden layer
        self.layers_list = torch.nn.ModuleList([
            torch.nn.Linear(self.input_size, self.hidden_size),  # Hidden layer 1
            torch.nn.SELU(),  # Activation 1
            torch.nn.AlphaDropout(self.dropout)  # Dropout 1
        ])

        # Append extra hidden layers to layers list, according to hyperparameter
        for n in range(0, (self.n_hidden_layers - 1)):
            self.layers_list.extend([
                torch.nn.Linear(self.hidden_size, self.hidden_size),  # Hidden layer N
                torch.nn.SELU(),  # Activation N
                torch.nn.AlphaDropout(self.dropout)  # Dropout N
            ])

        # Append output layer to layers list
        self.layers_list.append(
            torch.nn.Linear(self.hidden_size, 1)  # Output layer
            # No sigmoid activation here, because the loss function has that built-in
        )

        # Define full network from layers list
        self.network = torch.nn.Sequential(*self.layers_list)

        # Sigmoid activation for prediction step only, not part of forward propagation
        self.sigmoid = torch.nn.Sequential(torch.nn.Sigmoid())

        # Initialize weights to conform with self-normalizing SELU activation
        for layer in self.network:
            if isinstance(layer, torch.nn.Linear):
                torch.nn.init.kaiming_normal_(layer.weight, nonlinearity = "linear")
                torch.nn.init.zeros_(layer.bias)

    # Define forward propagation
    def forward(self, x):
        output = self.network(x.view(x.size(0), -1))
        return output  # Returns logits, not probabilities

    # Define training loop
    def training_step(self, batch, batch_idx):

        # Perform training, calculate & return loss
        x, y = batch
        output = self.forward(x)

        # Loss function applies sigmoid activation before calculating focal loss
        loss = torchvision.ops.sigmoid_focal_loss(
            output, y,
            alpha = self.loss_alpha, gamma = self.loss_gamma,
            reduction = "mean")

        self.log(
            "train_loss", loss,
            on_step = False, on_epoch = True, prog_bar = True, logger = True)

        return loss

    # Define validation loop
    def validation_step(self, batch, batch_idx):

        # Make predictions, apply sigmoid activation to get probabilities
        x, y = batch
        output = self.forward(x)
        pred = self.sigmoid(output)

        # Update & log avg. precision score, ensure y is in int32 format for the metric
        self.val_avg_precision(pred, y.type(torch.int32))
        self.log(
            "val_avg_precision", self.val_avg_precision,
            on_step = True, on_epoch = True, prog_bar = True, logger = True)

        return self.val_avg_precision

    # Define prediction method (because the default just runs forward(), which
    # doesn't have sigmoid activation and doesn't return probabilities)
    def predict_step(self, batch, batch_idx):

        # Run the forward propagation, apply sigmoid activation to return probs.
        x = batch
        output = self.forward(x)
        pred = self.sigmoid(output)
        return pred

    # Define optimization algorithm, LR scheduler
    def configure_optimizers(self):

        # Adam optimizer with L2 regularization
        optimizer = torch.optim.Adam(
            self.parameters(), lr = self.learning_rate, weight_decay = self.l2)

        # Cyclic LR scheduler
        lr_scheduler = torch.optim.lr_scheduler.CyclicLR(
            optimizer,
            base_lr = self.learning_rate, max_lr = (self.learning_rate * 5),
            step_size_up = 200,  # Heuristic: 2-8x the number of steps (batches) in one epoch
            cycle_momentum = False,  # Not compatible with the Adam optimizer
            mode = "exp_range", gamma = 0.99995)

        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": lr_scheduler,
                "interval": "step",
                "frequency": 1
            }
        }
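
# Illustrative usage sketch (not part of the original module): SeluDropoutModel
# expects a dictionary with the hyperparameter keys read in __init__. The values
# below are arbitrary placeholders, not tuned settings, and train_loader /
# val_loader / test_loader are assumed DataLoaders built from the Dataset classes above.
# hyperparams_dict = {
#     "n_hidden_layers": 2,
#     "input_size": 90,       # Number of preprocessed feature columns
#     "hidden_size": 64,
#     "learning_rate": 1e-3,
#     "l2": 1e-4,
#     "dropout": 0.1,
#     "loss_alpha": 0.25,
#     "loss_gamma": 2.0
# }
# model = SeluDropoutModel(hyperparams_dict)
# trainer = pl.Trainer(max_epochs = 20, accelerator = "auto")
# trainer.fit(model, train_loader, val_loader)
# probs = trainer.predict(model, test_loader)  # Sigmoid probabilities, one tensor per batch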

# Create a copy of the Optuna pruning callback under the lightning.pytorch namespace
# as a workaround, because the Optuna code uses the pytorch_lightning namespace,
# which causes an error
class OptunaPruning(PyTorchLightningPruningCallback, pl.Callback):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
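
# Illustrative usage sketch (not part of the original module): the OptunaPruning
# callback is meant to be passed to a Lightning Trainer inside an Optuna objective,
# monitoring the validation metric logged in validation_step. The sampling logic and
# return value below are placeholders; depending on the Lightning version, the logged
# key may appear as "val_avg_precision" or "val_avg_precision_epoch" in
# trainer.callback_metrics.
# def objective(trial):
#     hyperparams_dict = {
#         "n_hidden_layers": trial.suggest_int("n_hidden_layers", 1, 4),
#         # ...remaining hyperparameters sampled from the trial...
#     }
#     model = SeluDropoutModel(hyperparams_dict)
#     trainer = pl.Trainer(
#         max_epochs = 20,
#         callbacks = [OptunaPruning(trial, monitor = "val_avg_precision")])
#     trainer.fit(model, train_loader, val_loader)
#     return trainer.callback_metrics["val_avg_precision"].item()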