From badd1bcfa339a2ee306a1de3acaf9b85c8b1a02d Mon Sep 17 00:00:00 2001 From: Jeffrey Ling Date: Thu, 29 Dec 2016 16:08:57 -0500 Subject: [PATCH] adds option to write predicted classes --- README.md | 1 + main.lua | 1 + preprocess.py | 5 ----- trainer.lua | 37 +++++++++++++++++++++++++++++-------- 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 489aef2..cbbe79a 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ The following is a list of complete parameters allowed by the torch code. * `train_only`: Set to 1 to only train (no testing) * `test_only`: Given a `.t7` file with model, test on testing data * `dump_feature_maps_file`: Filename for dumping feature maps of convolution at test time. This will be a `.hdf5` file with fields `feature_maps` for the features at each time step and `word_idxs` for the word indexes (aligned with the last word of the filter). This currently only works for models with a single filter size. This is saved for the best model on fold 1. + * `preds_file`: Filename for writing predictions (with `test_only` set to 1). Output is zero indexed. Training hyperparameters: * `num_epochs`: Number of training epochs. diff --git a/main.lua b/main.lua index 93cb6f6..b1230a2 100644 --- a/main.lua +++ b/main.lua @@ -38,6 +38,7 @@ cmd:text() -- Training own dataset cmd:option('-train_only', 0, 'Set to 1 to only train on data. Default is cross-validation') cmd:option('-test_only', 0, 'Set to 1 to only do testing. Must have a -warm_start_model') +cmd:option('-preds_file', '', 'On test data, write predictions to an output file. Set test_only to 1 to use') cmd:option('-warm_start_model', '', 'Path to .t7 file with pre-trained model. Should contain a table with key \'model\'') cmd:text() diff --git a/preprocess.py b/preprocess.py index 6facc12..3debf1a 100644 --- a/preprocess.py +++ b/preprocess.py @@ -202,12 +202,7 @@ def main(): for word, vec in w2v.items(): embed[word_to_idx[word] - 1] = vec - # Shuffle train print 'train size:', train.shape - N = train.shape[0] - perm = np.random.permutation(N) - train = train[perm] - train_label = train_label[perm] filename = dataset + '.hdf5' with h5py.File(filename, "w") as f: diff --git a/trainer.lua b/trainer.lua index 11f0efb..d34b53d 100644 --- a/trainer.lua +++ b/trainer.lua @@ -134,6 +134,12 @@ function Trainer:test(test_data, test_labels, model, criterion, layers, dump_fea local confusion = optim.ConfusionMatrix(classes) confusion:zero() + local preds_file + if opt.test_only == 1 and opt.preds_file ~= '' then + print('Writing predictions to ' .. opt.preds_file) + preds_file = io.open(opt.preds_file, 'w') + end + -- dump feature maps local feature_maps local conv_layer = get_layer(model, 'convolution') @@ -156,15 +162,26 @@ function Trainer:test(test_data, test_labels, model, criterion, layers, dump_fea local outputs = model:forward(inputs) -- dump feature maps from model forward local cur_feature_maps - if opt.cudnn == 1 then - cur_feature_maps = conv_layer.output:squeeze(4) - else - cur_feature_maps = conv_layer.output + if dump_features then + if opt.cudnn == 1 then + cur_feature_maps = conv_layer.output:squeeze(4) + else + cur_feature_maps = conv_layer.output + end + if feature_maps == nil then + feature_maps = cur_feature_maps + else + feature_maps = torch.cat(feature_maps, cur_feature_maps, 1) + end end - if feature_maps == nil then - feature_maps = cur_feature_maps - else - feature_maps = torch.cat(feature_maps, cur_feature_maps, 1) + + if opt.test_only == 1 and opt.preds_file ~= '' then + -- write predictions to file + local _,preds = torch.max(outputs, 2) + for j = 1, preds:size(1) do + -- zero index + preds_file:write((preds[j][1] - 1) .. '\n') + end end local err = criterion:forward(outputs, targets) @@ -194,6 +211,10 @@ function Trainer:test(test_data, test_labels, model, criterion, layers, dump_fea f:close() end + if opt.test_only == 1 and opt.preds_file ~= '' then + preds_file:close() + end + -- return error percent confusion:updateValids() return confusion.totalValid