From badd1bcfa339a2ee306a1de3acaf9b85c8b1a02d Mon Sep 17 00:00:00 2001
From: Jeffrey Ling <jeffrey.r.ling@gmail.com>
Date: Thu, 29 Dec 2016 16:08:57 -0500
Subject: [PATCH] adds option to write predicted classes

---
 README.md     |  1 +
 main.lua      |  1 +
 preprocess.py |  5 -----
 trainer.lua   | 37 +++++++++++++++++++++++++++++--------
 4 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 489aef2..cbbe79a 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,7 @@ The following is a list of complete parameters allowed by the torch code.
   * `train_only`: Set to 1 to only train (no testing)
   * `test_only`: Given a `.t7` file with model, test on testing data
   * `dump_feature_maps_file`: Filename for dumping feature maps of convolution at test time. This will be a `.hdf5` file with fields `feature_maps` for the features at each time step and `word_idxs` for the word indexes (aligned with the last word of the filter). This currently only works for models with a single filter size. This is saved for the best model on fold 1.
+  * `preds_file`: Filename for writing the predicted classes on the test data, one prediction per line (only used when `test_only` is set to 1). The written class indices are zero-indexed.
 
 Training hyperparameters:
   * `num_epochs`: Number of training epochs.
diff --git a/main.lua b/main.lua
index 93cb6f6..b1230a2 100644
--- a/main.lua
+++ b/main.lua
@@ -38,6 +38,7 @@ cmd:text()
 -- Training own dataset
 cmd:option('-train_only', 0, 'Set to 1 to only train on data. Default is cross-validation')
 cmd:option('-test_only', 0, 'Set to 1 to only do testing. Must have a -warm_start_model')
+cmd:option('-preds_file', '', 'On test data, write predictions to an output file. Set test_only to 1 to use')
 cmd:option('-warm_start_model', '', 'Path to .t7 file with pre-trained model. Should contain a table with key \'model\'')
 cmd:text()
 
diff --git a/preprocess.py b/preprocess.py
index 6facc12..3debf1a 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -202,12 +202,7 @@ def main():
   for word, vec in w2v.items():
     embed[word_to_idx[word] - 1] = vec
 
-  # Shuffle train
   print 'train size:', train.shape
-  N = train.shape[0]
-  perm = np.random.permutation(N)
-  train = train[perm]
-  train_label = train_label[perm]
 
   filename = dataset + '.hdf5'
   with h5py.File(filename, "w") as f:
diff --git a/trainer.lua b/trainer.lua
index 11f0efb..d34b53d 100644
--- a/trainer.lua
+++ b/trainer.lua
@@ -134,6 +134,12 @@ function Trainer:test(test_data, test_labels, model, criterion, layers, dump_fea
   local confusion = optim.ConfusionMatrix(classes)
   confusion:zero()
 
+  local preds_file
+  if opt.test_only == 1 and opt.preds_file ~= '' then
+    print('Writing predictions to ' .. opt.preds_file)
+    preds_file = io.open(opt.preds_file, 'w')
+  end
+
   -- dump feature maps
   local feature_maps
   local conv_layer = get_layer(model, 'convolution')
@@ -156,15 +162,26 @@ function Trainer:test(test_data, test_labels, model, criterion, layers, dump_fea
     local outputs = model:forward(inputs)
     -- dump feature maps from model forward
     local cur_feature_maps
-    if opt.cudnn == 1 then
-      cur_feature_maps = conv_layer.output:squeeze(4)
-    else
-      cur_feature_maps = conv_layer.output
+    if dump_features then
+      if opt.cudnn == 1 then
+        cur_feature_maps = conv_layer.output:squeeze(4)
+      else
+        cur_feature_maps = conv_layer.output
+      end
+      if feature_maps == nil then
+        feature_maps = cur_feature_maps
+      else
+        feature_maps = torch.cat(feature_maps, cur_feature_maps, 1)
+      end
     end
-    if feature_maps == nil then
-      feature_maps = cur_feature_maps
-    else
-      feature_maps = torch.cat(feature_maps, cur_feature_maps, 1)
+
+    if opt.test_only == 1 and opt.preds_file ~= '' then
+      -- write predictions to file
+      local _,preds = torch.max(outputs, 2)
+      for j = 1, preds:size(1) do
+        -- convert the 1-based class index to zero-based for output
+        preds_file:write((preds[j][1] - 1) .. '\n')
+      end
     end
 
     local err = criterion:forward(outputs, targets)
@@ -194,6 +211,10 @@ function Trainer:test(test_data, test_labels, model, criterion, layers, dump_fea
     f:close()
   end
 
+  if opt.test_only == 1 and opt.preds_file ~= '' then
+    preds_file:close()
+  end
+
   -- return error percent
   confusion:updateValids()
   return confusion.totalValid