Migrate to PyTorch 0.4.1
potterhsu committed Sep 10, 2018
1 parent 7a7ffc2 commit 9fc2ecd
Showing 11 changed files with 119 additions and 130 deletions.
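Most of the mechanical churn in this commit is the same few PyTorch 0.4 idioms applied throughout: typed constructors such as `torch.FloatTensor`/`torch.LongTensor` become `torch.tensor(..., dtype=...)`, inference loops get wrapped in `torch.no_grad()`, and 1-element tensors are converted to Python numbers with `.item()`. A minimal before/after sketch of the constructor change, with made-up values rather than repo data:

```
import torch

data = [[0.0, 0.0, 10.0, 10.0]]

# pre-0.4 style removed by this commit
bboxes_old = torch.FloatTensor(data)
labels_old = torch.LongTensor([1])

# 0.4.1 style introduced by this commit
bboxes = torch.tensor(data, dtype=torch.float)
labels = torch.tensor([1], dtype=torch.long)

assert torch.equal(bboxes, bboxes_old) and torch.equal(labels, labels_old)
```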
16 changes: 9 additions & 7 deletions README.md
@@ -26,11 +26,11 @@ An easy implementation of Faster R-CNN in PyTorch.

* **25 minutes** every 10000 steps

* **3 hours** for 70000 steps (which leads to mAP=70.29%)
* **3 hours** for 70000 steps (which leads to mAP=xx.xx%)

* Inference

* **~9 examples** per second
* **~13 examples** per second

### Trained Model

@@ -39,8 +39,8 @@ An easy implementation of Faster R-CNN in PyTorch.
## Requirements

* Python 3.6
* torch 0.3.1
* torchvision 0.2.0
* torch 0.4.1
* torchvision 0.2.1
* tqdm

```
@@ -50,8 +50,8 @@ An easy implementation of Faster R-CNN in PyTorch.
## Setup

1. Download VOC 2007 Dataset
- [Training / Validation](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar)
- [Test](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar)
- [Training / Validation](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar) (5011 images)
- [Test](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar) (4952 images)

1. Extract to the data folder; your folder structure should now look like:
```
@@ -83,7 +83,9 @@ An easy implementation of Faster R-CNN in PyTorch.
$ python test_nms.py
```
> sm_61 is for the GTX 1080 Ti; to see other architectures, visit [here](http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)
> Try rebuilding the module if the unit test fails
* Result after running `test_nms.py`:

![](https://github.com/potterhsu/easy-faster-rcnn.pytorch/blob/master/images/test_nms.png?raw=true)
23 changes: 12 additions & 11 deletions bbox.py
@@ -1,10 +1,11 @@
import torch
import numpy as np
import torch
from torch import Tensor


class BBox(object):

def __init__(self, left: float, top: float, right: float, bottom: float) -> None:
def __init__(self, left: float, top: float, right: float, bottom: float):
super().__init__()
self.left = left
self.top = top
@@ -19,7 +20,7 @@ def tolist(self):
return [self.left, self.top, self.right, self.bottom]

@staticmethod
def to_center_base(bboxes):
def to_center_base(bboxes: Tensor):
return torch.stack([
(bboxes[:, 0] + bboxes[:, 2]) / 2,
(bboxes[:, 1] + bboxes[:, 3]) / 2,
@@ -28,7 +29,7 @@ def to_center_base(bboxes):
], dim=1)

@staticmethod
def from_center_base(center_based_bboxes):
def from_center_base(center_based_bboxes: Tensor) -> Tensor:
return torch.stack([
center_based_bboxes[:, 0] - center_based_bboxes[:, 2] / 2,
center_based_bboxes[:, 1] - center_based_bboxes[:, 3] / 2,
@@ -37,7 +38,7 @@ def from_center_base(center_based_bboxes):
], dim=1)
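For reference, a small round-trip through the two conversions above, with made-up coordinates and assuming the usual width/height rows in the elided lines (corner form is `[left, top, right, bottom]`, center form is `[center_x, center_y, width, height]`):

```
import torch
from bbox import BBox  # the module shown in this diff

corner = torch.tensor([[10., 20., 50., 60.]])
center = BBox.to_center_base(corner)            # tensor([[30., 40., 40., 40.]])
assert torch.equal(BBox.from_center_base(center), corner)
```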

@staticmethod
def calc_transformer(src_bboxes, dst_bboxes):
def calc_transformer(src_bboxes: Tensor, dst_bboxes: Tensor) -> Tensor:
center_based_src_bboxes = BBox.to_center_base(src_bboxes)
center_based_dst_bboxes = BBox.to_center_base(dst_bboxes)
transformers = torch.stack([
@@ -49,7 +50,7 @@ def calc_transformer(src_bboxes, dst_bboxes):
return transformers

@staticmethod
def apply_transformer(src_bboxes, transformers):
def apply_transformer(src_bboxes: Tensor, transformers: Tensor) -> Tensor:
center_based_src_bboxes = BBox.to_center_base(src_bboxes)
center_based_dst_bboxes = torch.stack([
transformers[:, 0] * center_based_src_bboxes[:, 2] + center_based_src_bboxes[:, 0],
@@ -61,7 +62,7 @@ def apply_transformer(src_bboxes, transformers):
return dst_bboxes
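A hedged usage sketch of the pair above: `calc_transformer` encodes target boxes relative to source (anchor) boxes and `apply_transformer` decodes them back, so a round trip should recover the targets up to floating-point error. The coordinates are toy values, not repo data:

```
import torch
from bbox import BBox

anchors = torch.tensor([[0., 0., 16., 16.], [8., 8., 24., 24.]])
targets = torch.tensor([[2., 2., 14., 18.], [8., 10., 26., 22.]])

deltas = BBox.calc_transformer(anchors, targets)   # one (tx, ty, tw, th) row per box
decoded = BBox.apply_transformer(anchors, deltas)

assert torch.allclose(decoded, targets, atol=1e-4)
```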

@staticmethod
def iou(source, other):
def iou(source: Tensor, other: Tensor) -> Tensor:
source = source.repeat(other.shape[0], 1, 1).permute(1, 0, 2)
other = other.repeat(source.shape[0], 1, 1)

@@ -79,14 +80,14 @@ def iou(source, other):
return intersection_area / (source_area + other_area - intersection_area)
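A quick illustration of the pairwise behaviour of `iou` (and of `inside` below): a `source` of shape (N, 4) against an `other` of shape (M, 4) yields an (N, M) result. Assuming plain corner-based IoU with no +1 offset, the overlapping pair below comes out around 0.14:

```
import torch
from bbox import BBox

source = torch.tensor([[0., 0., 10., 10.]])
other = torch.tensor([[0., 0., 10., 10.], [5., 5., 15., 15.]])

print(BBox.iou(source, other))     # roughly tensor([[1.0000, 0.1429]])
print(BBox.inside(source, other))  # (1, 2) mask; 1 where the source box lies fully inside the other box
```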

@staticmethod
def inside(source, other) -> bool:
def inside(source: Tensor, other: Tensor) -> bool:
source = source.repeat(other.shape[0], 1, 1).permute(1, 0, 2)
other = other.repeat(source.shape[0], 1, 1)
return ((source[:, :, 0] >= other[:, :, 0]) * (source[:, :, 1] >= other[:, :, 1]) *
(source[:, :, 2] <= other[:, :, 2]) * (source[:, :, 3] <= other[:, :, 3]))

@staticmethod
def clip(bboxes, left: float, top: float, right: float, bottom: float):
def clip(bboxes: Tensor, left: float, top: float, right: float, bottom: float) -> Tensor:
return torch.stack([
torch.clamp(bboxes[:, 0], min=left, max=right),
torch.clamp(bboxes[:, 1], min=top, max=bottom),
@@ -95,7 +96,7 @@ def clip(bboxes, left: float, top: float, right: float, bottom: float):
], dim=1)

@staticmethod
def generate_anchors(max_x: int, max_y: int, stride: int):
def generate_anchors(max_x: int, max_y: int, stride: int) -> Tensor:
center_based_anchor_bboxes = []

# NOTE: it's important to let `anchor_y` be the major index of the list (i.e., move horizontally first, then vertically) for consistency with 2D convolution
@@ -110,7 +111,7 @@ def generate_anchors(max_x: int, max_y: int, stride: int):
width = size * np.sqrt(1 / r)
center_based_anchor_bboxes.append([center_x, center_y, width, height])

center_based_anchor_bboxes = torch.FloatTensor(center_based_anchor_bboxes)
center_based_anchor_bboxes = torch.tensor(center_based_anchor_bboxes, dtype=torch.float)
anchor_bboxes = BBox.from_center_base(center_based_anchor_bboxes)

return anchor_bboxes
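A hedged usage sketch of `generate_anchors`: for a feature map of `max_x` by `max_y` positions with a stride of 16, it returns one corner-based anchor box per (position, scale, ratio) combination; the exact scales and aspect ratios live in the lines elided above.

```
from bbox import BBox

# e.g. an 800x600 input downsampled by 16 gives a roughly 50x38 feature map
anchors = BBox.generate_anchors(max_x=50, max_y=38, stride=16)
print(anchors.shape)  # torch.Size([50 * 38 * k, 4]), where k is the number of scale/ratio combinations
```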
16 changes: 8 additions & 8 deletions dataset.py
@@ -7,8 +7,8 @@
import PIL
import torch.utils.data
from PIL import Image, ImageOps
from torch import Tensor
from torchvision import transforms
from torch import FloatTensor, LongTensor

from bbox import BBox

@@ -21,7 +21,7 @@ class Mode(Enum):

class Annotation(object):
class Object(object):
def __init__(self, name: str, difficult: bool, bbox: BBox) -> None:
def __init__(self, name: str, difficult: bool, bbox: BBox):
super().__init__()
self.name = name
self.difficult = difficult
@@ -31,7 +31,7 @@ def __repr__(self) -> str:
return 'Object[name={:s}, difficult={!s}, bbox={!s}]'.format(
self.name, self.difficult, self.bbox)

def __init__(self, filename: str, objects: List[Object]) -> None:
def __init__(self, filename: str, objects: List[Object]):
super().__init__()
self.filename = filename
self.objects = objects
@@ -46,7 +46,7 @@ def __init__(self, filename: str, objects: List[Object]) -> None:

LABEL_TO_CATEGORY_DICT = {v: k for k, v in CATEGORY_TO_LABEL_DICT.items()}

def __init__(self, path_to_data_dir: str, mode: Mode) -> None:
def __init__(self, path_to_data_dir: str, mode: Mode):
super().__init__()

self._mode = mode
@@ -89,15 +89,15 @@ def __init__(self, path_to_data_dir: str, mode: Mode) -> None:
def __len__(self) -> int:
return len(self._image_id_to_annotation_dict)

def __getitem__(self, index: int) -> Tuple[str, FloatTensor, float, FloatTensor, LongTensor]:
def __getitem__(self, index: int) -> Tuple[str, Tensor, float, Tensor, Tensor]:
image_id = self._image_ids[index]
annotation = self._image_id_to_annotation_dict[image_id]

bboxes = [obj.bbox.tolist() for obj in annotation.objects if not obj.difficult]
labels = [Dataset.CATEGORY_TO_LABEL_DICT[obj.name] for obj in annotation.objects if not obj.difficult]

bboxes = torch.FloatTensor(bboxes)
labels = torch.LongTensor(labels)
bboxes = torch.tensor(bboxes, dtype=torch.float)
labels = torch.tensor(labels, dtype=torch.long)

image = Image.open(os.path.join(self._path_to_jpeg_images_dir, annotation.filename))

@@ -112,7 +112,7 @@ def __getitem__(self, index: int) -> Tuple[str, FloatTensor, float, FloatTensor,
return image_id, image, scale, bboxes, labels

@staticmethod
def preprocess(image: PIL.Image.Image):
def preprocess(image: PIL.Image.Image) -> Tuple[Tensor, float]:
# resize according to the rules:
# 1. scale shorter edge to 600
# 2. after scaling, if longer edge > 1000, scale longer edge to 1000
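A worked sketch of the resize rule in the comment above (an illustrative helper, not the repo's exact code): the shorter edge is scaled to 600, and if that would push the longer edge past 1000, the scale is capped by the longer edge instead.

```
def compute_scale(width: int, height: int, shorter: int = 600, longer: int = 1000) -> float:
    scale = shorter / min(width, height)        # rule 1: shorter edge -> 600
    if scale * max(width, height) > longer:     # rule 2: cap longer edge at 1000
        scale = longer / max(width, height)
    return scale

# a 500x375 image: 600 / 375 = 1.6 and the longer edge becomes 800 <= 1000, so scale = 1.6
# a 2000x600 image: rule 1 gives 1.0 but the longer edge would stay 2000, so scale = 1000 / 2000 = 0.5
print(compute_scale(500, 375), compute_scale(2000, 600))
```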
2 changes: 1 addition & 1 deletion eval.py
@@ -7,7 +7,7 @@
from model import Model


def _eval(path_to_checkpoint, path_to_data_dir, path_to_results_dir):
def _eval(path_to_checkpoint: str, path_to_data_dir: str, path_to_results_dir: str):
dataset = Dataset(path_to_data_dir, Dataset.Mode.TEST)
evaluator = Evaluator(dataset, path_to_data_dir, path_to_results_dir)

33 changes: 18 additions & 15 deletions evaluator.py
@@ -1,5 +1,7 @@
import os
from typing import Dict, List

import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

@@ -9,29 +11,30 @@


class Evaluator(object):
def __init__(self, dataset, path_to_data_dir, path_to_results_dir):
def __init__(self, dataset: Dataset, path_to_data_dir: str, path_to_results_dir: str):
super().__init__()
self.dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
self._path_to_data_dir = path_to_data_dir
self._path_to_results_dir = path_to_results_dir
os.makedirs(self._path_to_results_dir, exist_ok=True)

def evaluate(self, model):
def evaluate(self, model: Model) -> Dict[int, float]:
all_image_ids, all_pred_bboxes, all_pred_labels, all_pred_probs = [], [], [], []

for batch_index, (image_id_batch, image_batch, scale_batch, _, _) in enumerate(tqdm(self.dataloader)):
image_id = image_id_batch[0]
image = image_batch[0].cuda()
scale = scale_batch[0]
with torch.no_grad():
for batch_index, (image_id_batch, image_batch, scale_batch, _, _) in enumerate(tqdm(self.dataloader)):
image_id = image_id_batch[0]
image = image_batch[0].cuda()
scale = scale_batch[0].item()

pred_bboxes, pred_labels, pred_probs = model.detect(image)
pred_bboxes, pred_labels, pred_probs = model.detect(image)

pred_bboxes = [[it / scale for it in bbox] for bbox in pred_bboxes]
pred_bboxes = [[it / scale for it in bbox] for bbox in pred_bboxes]

all_pred_bboxes.extend(pred_bboxes)
all_pred_labels.extend(pred_labels)
all_pred_probs.extend(pred_probs)
all_image_ids.extend([image_id] * len(pred_labels))
all_pred_bboxes.extend(pred_bboxes)
all_pred_labels.extend(pred_labels)
all_pred_probs.extend(pred_probs)
all_image_ids.extend([image_id] * len(pred_labels))

self._write_results(all_image_ids, all_pred_bboxes, all_pred_labels, all_pred_probs)
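The hunk above is the 0.4-style inference loop in a nutshell: the removed `volatile=True` idiom is replaced by wrapping the whole loop in `torch.no_grad()`, and the 1-element `scale_batch[0]` tensor is converted to a Python number with `.item()`. A minimal sketch with a hypothetical model and loader (names are illustrative, not the repo's API):

```
import torch

def run_inference(model, dataloader):
    results = []
    with torch.no_grad():                  # replaces the removed volatile=True idiom
        for image_batch, scale_batch in dataloader:
            image = image_batch[0].cuda()
            scale = scale_batch[0].item()  # 1-element tensor -> Python float
            results.append((model(image), scale))
    return results
```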

Expand All @@ -57,13 +60,13 @@ def evaluate(self, model):

return label_to_ap_dict

def _write_results(self, image_ids, bboxes, labels, preds):
def _write_results(self, image_ids: List[str], bboxes: List[List[float]], labels: List[int], probs: List[float]):
label_to_txt_files_dict = {}
for c in range(1, Model.NUM_CLASSES):
label_to_txt_files_dict[c] = open(os.path.join(self._path_to_results_dir, 'comp3_det_test_{:s}.txt'.format(Dataset.LABEL_TO_CATEGORY_DICT[c])), 'w')

for image_id, bbox, label, pred in zip(image_ids, bboxes, labels, preds):
label_to_txt_files_dict[label].write('{:s} {:f} {:f} {:f} {:f} {:f}\n'.format(image_id, pred,
for image_id, bbox, label, prob in zip(image_ids, bboxes, labels, probs):
label_to_txt_files_dict[label].write('{:s} {:f} {:f} {:f} {:f} {:f}\n'.format(image_id, prob,
bbox[0], bbox[1], bbox[2], bbox[3]))

for _, f in label_to_txt_files_dict.items():
2 changes: 1 addition & 1 deletion infer.py
@@ -9,7 +9,7 @@
from model import Model


def _infer(path_to_input_image, path_to_output_image, path_to_checkpoint):
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str):
image = transforms.Image.open(path_to_input_image)
image_tensor, scale = Dataset.preprocess(image)
