Migrate to PyTorch 0.4.1
potterhsu committed Sep 10, 2018
1 parent 7a7ffc2 commit 9fc2ecd
Showing 11 changed files with 119 additions and 130 deletions.
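Most of the mechanical churn in this commit is the same few PyTorch 0.4 idioms applied throughout: typed constructors such as `torch.FloatTensor`/`torch.LongTensor` become `torch.tensor(..., dtype=...)`, inference loops get wrapped in `torch.no_grad()`, and 1-element tensors are converted to Python numbers with `.item()`. A minimal before/after sketch of the constructor change, with made-up values rather than repo data:

```
import torch

data = [[0.0, 0.0, 10.0, 10.0]]

# pre-0.4 style removed by this commit
bboxes_old = torch.FloatTensor(data)
labels_old = torch.LongTensor([1])

# 0.4.1 style introduced by this commit
bboxes = torch.tensor(data, dtype=torch.float)
labels = torch.tensor([1], dtype=torch.long)

assert torch.equal(bboxes, bboxes_old) and torch.equal(labels, labels_old)
```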
16 changes: 9 additions & 7 deletions README.md
@@ -26,11 +26,11 @@ An easy implementation of Faster R-CNN in PyTorch.

* **25 minutes** every 10000 steps

* **3 hours** for 70000 steps (which leads to mAP=70.29%)
* **3 hours** for 70000 steps (which leads to mAP=xx.xx%)

* Inference

* **~9 examples** per second
* **~13 examples** per second

### Trained Model

@@ -39,8 +39,8 @@ An easy implementation of Faster R-CNN in PyTorch.
## Requirements

* Python 3.6
* torch 0.3.1
* torchvision 0.2.0
* torch 0.4.1
* torchvision 0.2.1
* tqdm

```
@@ -50,8 +50,8 @@ An easy implementation of Faster R-CNN in PyTorch.
## Setup

1. Download VOC 2007 Dataset
- [Training / Validation](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar)
- [Test](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar)
- [Training / Validation](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar) (5011 images)
- [Test](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar) (4952 images)

1. Extract to the data folder; your folder structure should now look like:
```
@@ -83,7 +83,9 @@ An easy implementation of Faster R-CNN in PyTorch.
$ python test_nms.py
```
> sm_61 is for the GTX 1080 Ti; to see other architectures, visit [here](http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)
> Try rebuilding the module if the unit test fails
* Result after running `test_nms.py`:

![](https://github.com/potterhsu/easy-faster-rcnn.pytorch/blob/master/images/test_nms.png?raw=true)
23 changes: 12 additions & 11 deletions bbox.py
@@ -1,10 +1,11 @@
import torch
import numpy as np
import torch
from torch import Tensor


class BBox(object):

def __init__(self, left: float, top: float, right: float, bottom: float) -> None:
def __init__(self, left: float, top: float, right: float, bottom: float):
super().__init__()
self.left = left
self.top = top
@@ -19,7 +20,7 @@ def tolist(self):
return [self.left, self.top, self.right, self.bottom]

@staticmethod
def to_center_base(bboxes):
def to_center_base(bboxes: Tensor):
return torch.stack([
(bboxes[:, 0] + bboxes[:, 2]) / 2,
(bboxes[:, 1] + bboxes[:, 3]) / 2,
@@ -28,7 +29,7 @@ def to_center_base(bboxes):
], dim=1)

@staticmethod
def from_center_base(center_based_bboxes):
def from_center_base(center_based_bboxes: Tensor) -> Tensor:
return torch.stack([
center_based_bboxes[:, 0] - center_based_bboxes[:, 2] / 2,
center_based_bboxes[:, 1] - center_based_bboxes[:, 3] / 2,
@@ -37,7 +38,7 @@ def from_center_base(center_based_bboxes):
], dim=1)
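For reference, a small round-trip through the two conversions above, with made-up coordinates and assuming the usual width/height rows in the elided lines (corner form is `[left, top, right, bottom]`, center form is `[center_x, center_y, width, height]`):

```
import torch
from bbox import BBox  # the module shown in this diff

corner = torch.tensor([[10., 20., 50., 60.]])
center = BBox.to_center_base(corner)            # tensor([[30., 40., 40., 40.]])
assert torch.equal(BBox.from_center_base(center), corner)
```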

@staticmethod
def calc_transformer(src_bboxes, dst_bboxes):
def calc_transformer(src_bboxes: Tensor, dst_bboxes: Tensor) -> Tensor:
center_based_src_bboxes = BBox.to_center_base(src_bboxes)
center_based_dst_bboxes = BBox.to_center_base(dst_bboxes)
transformers = torch.stack([
@@ -49,7 +50,7 @@ def calc_transformer(src_bboxes, dst_bboxes):
return transformers

@staticmethod
def apply_transformer(src_bboxes, transformers):
def apply_transformer(src_bboxes: Tensor, transformers: Tensor) -> Tensor:
center_based_src_bboxes = BBox.to_center_base(src_bboxes)
center_based_dst_bboxes = torch.stack([
transformers[:, 0] * center_based_src_bboxes[:, 2] + center_based_src_bboxes[:, 0],
@@ -61,7 +62,7 @@ def apply_transformer(src_bboxes, transformers):
return dst_bboxes
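A hedged usage sketch of the pair above: `calc_transformer` encodes target boxes relative to source (anchor) boxes and `apply_transformer` decodes them back, so a round trip should recover the targets up to floating-point error. The coordinates are toy values, not repo data:

```
import torch
from bbox import BBox

anchors = torch.tensor([[0., 0., 16., 16.], [8., 8., 24., 24.]])
targets = torch.tensor([[2., 2., 14., 18.], [8., 10., 26., 22.]])

deltas = BBox.calc_transformer(anchors, targets)   # one (tx, ty, tw, th) row per box
decoded = BBox.apply_transformer(anchors, deltas)

assert torch.allclose(decoded, targets, atol=1e-4)
```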

@staticmethod
def iou(source, other):
def iou(source: Tensor, other: Tensor) -> Tensor:
source = source.repeat(other.shape[0], 1, 1).permute(1, 0, 2)
other = other.repeat(source.shape[0], 1, 1)

@@ -79,14 +80,14 @@ def iou(source, other):
return intersection_area / (source_area + other_area - intersection_area)
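A quick illustration of the pairwise behaviour of `iou` (and of `inside` below): a `source` of shape (N, 4) against an `other` of shape (M, 4) yields an (N, M) result. Assuming plain corner-based IoU with no +1 offset, the overlapping pair below comes out around 0.14:

```
import torch
from bbox import BBox

source = torch.tensor([[0., 0., 10., 10.]])
other = torch.tensor([[0., 0., 10., 10.], [5., 5., 15., 15.]])

print(BBox.iou(source, other))     # roughly tensor([[1.0000, 0.1429]])
print(BBox.inside(source, other))  # (1, 2) mask; 1 where the source box lies fully inside the other box
```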

@staticmethod
def inside(source, other) -> bool:
def inside(source: Tensor, other: Tensor) -> bool:
source = source.repeat(other.shape[0], 1, 1).permute(1, 0, 2)
other = other.repeat(source.shape[0], 1, 1)
return ((source[:, :, 0] >= other[:, :, 0]) * (source[:, :, 1] >= other[:, :, 1]) *
(source[:, :, 2] <= other[:, :, 2]) * (source[:, :, 3] <= other[:, :, 3]))

@staticmethod
def clip(bboxes, left: float, top: float, right: float, bottom: float):
def clip(bboxes: Tensor, left: float, top: float, right: float, bottom: float) -> Tensor:
return torch.stack([
torch.clamp(bboxes[:, 0], min=left, max=right),
torch.clamp(bboxes[:, 1], min=top, max=bottom),
@@ -95,7 +96,7 @@ def clip(bboxes, left: float, top: float, right: float, bottom: float):
], dim=1)

@staticmethod
def generate_anchors(max_x: int, max_y: int, stride: int):
def generate_anchors(max_x: int, max_y: int, stride: int) -> Tensor:
center_based_anchor_bboxes = []

# NOTE: it's important to let `anchor_y` be the major index of the list (i.e., move horizontally first, then vertically) for consistency with 2D convolution
@@ -110,7 +111,7 @@ def generate_anchors(max_x: int, max_y: int, stride: int):
width = size * np.sqrt(1 / r)
center_based_anchor_bboxes.append([center_x, center_y, width, height])

center_based_anchor_bboxes = torch.FloatTensor(center_based_anchor_bboxes)
center_based_anchor_bboxes = torch.tensor(center_based_anchor_bboxes, dtype=torch.float)
anchor_bboxes = BBox.from_center_base(center_based_anchor_bboxes)

return anchor_bboxes
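A hedged usage sketch of `generate_anchors`: for a feature map of `max_x` by `max_y` positions with a stride of 16, it returns one corner-based anchor box per (position, scale, ratio) combination; the exact scales and aspect ratios live in the lines elided above.

```
from bbox import BBox

# e.g. an 800x600 input downsampled by 16 gives a roughly 50x38 feature map
anchors = BBox.generate_anchors(max_x=50, max_y=38, stride=16)
print(anchors.shape)  # torch.Size([50 * 38 * k, 4]), where k is the number of scale/ratio combinations
```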
16 changes: 8 additions & 8 deletions dataset.py
@@ -7,8 +7,8 @@
import PIL
import torch.utils.data
from PIL import Image, ImageOps
from torch import Tensor
from torchvision import transforms
from torch import FloatTensor, LongTensor

from bbox import BBox

@@ -21,7 +21,7 @@ class Mode(Enum):

class Annotation(object):
class Object(object):
def __init__(self, name: str, difficult: bool, bbox: BBox) -> None:
def __init__(self, name: str, difficult: bool, bbox: BBox):
super().__init__()
self.name = name
self.difficult = difficult
@@ -31,7 +31,7 @@ def __repr__(self) -> str:
return 'Object[name={:s}, difficult={!s}, bbox={!s}]'.format(
self.name, self.difficult, self.bbox)

def __init__(self, filename: str, objects: List[Object]) -> None:
def __init__(self, filename: str, objects: List[Object]):
super().__init__()
self.filename = filename
self.objects = objects
@@ -46,7 +46,7 @@ def __init__(self, filename: str, objects: List[Object]) -> None:

LABEL_TO_CATEGORY_DICT = {v: k for k, v in CATEGORY_TO_LABEL_DICT.items()}

def __init__(self, path_to_data_dir: str, mode: Mode) -> None:
def __init__(self, path_to_data_dir: str, mode: Mode):
super().__init__()

self._mode = mode
@@ -89,15 +89,15 @@ def __init__(self, path_to_data_dir: str, mode: Mode) -> None:
def __len__(self) -> int:
return len(self._image_id_to_annotation_dict)

def __getitem__(self, index: int) -> Tuple[str, FloatTensor, float, FloatTensor, LongTensor]:
def __getitem__(self, index: int) -> Tuple[str, Tensor, float, Tensor, Tensor]:
image_id = self._image_ids[index]
annotation = self._image_id_to_annotation_dict[image_id]

bboxes = [obj.bbox.tolist() for obj in annotation.objects if not obj.difficult]
labels = [Dataset.CATEGORY_TO_LABEL_DICT[obj.name] for obj in annotation.objects if not obj.difficult]

bboxes = torch.FloatTensor(bboxes)
labels = torch.LongTensor(labels)
bboxes = torch.tensor(bboxes, dtype=torch.float)
labels = torch.tensor(labels, dtype=torch.long)

image = Image.open(os.path.join(self._path_to_jpeg_images_dir, annotation.filename))

@@ -112,7 +112,7 @@ def __getitem__(self, index: int) -> Tuple[str, FloatTensor, float, FloatTensor,
return image_id, image, scale, bboxes, labels

@staticmethod
def preprocess(image: PIL.Image.Image):
def preprocess(image: PIL.Image.Image) -> Tuple[Tensor, float]:
# resize according to the rules:
# 1. scale shorter edge to 600
# 2. after scaling, if longer edge > 1000, scale longer edge to 1000
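A worked sketch of the resize rule in the comment above (an illustrative helper, not the repo's exact code): the shorter edge is scaled to 600, and if that would push the longer edge past 1000, the scale is capped by the longer edge instead.

```
def compute_scale(width: int, height: int, shorter: int = 600, longer: int = 1000) -> float:
    scale = shorter / min(width, height)        # rule 1: shorter edge -> 600
    if scale * max(width, height) > longer:     # rule 2: cap longer edge at 1000
        scale = longer / max(width, height)
    return scale

# a 500x375 image: 600 / 375 = 1.6 and the longer edge becomes 800 <= 1000, so scale = 1.6
# a 2000x600 image: rule 1 gives 1.0 but the longer edge would stay 2000, so scale = 1000 / 2000 = 0.5
print(compute_scale(500, 375), compute_scale(2000, 600))
```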
2 changes: 1 addition & 1 deletion eval.py
@@ -7,7 +7,7 @@
from model import Model


def _eval(path_to_checkpoint, path_to_data_dir, path_to_results_dir):
def _eval(path_to_checkpoint: str, path_to_data_dir: str, path_to_results_dir: str):
dataset = Dataset(path_to_data_dir, Dataset.Mode.TEST)
evaluator = Evaluator(dataset, path_to_data_dir, path_to_results_dir)

33 changes: 18 additions & 15 deletions evaluator.py
@@ -1,5 +1,7 @@
import os
from typing import Dict, List

import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

@@ -9,29 +11,30 @@


class Evaluator(object):
def __init__(self, dataset, path_to_data_dir, path_to_results_dir):
def __init__(self, dataset: Dataset, path_to_data_dir: str, path_to_results_dir: str):
super().__init__()
self.dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
self._path_to_data_dir = path_to_data_dir
self._path_to_results_dir = path_to_results_dir
os.makedirs(self._path_to_results_dir, exist_ok=True)

def evaluate(self, model):
def evaluate(self, model: Model) -> Dict[int, float]:
all_image_ids, all_pred_bboxes, all_pred_labels, all_pred_probs = [], [], [], []

for batch_index, (image_id_batch, image_batch, scale_batch, _, _) in enumerate(tqdm(self.dataloader)):
image_id = image_id_batch[0]
image = image_batch[0].cuda()
scale = scale_batch[0]
with torch.no_grad():
for batch_index, (image_id_batch, image_batch, scale_batch, _, _) in enumerate(tqdm(self.dataloader)):
image_id = image_id_batch[0]
image = image_batch[0].cuda()
scale = scale_batch[0].item()

pred_bboxes, pred_labels, pred_probs = model.detect(image)
pred_bboxes, pred_labels, pred_probs = model.detect(image)

pred_bboxes = [[it / scale for it in bbox] for bbox in pred_bboxes]
pred_bboxes = [[it / scale for it in bbox] for bbox in pred_bboxes]

all_pred_bboxes.extend(pred_bboxes)
all_pred_labels.extend(pred_labels)
all_pred_probs.extend(pred_probs)
all_image_ids.extend([image_id] * len(pred_labels))
all_pred_bboxes.extend(pred_bboxes)
all_pred_labels.extend(pred_labels)
all_pred_probs.extend(pred_probs)
all_image_ids.extend([image_id] * len(pred_labels))

self._write_results(all_image_ids, all_pred_bboxes, all_pred_labels, all_pred_probs)
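The hunk above is the 0.4-style inference loop in a nutshell: the removed `volatile=True` idiom is replaced by wrapping the whole loop in `torch.no_grad()`, and the 1-element `scale_batch[0]` tensor is converted to a Python number with `.item()`. A minimal sketch with a hypothetical model and loader (names are illustrative, not the repo's API):

```
import torch

def run_inference(model, dataloader):
    results = []
    with torch.no_grad():                  # replaces the removed volatile=True idiom
        for image_batch, scale_batch in dataloader:
            image = image_batch[0].cuda()
            scale = scale_batch[0].item()  # 1-element tensor -> Python float
            results.append((model(image), scale))
    return results
```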

Expand All @@ -57,13 +60,13 @@ def evaluate(self, model):

return label_to_ap_dict

def _write_results(self, image_ids, bboxes, labels, preds):
def _write_results(self, image_ids: List[str], bboxes: List[List[float]], labels: List[int], probs: List[float]):
label_to_txt_files_dict = {}
for c in range(1, Model.NUM_CLASSES):
label_to_txt_files_dict[c] = open(os.path.join(self._path_to_results_dir, 'comp3_det_test_{:s}.txt'.format(Dataset.LABEL_TO_CATEGORY_DICT[c])), 'w')

for image_id, bbox, label, pred in zip(image_ids, bboxes, labels, preds):
label_to_txt_files_dict[label].write('{:s} {:f} {:f} {:f} {:f} {:f}\n'.format(image_id, pred,
for image_id, bbox, label, prob in zip(image_ids, bboxes, labels, probs):
label_to_txt_files_dict[label].write('{:s} {:f} {:f} {:f} {:f} {:f}\n'.format(image_id, prob,
bbox[0], bbox[1], bbox[2], bbox[3]))

for _, f in label_to_txt_files_dict.items():
2 changes: 1 addition & 1 deletion infer.py
@@ -9,7 +9,7 @@
from model import Model


def _infer(path_to_input_image, path_to_output_image, path_to_checkpoint):
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str):
image = transforms.Image.open(path_to_input_image)
image_tensor, scale = Dataset.preprocess(image)
