
[Feature] Support GPU Normalize #586

Merged · 14 commits merged on Feb 19, 2021
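This PR adds a `GPUNormalize` module hook so that mean/std normalization runs on the GPU in a forward pre-hook instead of per frame on the CPU inside the data pipeline. As a rough sketch of how a config opts in (keys as defined in `mmaction/utils/module_hooks.py` below; the values are the ImageNet statistics used by the new TSM config):

```python
# Sketch: enable GPU-side normalization through a module hook. The
# `Normalize` step is dropped from the data pipeline, so batches reach the
# backbone as uint8 tensors and are normalized on the GPU by the pre-hook.
module_hooks = [
    dict(
        type='GPUNormalize',
        hook_pos='forward_pre',   # 'forward' and 'backward' are also accepted
        input_format='NCHW',      # one of NCTHW / NCHW / NCHW_Flow / NPTCHW
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375])
]
```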
1 change: 1 addition & 0 deletions configs/recognition/tsm/README.md
@@ -32,6 +32,7 @@
|:--|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
|[tsm_r50_1x1x8_50e_kinetics400_rgb](/configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py) |340x256|8| ResNet50| ImageNet |70.24|89.56|[70.36](https://github.com/mit-han-lab/temporal-shift-module/blob/8d53d6fda40bea2f1b37a6095279c4b454d672bd/scripts/train_tsm_kinetics_rgb_8f.sh)|[89.49](https://github.com/mit-han-lab/temporal-shift-module/blob/8d53d6fda40bea2f1b37a6095279c4b454d672bd/scripts/train_tsm_kinetics_rgb_8f.sh)|74.0 (8x1 frames)| 7079 | [ckpt](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb/tsm_r50_1x1x8_50e_kinetics400_rgb_20200607-af7fb746.pth) | [log](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb/20200607_211800.log)| [json](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb/20200607_211800.log.json)|
|[tsm_r50_1x1x8_50e_kinetics400_rgb](/configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py) |short-side 256|8| ResNet50| ImageNet |70.59|89.52|x|x|x|7079|[ckpt](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_256p_1x1x8_50e_kinetics400_rgb/tsm_r50_256p_1x1x8_50e_kinetics400_rgb_20200726-020785e2.pth)|[log](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_256p_1x1x8_50e_kinetics400_rgb/20200725_031623.log)|[json](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_256p_1x1x8_50e_kinetics400_rgb/20200725_031623.log.json)|
|[tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb](/configs/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb.py) |short-side 256|8| ResNet50| ImageNet |70.48|89.40|x|x|x|7076|[ckpt](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb_20210219-bf96e6cc.pth)|[log](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb_20210219.log)|[json](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb_20210219.json)|
|[tsm_r50_video_1x1x8_50e_kinetics400_rgb](/configs/recognition/tsm/tsm_r50_video_1x1x8_50e_kinetics400_rgb.py) |short-side 256|8| ResNet50| ImageNet |70.25|89.66|[70.36](https://github.com/mit-han-lab/temporal-shift-module/blob/8d53d6fda40bea2f1b37a6095279c4b454d672bd/scripts/train_tsm_kinetics_rgb_8f.sh)|[89.49](https://github.com/mit-han-lab/temporal-shift-module/blob/8d53d6fda40bea2f1b37a6095279c4b454d672bd/scripts/train_tsm_kinetics_rgb_8f.sh)|74.0 (8x1 frames)| 7077 | [ckpt]( https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x8_100e_kinetics400_rgb/tsm_r50_video_1x1x8_100e_kinetics400_rgb_20200702-a77f4328.pth) | [log](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x8_100e_kinetics400_rgb/tsm_r50_video_2d_1x1x8_50e_kinetics400_rgb.log)| [json](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x8_100e_kinetics400_rgb/tsm_r50_video_2d_1x1x8_50e_kinetics400_rgb.log.json)|
|[tsm_r50_dense_1x1x8_100e_kinetics400_rgb](/configs/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py) |340x256|8x4| ResNet50 | ImageNet|72.9|90.44|[72.22](https://github.com/mit-han-lab/temporal-shift-module/tree/8d53d6fda40bea2f1b37a6095279c4b454d672bd#dense-sample)|[90.37](https://github.com/mit-han-lab/temporal-shift-module/tree/8d53d6fda40bea2f1b37a6095279c4b454d672bd#dense-sample)|11.5 (8x10 frames)| 7079 | [ckpt](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb/tsm_r50_dense_1x1x8_100e_kinetics400_rgb_20200626-91a54551.pth) | [log](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb/20200626_213415.log)| [json](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb/20200626_213415.log.json)|
|[tsm_r50_dense_1x1x8_100e_kinetics400_rgb](/configs/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py) |short-side 256|8| ResNet50 | ImageNet|73.38|91.02|x|x|x|7079|[ckpt](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_256p_1x1x8_100e_kinetics400_rgb/tsm_r50_dense_256p_1x1x8_100e_kinetics400_rgb_20200727-e1e0c785.pth)|[log](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_256p_1x1x8_100e_kinetics400_rgb/20200725_032043.log)|[json](https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_dense_256p_1x1x8_100e_kinetics400_rgb/20200725_032043.log.json)|
93 changes: 93 additions & 0 deletions configs/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb.py
@@ -0,0 +1,93 @@
_base_ = [
'../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
'../../_base_/default_runtime.py'
]

module_hooks = [
dict(
type='GPUNormalize',
hook_pos='forward_pre',
input_format='NCHW',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375])
]

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'

train_pipeline = [
dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(
type='MultiScaleCrop',
input_size=224,
scales=(1, 0.875, 0.75, 0.66),
random_crop=False,
max_wh_scale_gap=1,
num_fixed_crops=13),
dict(type='Resize', scale=(224, 224), keep_ratio=False),
dict(type='Flip', flip_ratio=0.5),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
dict(
type='SampleFrames',
clip_len=1,
frame_interval=1,
num_clips=8,
test_mode=True),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='CenterCrop', crop_size=224),
dict(type='Flip', flip_ratio=0),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
dict(
type='SampleFrames',
clip_len=1,
frame_interval=1,
num_clips=8,
test_mode=True),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='CenterCrop', crop_size=224),
dict(type='Flip', flip_ratio=0),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs'])
]
data = dict(
videos_per_gpu=8,
workers_per_gpu=4,
train=dict(
type=dataset_type,
ann_file=ann_file_train,
data_prefix=data_root,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
data_prefix=data_root_val,
pipeline=val_pipeline),
test=dict(
type=dataset_type,
ann_file=ann_file_test,
data_prefix=data_root_val,
pipeline=test_pipeline))
evaluation = dict(
interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsm_r50_gpu_normalize_1x1x8_100e_kinetics400_rgb/'
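Unlike the baseline TSM configs, none of the pipelines above contain a `Normalize` step: frames stay uint8 through `FormatShape` and `ToTensor`, and the mean/std subtraction happens in the `GPUNormalize` pre-hook. For comparison, a CPU-side pipeline in the usual MMAction2 style would look roughly like this (a sketch of what this config removes, not part of the diff):

```python
# CPU-side alternative: normalization as a pipeline step, applied per sample
# on the CPU before batching (this is what the GPU-normalize config drops).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)

train_pipeline = [
    # ... SampleFrames / RawFrameDecode / Resize / MultiScaleCrop / Flip ...
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
```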
2 changes: 2 additions & 0 deletions mmaction/datasets/pipelines/formating.py
@@ -276,6 +276,8 @@ def __call__(self, results):
results (dict): The resulting dict to be modified and passed
to the next transform in pipeline.
"""
if not isinstance(results['imgs'], np.ndarray):
results['imgs'] = np.array(results['imgs'])
imgs = results['imgs']
# [M x H x W x C]
# M = 1 * N_crops * N_clips * L
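The two added lines make `FormatShape` tolerant of receiving a plain Python list: with `Normalize` removed from the pipeline, nothing has stacked the decoded frames into a single array before this transform, so it now does the conversion itself. A minimal sketch of the effect, with hypothetical shapes:

```python
# Sketch: without a pipeline `Normalize` step, `results['imgs']` can still be
# a list of per-frame uint8 arrays when it reaches FormatShape.
import numpy as np

results = dict(imgs=[np.zeros((224, 224, 3), dtype=np.uint8) for _ in range(8)])
if not isinstance(results['imgs'], np.ndarray):
    results['imgs'] = np.array(results['imgs'])  # -> shape (8, 224, 224, 3), dtype uint8
```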
3 changes: 2 additions & 1 deletion mmaction/utils/__init__.py
@@ -3,10 +3,11 @@
from .gradcam_utils import GradCAM
from .logger import get_root_logger
from .misc import get_random_string, get_shm_dir, get_thread_id
from .module_hooks import register_module_hooks
from .precise_bn import PreciseBNHook

__all__ = [
'get_root_logger', 'collect_env', 'get_random_string', 'get_thread_id',
'get_shm_dir', 'GradCAM', 'PreciseBNHook', 'import_module_error_class',
'import_module_error_func'
'import_module_error_func', 'register_module_hooks'
]
77 changes: 77 additions & 0 deletions mmaction/utils/module_hooks.py
@@ -0,0 +1,77 @@
import torch
from mmcv.utils import Registry, build_from_cfg

MODULE_HOOKS = Registry('module_hooks')


def register_module_hooks(Module, module_hooks_list):
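    """Register hooks on a module from a list of hook configs.

    Args:
        Module (nn.Module): The module (e.g. a recognizer backbone) to
            register the hooks on.
        module_hooks_list (list[dict]): Hook configs; each may carry an
            optional ``hook_pos`` key ('forward_pre', 'forward' or
            'backward', default 'forward_pre').

    Returns:
        list: The removable handles returned by the register calls.
    """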
handles = []
for module_hook_cfg in module_hooks_list:
hook_pos = module_hook_cfg.pop('hook_pos', 'forward_pre')
if hook_pos == 'forward_pre':
handle = Module.register_forward_pre_hook(
build_from_cfg(module_hook_cfg, MODULE_HOOKS).hook_func())
elif hook_pos == 'forward':
handle = Module.register_forward_hook(
build_from_cfg(module_hook_cfg, MODULE_HOOKS).hook_func())
elif hook_pos == 'backward':
handle = Module.register_backward_hook(
build_from_cfg(module_hook_cfg, MODULE_HOOKS).hook_func())
else:
raise ValueError(
f'hook_pos must be `forward_pre`, `forward` or `backward`, '
f'but get {hook_pos}')
handles.append(handle)
return handles


@MODULE_HOOKS.register_module()
class GPUNormalize:
"""Normalize images with the given mean and std value on GPUs.

    Calling the member function ``hook_func`` returns the forward pre-hook
    function for module registration.

    Args:
        input_format (str): Data format of the input, one of 'NCTHW',
            'NCHW', 'NCHW_Flow' or 'NPTCHW'.
        mean (Sequence[float]): Mean values of different channels.
        std (Sequence[float]): Std values of different channels.
    """

def __init__(self, input_format, mean, std):
if input_format not in ['NCTHW', 'NCHW', 'NCHW_Flow', 'NPTCHW']:
raise ValueError(f'The input format {input_format} is invalid.')
self.input_format = input_format
_mean = torch.tensor(mean)
_std = torch.tensor(std)
if input_format == 'NCTHW':
self._mean = _mean[None, :, None, None, None]
self._std = _std[None, :, None, None, None]
elif input_format == 'NCHW':
self._mean = _mean[None, :, None, None]
self._std = _std[None, :, None, None]
elif input_format == 'NCHW_Flow':
self._mean = _mean[None, :, None, None]
self._std = _std[None, :, None, None]
elif input_format == 'NPTCHW':
self._mean = _mean[None, None, None, :, None, None]
self._std = _std[None, None, None, :, None, None]
else:
raise ValueError(f'The input format {input_format} is invalid.')

def hook_func(self):

def normalize_hook(Module, input):
x = input[0]
assert x.dtype == torch.uint8, (
f'The previous augmentation should use uint8 data type to '
f'speed up computation, but get {x.dtype}')

mean = self._mean.to(x.device)
std = self._std.to(x.device)

with torch.no_grad():
x = x.float().sub_(mean).div_(std)

return (x, *input[1:])

return normalize_hook
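Putting the pieces together, a hedged usage sketch (mirroring what the new unit tests below do): register the hook on a torchvision backbone from a config list and feed it a uint8 batch; the pre-hook casts to float and normalizes on whatever device the tensor lives on.

```python
# Usage sketch (torchvision is assumed available, as in the new tests).
import torch
import torchvision.models as models

from mmaction.utils import register_module_hooks

module_hooks = [
    dict(
        type='GPUNormalize',
        hook_pos='forward_pre',
        input_format='NCHW',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375])
]

backbone = models.resnet50()
handles = register_module_hooks(backbone, module_hooks)

imgs = torch.randint(0, 256, (2, 3, 224, 224), dtype=torch.uint8)
out = backbone(imgs)  # the pre-hook normalizes `imgs` before the forward pass

for handle in handles:  # hooks can be detached again via the handles
    handle.remove()
```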
2 changes: 1 addition & 1 deletion setup.cfg
@@ -19,6 +19,6 @@ line_length = 79
multi_line_output = 0
known_standard_library = pkg_resources,setuptools
known_first_party = mmaction
known_third_party = cv2,joblib,matplotlib,mmcv,numpy,pandas,pytest,scipy,seaborn,titlecase,torch,tqdm
known_third_party = cv2,joblib,matplotlib,mmcv,numpy,pandas,pytest,scipy,seaborn,titlecase,torch,torchvision,tqdm
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
2 changes: 1 addition & 1 deletion tests/test_runtime/test_train.py
@@ -73,7 +73,7 @@ def test_train_model():
load_from=None,
workflow=[('train', 1)],
total_epochs=5,
evaluation=dict(interval=1, key_indicator='acc'),
evaluation=dict(interval=1, save_best='acc'),
data=dict(
videos_per_gpu=1,
workers_per_gpu=0,
103 changes: 103 additions & 0 deletions tests/test_utils/test_module_hooks.py
@@ -0,0 +1,103 @@
import copy

import numpy as np
import pytest
import torch
import torchvision.models as models

from mmaction.utils import register_module_hooks
from mmaction.utils.module_hooks import GPUNormalize


def test_register_module_hooks():
_module_hooks = [
dict(
type='GPUNormalize',
hook_pos='forward_pre',
input_format='NCHW',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375])
]

# case 1
module_hooks = copy.deepcopy(_module_hooks)
module_hooks[0]['hook_pos'] = 'forward_pre'
resnet = models.resnet50()
handles = register_module_hooks(resnet, module_hooks)
assert resnet._forward_pre_hooks[
handles[0].id].__name__ == 'normalize_hook'

# case 2
module_hooks = copy.deepcopy(_module_hooks)
module_hooks[0]['hook_pos'] = 'forward'
resnet = models.resnet50()
handles = register_module_hooks(resnet, module_hooks)
assert resnet._forward_hooks[handles[0].id].__name__ == 'normalize_hook'

# case 3
module_hooks = copy.deepcopy(_module_hooks)
module_hooks[0]['hook_pos'] = 'backward'
resnet = models.resnet50()
handles = register_module_hooks(resnet, module_hooks)
assert resnet._backward_hooks[handles[0].id].__name__ == 'normalize_hook'

# case 4
module_hooks = copy.deepcopy(_module_hooks)
module_hooks[0]['hook_pos'] = '_other_pos'
resnet = models.resnet50()
with pytest.raises(ValueError):
handles = register_module_hooks(resnet, module_hooks)


def test_gpu_normalize():

def check_normalize(origin_imgs, result_imgs, norm_cfg):
"""Check if the origin_imgs are normalized correctly into result_imgs
in a given norm_cfg."""
from numpy.testing import assert_array_almost_equal
target_imgs = result_imgs.copy()
target_imgs *= norm_cfg['std']
target_imgs += norm_cfg['mean']
assert_array_almost_equal(origin_imgs, target_imgs, decimal=4)

_gpu_normalize_cfg = dict(
input_format='NCTHW',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375])

# case 1
gpu_normalize_cfg = copy.deepcopy(_gpu_normalize_cfg)
gpu_normalize_cfg['input_format'] = 'NCHW'
gpu_normalize = GPUNormalize(**gpu_normalize_cfg)
assert gpu_normalize._mean.shape == (1, 3, 1, 1)
imgs = np.random.randint(256, size=(2, 240, 320, 3), dtype=np.uint8)
_input = (torch.tensor(imgs).permute(0, 3, 1, 2), )
resnet = models.resnet50()
normalize_hook = gpu_normalize.hook_func()
_input = normalize_hook(resnet, _input)
result_imgs = np.array(_input[0].permute(0, 2, 3, 1))
check_normalize(imgs, result_imgs, gpu_normalize_cfg)

# case 2
gpu_normalize_cfg = copy.deepcopy(_gpu_normalize_cfg)
gpu_normalize_cfg['input_format'] = 'NCTHW'
gpu_normalize = GPUNormalize(**gpu_normalize_cfg)
assert gpu_normalize._mean.shape == (1, 3, 1, 1, 1)

# case 3
gpu_normalize_cfg = copy.deepcopy(_gpu_normalize_cfg)
gpu_normalize_cfg['input_format'] = 'NCHW_Flow'
gpu_normalize = GPUNormalize(**gpu_normalize_cfg)
assert gpu_normalize._mean.shape == (1, 3, 1, 1)

# case 4
gpu_normalize_cfg = copy.deepcopy(_gpu_normalize_cfg)
gpu_normalize_cfg['input_format'] = 'NPTCHW'
gpu_normalize = GPUNormalize(**gpu_normalize_cfg)
assert gpu_normalize._mean.shape == (1, 1, 1, 3, 1, 1)

# case 5
gpu_normalize_cfg = copy.deepcopy(_gpu_normalize_cfg)
gpu_normalize_cfg['input_format'] = '_format'
with pytest.raises(ValueError):
gpu_normalize = GPUNormalize(**gpu_normalize_cfg)
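Assuming a standard development setup with `pytest` and `torchvision` installed, these tests can be run in isolation with `pytest tests/test_utils/test_module_hooks.py`.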
7 changes: 7 additions & 0 deletions tools/test.py
@@ -15,6 +15,7 @@
from mmaction.apis import multi_gpu_test, single_gpu_test
from mmaction.datasets import build_dataloader, build_dataset
from mmaction.models import build_model
from mmaction.utils import register_module_hooks


def parse_args():
@@ -155,6 +156,9 @@ def main():
distributed = True
init_dist(args.launcher, **cfg.dist_params)

# The flag is used to register module's hooks
cfg.setdefault('module_hooks', [])

# build the dataloader
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
dataloader_setting = dict(
@@ -168,6 +172,9 @@

# build the model and load checkpoint
model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

register_module_hooks(model.backbone, cfg.module_hooks)

fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
wrap_fp16_model(model)
7 changes: 6 additions & 1 deletion tools/train.py
@@ -15,7 +15,7 @@
from mmaction.apis import train_model
from mmaction.datasets import build_dataset
from mmaction.models import build_model
from mmaction.utils import collect_env, get_root_logger
from mmaction.utils import collect_env, get_root_logger, register_module_hooks


def parse_args():
@@ -103,6 +103,9 @@ def main():
# The flag is used to determine whether it is omnisource training
cfg.setdefault('omnisource', False)

# The flag is used to register module's hooks
cfg.setdefault('module_hooks', [])

# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# dump config
@@ -140,6 +143,8 @@
model = build_model(
cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

register_module_hooks(model.backbone, cfg.module_hooks)

if cfg.omnisource:
# If omnisource flag is set, cfg.data.train should be a list
assert type(cfg.data.train) is list
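In both entry points the wiring is the same: the new config key defaults to an empty list so existing configs keep working, and any configured hooks are registered on the model backbone right after the model is built. A condensed sketch of the shared flow (argument details vary slightly between `tools/train.py` and `tools/test.py`):

```python
# Condensed sketch of the shared flow in tools/train.py and tools/test.py.
cfg.setdefault('module_hooks', [])            # no-op for configs without hooks

model = build_model(cfg.model, train_cfg=cfg.get('train_cfg'),
                    test_cfg=cfg.get('test_cfg'))
register_module_hooks(model.backbone, cfg.module_hooks)
```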