-
Notifications
You must be signed in to change notification settings - Fork 62
/
Copy pathtest_sintel_pose.py
125 lines (95 loc) · 4.92 KB
/
test_sintel_pose.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Author: Anurag Ranjan
# Copyright (c) 2019, Anurag Ranjan
# All rights reserved.
# based on github.com/ClementPinard/SfMLearner-Pytorch
import torch
from torch.autograd import Variable
from scipy.misc import imresize
import numpy as np
from path import Path
import argparse
from tqdm import tqdm
import models
from inverse_warp import pose_vec2mat
# Command-line interface of the Sintel pose evaluation script.
parser = argparse.ArgumentParser(
    description='Script for PoseNet testing with corresponding groundTruth from Sintel Odometry',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument("pretrained_posenet", type=str, help="pretrained PoseNet path")
# The value is looked up as an attribute of the ``models`` module, so it is a
# class name rather than a filesystem path.
parser.add_argument("--posenet", type=str, default="PoseNetB6",
                    help="name of the PoseNet class to instantiate from the models module")
parser.add_argument("--img-height", default=128, type=int, help="Image height")
parser.add_argument("--img-width", default=416, type=int, help="Image width")
parser.add_argument("--no-resize", action='store_true', help="no resizing is done")
# Explicit ``type=float`` so that values given on the command line are parsed
# as numbers instead of being left as strings.
# NOTE(review): neither depth bound is read by main() in this file.
parser.add_argument("--min-depth", default=1e-3, type=float, help="Minimum depth")
parser.add_argument("--max-depth", default=80, type=float, help="Maximum depth")

parser.add_argument("--dataset-dir", default='.', type=str, help="Dataset directory")
parser.add_argument("--sequences", default=['alley_1'], type=str, nargs='*', help="sequences to test")
parser.add_argument("--output-dir", default=None, type=str,
                    help="Output directory for saving predictions in a big 3D numpy file")
parser.add_argument("--img-exts", default=['png', 'jpg', 'bmp'], nargs='*', type=str,
                    help="images extensions to glob")
parser.add_argument("--rotation-mode", default='euler', choices=['euler', 'quat'], type=str,
                    help="rotation parameterization passed to pose_vec2mat")
def _poses_relative_to_first(inv_transform_matrices):
    """Invert every [R|t] matrix and re-express it relative to the first one.

    Args:
        inv_transform_matrices: array of shape (N, 3, 4); the first three
            columns of each entry are treated as a rotation and the last
            column as a translation.

    Returns:
        Array of shape (N, 3, 4). Entry 0 is [I|0] by construction.
    """
    rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
    tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]
    transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)

    # Compose with the first (un-inverted) transform so that pose 0 becomes
    # the identity and the remaining poses are expressed relative to it.
    first_inv_transform = inv_transform_matrices[0]
    final_poses = first_inv_transform[:, :3] @ transform_matrices
    final_poses[:, :, -1:] += first_inv_transform[:, -1:]
    return final_poses


def main():
    """Evaluate a pretrained PoseNet on Sintel snippets and print the rotation error.

    Command-line options are read from the module-level ``parser``. The
    checkpoint given by ``pretrained_posenet`` is loaded, the network is run
    on every snippet yielded by ``test_framework_Sintel``, and the mean and
    standard deviation of the per-snippet rotation error (as returned by
    ``compute_pose_error``) are printed. When ``--output-dir`` is given, the
    predicted poses are additionally saved to ``<output-dir>/predictions.npy``.

    A CUDA device is required: the network and its inputs are moved to the GPU.
    """
    args = parser.parse_args()
    from sintel_eval.pose_evaluation_utils import test_framework_Sintel as test_framework

    weights = torch.load(args.pretrained_posenet)
    # The snippet length is recovered from the input channel count of the
    # first convolution, at 3 channels per frame.
    seq_length = weights['state_dict']['conv1.0.weight'].size(1) // 3
    pose_net = getattr(models, args.posenet)(nb_ref_imgs=seq_length - 1).cuda()
    pose_net.load_state_dict(weights['state_dict'], strict=False)
    # Inference only: normalisation/dropout layers (if the model has any) must
    # use their evaluation behaviour rather than per-batch training behaviour.
    pose_net.eval()

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    RE = np.zeros((len(framework)), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    # ``Variable(..., volatile=True)`` has had no effect since PyTorch 0.4;
    # ``torch.no_grad()`` is what actually disables autograd bookkeeping.
    with torch.no_grad():
        for j, sample in enumerate(tqdm(framework)):
            imgs = sample['imgs']

            h, w, _ = imgs[0].shape
            if (not args.no_resize) and (h != args.img_height or w != args.img_width):
                # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3;
                # this script needs an older SciPy until the import is replaced.
                imgs = [imresize(img, (args.img_height, args.img_width)) for img in imgs]

            # Always feed float32 (not only after resizing) so that the
            # normalisation below never operates on integer pixel data.
            # HWC -> CHW.
            imgs = [np.transpose(np.asarray(img, dtype=np.float32), (2, 0, 1)) for img in imgs]

            # Add a batch dimension and map pixel values from [0, 255] to [-1, 1].
            tensors = [((torch.from_numpy(img).unsqueeze(0) / 255 - 0.5) / 0.5).cuda()
                       for img in imgs]

            # The middle frame is the target; all others are references.
            mid = len(tensors) // 2
            tgt_img = tensors[mid]
            ref_imgs = tensors[:mid] + tensors[mid + 1:]

            if args.posenet in ["PoseNet6", "PoseNetB6"]:
                poses = pose_net(tgt_img, ref_imgs)
            else:
                _, poses = pose_net(tgt_img, ref_imgs)

            poses = poses.cpu()[0]
            # Insert an all-zero pose vector for the target frame so that
            # there is one pose per frame of the snippet.
            poses = torch.cat([poses[:mid], torch.zeros(1, 6).float(), poses[mid:]])

            inv_transform_matrices = pose_vec2mat(
                poses, rotation_mode=args.rotation_mode
            ).detach().numpy().astype(np.float64)

            final_poses = _poses_relative_to_first(inv_transform_matrices)

            if args.output_dir is not None:
                predictions_array[j] = final_poses

            RE[j] = compute_pose_error(sample['poses'], final_poses)

    print('')
    print("Results")
    print("\t {:>10}".format('RE'))
    print("mean \t {:10.4f}".format(RE.mean()))
    print("std \t {:10.4f}".format(RE.std()))

    if args.output_dir is not None:
        np.save(output_dir/'predictions.npy', predictions_array)
def compute_pose_error(gt, pred):
    """Return the mean rotation error, in radians, between two pose sequences.

    For every pair of poses the residual rotation ``R_gt @ inv(R_pred)`` is
    formed and its rotation angle is computed with ``arctan2``.

    Args:
        gt: ground-truth poses, array-like indexable as ``gt[k][:, :3]``
            (e.g. shape (N, 3, 4)); only the first three columns of each
            pose are used.
        pred: predicted poses, same layout as ``gt``.

    Returns:
        The sum of the per-pair angles divided by the number of ground-truth
        poses. Pairs are formed with ``zip``, so extra entries in the longer
        sequence are ignored.

    Raises:
        ValueError: if ``gt`` contains no poses.
    """
    # Accept lists/tuples as well as ndarrays.
    gt = np.asarray(gt)
    pred = np.asarray(pred)

    snippet_length = gt.shape[0]
    if snippet_length == 0:
        raise ValueError("compute_pose_error needs at least one ground-truth pose")

    RE = 0
    for gt_pose, pred_pose in zip(gt, pred):
        # Residual rotation between ground truth and prediction.
        R = gt_pose[:, :3] @ np.linalg.inv(pred_pose[:, :3])
        # The antisymmetric part gives 2*sin(angle), the trace gives
        # 1 + 2*cos(angle); arctan2 is invariant to the common factor of 2.
        s = np.linalg.norm([R[0, 1] - R[1, 0],
                            R[1, 2] - R[2, 1],
                            R[0, 2] - R[2, 0]])
        c = np.trace(R) - 1
        RE += np.arctan2(s, c)

    return RE / snippet_length
# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()