#!/usr/bin/env python
"""Defines the several functions to pre-process a video
"""
from PIL import Image
import numpy as np
import cv2
import torch
import torchvision
__author__ = "jssprz"
__version__ = "0.0.1"
__maintainer__ = "jssprz"
__email__ = "jperezmartin90@gmail.com"
__status__ = "Development"


class GroupScale(object):
    """Rescales each PIL.Image in the input group to the given 'size'.

    'size' will be the size of the smaller edge. For example, if
    height > width, then the image will be rescaled to
    (size * height / width, size).

    size: size of the smaller edge
    interpolation: default PIL.Image.BILINEAR
    """

    def __init__(self, size, interpolation=Image.BILINEAR):
        self.worker = torchvision.transforms.Resize(size, interpolation)

    def __call__(self, img_group):
        return [self.worker(img) for img in img_group]
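
# A minimal usage sketch for GroupScale (the `frames` list of PIL images is a
# hypothetical input, not defined in this module):
#
#   scaler = GroupScale(256)
#   scaled = scaler(frames)  # each frame's smaller edge is resized to 256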


def resize_frame(image, target_height=224, target_width=224):
    """Resize a frame so its smaller edge matches the target size, then
    center-crop the longer edge.

    :param image: frame as a numpy array (grayscale, RGB, or RGBA)
    :param target_height: height of the output frame
    :param target_width: width of the output frame
    :return: frame of shape (target_height, target_width, 3)
    """
    if len(image.shape) == 2:
        # Tile a single-channel gray-scale image into a three-channel image
        image = np.tile(image[:, :, None], 3)
    elif len(image.shape) == 4:
        image = image[:, :, :, 0]

    height, width, channels = image.shape
    if height == width:
        # cv2.resize expects the output size as (width, height)
        resized_image = cv2.resize(image, (target_width, target_height))
    elif height < width:
        # Scale the smaller edge (height) to target_height, then center-crop the width
        resized_image = cv2.resize(image, (int(width * target_height / height), target_height))
        cropping_length = int((resized_image.shape[1] - target_width) / 2)
        resized_image = resized_image[:, cropping_length:resized_image.shape[1] - cropping_length]
    else:
        # Scale the smaller edge (width) to target_width, then center-crop the height
        resized_image = cv2.resize(image, (target_width, int(height * target_width / width)))
        cropping_length = int((resized_image.shape[0] - target_height) / 2)
        resized_image = resized_image[cropping_length:resized_image.shape[0] - cropping_length]

    return cv2.resize(resized_image, (target_width, target_height))
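
# A minimal sketch of resize_frame on a synthetic frame (the 480x640 shape is
# an arbitrary example, not something this module assumes):
#
#   frame = np.zeros((480, 640, 3), dtype=np.uint8)
#   out = resize_frame(frame)  # -> shape (224, 224, 3)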


def scale_frame(img, h, w):
    # cv2.resize takes the output size as (width, height)
    return cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)


def crop_center(img, cropx, cropy):
    """Crop the central (cropy, cropx) region of an H x W x C image."""
    y, x, c = img.shape
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    return img[starty:starty + cropy, startx:startx + cropx, :]
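
# scale_frame and crop_center are typically chained: scale to a square of
# `scale_size`, then keep the central `crop_size` square. A sketch (the input
# frame below is a hypothetical example):
#
#   frame = np.zeros((480, 640, 3), dtype=np.uint8)
#   square = scale_frame(frame, 256, 256)  # -> (256, 256, 3)
#   patch = crop_center(square, 224, 224)  # -> (224, 224, 3)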


class ToTensorWithoutScaling(object):
    """H x W x C -> C x H x W, without rescaling values to [0, 1]."""

    def __call__(self, picture):
        return torch.from_numpy(np.array(picture)).float().permute(2, 0, 1)


def to_normalized_float_tensor(vid):
    # T x H x W x C uint8 video -> C x T x H x W float tensor in [0, 1]
    return vid.permute(3, 0, 1, 2).to(torch.float32) / 255


class ToFloatTensorInZeroOne(object):
    def __call__(self, vid):
        return to_normalized_float_tensor(vid)
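
# A minimal sketch for ToFloatTensorInZeroOne (the uint8 video tensor below is
# a synthetic example): a T x H x W x C clip becomes a C x T x H x W float
# tensor with values in [0, 1].
#
#   vid = torch.randint(0, 256, (16, 224, 224, 3), dtype=torch.uint8)
#   out = ToFloatTensorInZeroOne()(vid)  # -> torch.Size([3, 16, 224, 224])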


def preprocess_frame(image, scale_size=256, crop_size=224,
                     mean=[.485, .456, .406], std=[.229, .224, .225],
                     normalize_input=False):
    image = np.asarray(image, dtype=np.float32)
    if normalize_input:
        # Stretch the intensity range so the maximum value maps to 255
        image *= 255.0 / image.max()
    image = scale_frame(image, scale_size, scale_size)
    image = crop_center(image, crop_size, crop_size).astype(np.float32)
    # The ImageNet mean/std below are expressed on a [0, 1] scale
    image /= 255.0
    image -= np.array(mean).astype(np.float32)
    image /= np.array(std).astype(np.float32)
    return image
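

if __name__ == '__main__':
    # Illustrative smoke test on a synthetic random frame (a sketch assuming
    # the default 256/224 sizes; not part of the pre-processing pipeline).
    dummy = np.random.randint(0, 256, (360, 480, 3), dtype=np.uint8)

    resized = resize_frame(dummy)
    print('resize_frame:', resized.shape)        # (224, 224, 3)

    processed = preprocess_frame(dummy)
    print('preprocess_frame:', processed.shape)  # (224, 224, 3)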