-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathDataPreprocessing.py
75 lines (60 loc) · 2.21 KB
/
DataPreprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding: utf-8 -*-
"""DataPreprocessing.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1iZLQ3IqfMJ-DBVLYl4J9KPZInf_xoGxW
#Data Preprocessing
"""
#Importing required libraries
from torchvision.datasets import ImageFolder
from torchvision import transforms
import numpy as np
import torch
class Preprocessing():
    # Applies one fixed transformation pipeline to images.
    # It can prepare a single image for prediction (preprocessed_arrays) or
    # build the train/test datasets from image folders (preprocessed_dataset).

    def __init__(self, path=None, img=None):
        '''
        params: path: (dict) image folder directories stored under the
                      'train' and 'test' keys; only needed by
                      preprocessed_dataset
        params: img: image to transform; only needed by preprocessed_arrays
                     (presumably a PIL image -- confirm against the caller)
        '''
        self.directory = path
        self.img = img

    def __image_transformation(self):
        '''
        params: None
        return: (Compose) the base transformations shared by every image
        '''
        return transforms.Compose([
            # Resize slightly larger than the target, then crop the centre
            # 128x128 region.
            transforms.Resize((130, 130)),
            transforms.CenterCrop(128),
            # Keep a single output channel.
            transforms.Grayscale(1),
            transforms.ToTensor(),
            # Subtract a mean of 0.5 and divide by a std of 0.5.
            transforms.Normalize(0.5, 0.5),
        ])

    def preprocessed_arrays(self):
        '''
        For predicting.

        params: None
        return: (Tensor) the transformed image with an extra leading axis of
                size 1
        raises: TypeError if the instance was created without an image
        '''
        if self.img is None:
            raise TypeError(
                "img must be an image, not None; pass img= to Preprocessing")
        # Named 'transform' so the imported 'transforms' module is not
        # shadowed inside this method.
        transform = self.__image_transformation()
        # The pipeline ends in ToTensor/Normalize, so its output is already a
        # tensor; unsqueeze adds the leading axis without a numpy round trip.
        return transform(self.img).unsqueeze(0)

    def preprocessed_dataset(self):
        '''
        params: None
        return: (train dataset, test dataset) as ImageFolder datasets
        raises: TypeError if the instance was created without directories
        '''
        if self.directory is None:
            raise TypeError(
                "path must be a dict with 'train' and 'test' directories, "
                "not None; pass path= to Preprocessing")
        # Random augmentation is applied to the training images only; the
        # test images get just the base transformations.
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomPerspective(0.2, p=0.5),
            self.__image_transformation(),
        ])
        # Using torch's ImageFolder to get data from each directory and
        # applying the transforms.
        dataset_train = ImageFolder(self.directory['train'],
                                    transform=train_transform)
        dataset_test = ImageFolder(self.directory['test'],
                                   transform=self.__image_transformation())
        return dataset_train, dataset_test