-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #89 from buyi1128/master
增加rgb和bit模块到digitpressure
- Loading branch information
Showing
18 changed files
with
936 additions
and
345 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
import torch.nn as nn | ||
from torch.nn import init | ||
|
||
class myNet(nn.Module):
    """Small CNN mapping single-channel 28x28 images to 11 class log-probabilities.

    Layout: two convolutional stages (16 then 32 channels), each closed by
    ReLU and 2x2 max-pooling, then a 128-unit hidden layer with dropout and
    a log-softmax output layer.
    """

    def __init__(self):
        super().__init__()
        # Stage 1a: 1x28x28 -> 16x28x28 (3x3 kernel, padding=1 keeps the size).
        self.conv1_1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
        )
        # Registered but not called by forward(); kept so the module's
        # parameter names (state_dict keys) do not change.
        self.BN = nn.BatchNorm2d(1, momentum=0.5)
        # Stage 1b: 16x28x28 -> 16x28x28 -> pooled to 16x14x14.
        self.conv1_2 = nn.Sequential(
            nn.Conv2d(16, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Stage 2a: 16x14x14 -> 32x12x12 (no padding).
        self.conv2_1 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3),
        )
        # Stage 2b: 32x12x12 -> 32x10x10 -> pooled to 32x5x5.
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Classifier head; both linear layers get the custom weight init.
        self.fc1 = nn.Linear(32 * 5 * 5, 128)
        self.relu1 = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self._set_init(self.fc1)
        self.fc2 = nn.Linear(128, 11)
        self._set_init(self.fc2)
        self.softmax = nn.LogSoftmax(dim=1)

    def _set_init(self, layer):
        """Re-initialise ``layer.weight`` from N(0, 0.1); the bias is untouched."""
        init.normal_(layer.weight, mean=0.0, std=0.1)

    def forward(self, x):
        """Run the forward pass and return per-class log-probabilities."""
        for stage in (self.conv1_1, self.conv1_2, self.conv2_1, self.conv2_2):
            x = stage(x)
        flat = x.view(x.size(0), -1)  # flatten everything except the batch axis
        hidden = self.dropout(self.relu1(self.fc1(flat)))
        return self.softmax(self.fc2(hidden))
|
||
class rgbNet(nn.Module):
    """Small CNN mapping 28x28 images to 11 class log-probabilities.

    The number of input channels depends on ``imtype``: ``'bit'`` means a
    single-channel (binarised) image, ``'rgb'`` a three-channel image.
    Apart from the first convolution the architecture is fixed: two
    convolutional stages (16 then 32 channels), each closed by ReLU and 2x2
    max-pooling, a 128-unit hidden layer with dropout, and a log-softmax
    output layer.
    """

    def __init__(self, imtype):
        """Build the network.

        Args:
            imtype: ``'bit'`` (1 input channel) or ``'rgb'`` (3 input channels).

        Raises:
            ValueError: if ``imtype`` is neither ``'bit'`` nor ``'rgb'``.
        """
        super().__init__()
        if imtype == 'bit':
            n = 1
        elif imtype == 'rgb':
            n = 3
        else:
            # Without this branch ``n`` would be unbound and the failure
            # would surface later as a confusing UnboundLocalError.
            raise ValueError(
                "imtype must be 'bit' or 'rgb', got {!r}".format(imtype))

        # Stage 1a: n x28x28 -> 16x28x28 (3x3 kernel, padding=1 keeps the size).
        self.conv1_1 = nn.Sequential(
            nn.Conv2d(
                in_channels=n,
                out_channels=16,
                kernel_size=3,
                padding=1
            ),
        )
        # Registered but not called by forward(); kept so the module's
        # parameter names (state_dict keys) do not change.
        self.BN = nn.BatchNorm2d(1, momentum=0.5)
        # Stage 1b: 16x28x28 -> 16x28x28 -> pooled to 16x14x14.
        self.conv1_2 = nn.Sequential(
            nn.Conv2d(
                in_channels=16,
                out_channels=16,
                kernel_size=3,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Stage 2a: 16x14x14 -> 32x12x12 (no padding).
        self.conv2_1 = nn.Sequential(
            nn.Conv2d(
                in_channels=16,
                out_channels=32,
                kernel_size=3),
        )
        # Stage 2b: 32x12x12 -> 32x10x10 -> pooled to 32x5x5.
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(
                in_channels=32,
                out_channels=32,
                kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        # Classifier head; both linear layers get the custom weight init.
        self.fc1 = nn.Linear(32 * 5 * 5, 128)
        self.relu1 = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self._set_init(self.fc1)
        self.fc2 = nn.Linear(128, 11)
        self._set_init(self.fc2)
        self.softmax = nn.LogSoftmax(dim=1)

    def _set_init(self, layer):
        """Re-initialise ``layer.weight`` from N(0, 0.1); the bias is untouched."""
        init.normal_(layer.weight, mean=0., std=.1)

    def forward(self, x):
        """Run the forward pass and return per-class log-probabilities."""
        x = self.conv1_1(x)
        x = self.conv1_2(x)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = x.view(x.size()[0], -1)  # flatten everything except the batch axis
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return self.softmax(x)
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
import numpy as np | ||
import torch | ||
import os | ||
import cv2 | ||
import pickle | ||
import random | ||
|
||
|
||
class dataLoader():
    """In-memory loader for the 28x28 digit images, cached as pickle files.

    Images are read from ``<path>/rgb_augmentation/<0..10>/*.bmp`` (training)
    and ``<path>/rgb_test/<0..10>/*.bmp`` (test), converted to tensors and
    cached in ``<path>/<type>_augment_train.pkl`` / ``<path>/<type>_test.pkl``.
    Later constructions reuse the cache unless ``ifUpdate`` is true.
    """
    # todo: reorder batches so that every epoch sees a different batch order

    def __init__(self, type, path, bs, ifUpdate):
        """Load (building the cache first if necessary) and shuffle the data.

        Args:
            type: ``'rgb'`` (3-channel images) or ``'bit'`` (binarised,
                single-channel images).
            path: dataset root directory.
            bs: batch size used by ``next_batch``.
            ifUpdate: when true, rebuild the pickle cache from the images.

        Raises:
            ValueError: if ``type`` is neither ``'rgb'`` nor ``'bit'``.
        """
        if type not in ('rgb', 'bit'):
            # Previously this left train_path/test_path unset and failed
            # later with an AttributeError.
            raise ValueError(
                "type must be 'rgb' or 'bit', got {!r}".format(type))
        self.meanPixel = 80
        self.bs = bs
        self.pointer = 0
        self.type = type
        self.train_path = os.path.join(path, "{}_augment_train.pkl".format(type))
        self.test_path = os.path.join(path, "{}_test.pkl".format(type))

        # A missing cache file forces a rebuild regardless of ifUpdate.
        if not (os.path.exists(self.train_path) and os.path.exists(self.test_path)):
            ifUpdate = True

        if ifUpdate:
            # Drop stale caches first so a failed rebuild cannot leave an
            # outdated file behind. No shell is involved.
            for stale in (self.train_path, self.test_path):
                if os.path.isfile(stale):
                    os.remove(stale)
            self.readImagesFromMultiFils(path)

        self.readDataFromPkl()
        self.shuffle()

    def _load_image(self, filename):
        """Read one image and return it as a 28x28 (bit) or 28x28x3 (rgb) array."""
        img = cv2.imread(filename)
        if img is None:
            raise IOError("cannot read image: {}".format(filename))
        if self.type == "bit":
            img = img[:, :, 0]
            img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_CUBIC)
            img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                        cv2.THRESH_BINARY_INV, 55, 11)
            # Enhancement: morphological opening with a 1x2 rectangle.
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 2))
            return cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
        img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_CUBIC)
        if len(img.shape) == 2:
            # Replicate a grayscale image into three channels.
            img = np.stack((img, img, img), axis=-1)
            print("convert to rgb: ", img.shape)
        return img

    def readImagesFromMultiFils(self, path):
        """Build both pickle caches from the ``.bmp`` images under ``path``.

        The training cache stores ``[data, labels]``; the test cache stores
        ``[data, labels, file names]``. ``data`` has ``meanPixel`` subtracted.
        """
        channels = 1 if self.type == 'bit' else 3
        for t in ["rgb_augmentation", "rgb_test"]:
            chunks = []
            label = []
            names = []

            for i in range(11):
                root = path + "/" + t + "/" + str(i) + "/"
                for im in os.listdir(root):
                    if im.split(".")[-1] != "bmp":
                        continue
                    filename = root + im
                    img = self._load_image(filename)
                    # NOTE(review): for rgb the (28, 28, 3) array is
                    # reinterpreted with view(), not permuted to
                    # channel-first. Kept as is because existing caches and
                    # trained weights depend on this layout.
                    temp = torch.Tensor(img).view(1, channels, 28, 28) - self.meanPixel
                    chunks.append(temp)
                    names.append(filename)
                    label.append(i)

            # Concatenate once instead of growing the tensor image by image.
            if chunks:
                data = torch.cat(chunks, 0)
            else:
                data = torch.zeros((0, channels, 28, 28))
            labels = torch.Tensor(np.array(label)).long()

            if t.endswith("test"):
                target, payload = self.test_path, [data, labels, names]
            else:
                target, payload = self.train_path, [data, labels]
            with open(target, "wb") as fp:
                pickle.dump(payload, fp)

    def readDataFromPkl(self):
        """Load the cached train/test tensors into memory.

        NOTE: pickle executes arbitrary code on load; only use cache files
        produced by this class.
        """
        with open(self.train_path, "rb") as fp:
            self.trainData, self.trainLabel = pickle.load(fp)
        with open(self.test_path, "rb") as fp:
            self.testData, self.testLabel, self.names = pickle.load(fp)

    def getTrainData(self):
        """Return ``(trainData, trainLabel)``."""
        return self.trainData, self.trainLabel

    def getTestData(self):
        """Return ``(testData, testLabel, names)``."""
        return self.testData, self.testLabel, self.names

    def shuffle(self):
        """Apply one random permutation to the training data and labels."""
        li = list(range(self.trainData.shape[0]))
        random.shuffle(li)
        self.trainData = self.trainData[li]
        self.trainLabel = self.trainLabel[li]

    def next_batch(self):
        """Return the next ``(data, labels)`` training batch.

        The final batch of a pass may be shorter than ``bs``; afterwards the
        cursor wraps around to the start of the data.
        """
        total = self.trainData.shape[0]
        if self.pointer * self.bs >= total:
            self.pointer = 0

        start = self.pointer * self.bs
        end = start + self.bs
        if end > total:
            # Short tail batch: hand it out and restart from the beginning.
            self.pointer = 0
            return self.trainData[start:], self.trainLabel[start:]

        self.pointer += 1
        return self.trainData[start:end], self.trainLabel[start:end]

    def get_rounds(self):
        """Return the number of ``next_batch`` calls that cover the data once.

        This is ``ceil(N / bs)``. The former ``int(N / bs) + 1`` counted one
        batch too many whenever N was an exact multiple of ``bs``.
        """
        return -(-self.trainData.shape[0] // self.bs)
|
||
|
||
# if __name__ == "__main__": | ||
# dl = dataLoader("dataset/", 64, True) | ||
# dl.shuffle() | ||
# train, trl = dl.getTrainData() | ||
# test, tel = dl.getTestData() | ||
# # | ||
# print(train.shape, trl) | ||
# print(test.shape, tel) | ||
# # | ||
# | ||
# print(dl.trainLabel) | ||
# dl.shuffle() | ||
# print(dl.trainLabel) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
import os | ||
import cv2 | ||
import numpy as np | ||
import random | ||
from PIL import Image, ImageEnhance | ||
|
||
def hsv_transform(img, hue_delta, sat_mult, val_mult):
    """Shift the hue and scale the saturation/value of a BGR image.

    Args:
        img: image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2HSV)``.
        hue_delta: amount added to the hue channel (wrapped modulo 180).
        sat_mult: factor applied to the saturation channel.
        val_mult: factor applied to the value (brightness) channel.

    Returns:
        The transformed image converted back to BGR, as uint8.
    """
    # np.float was removed in NumPy 1.24; float64 is the type it aliased.
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float64)
    img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_delta) % 180
    img_hsv[:, :, 1] *= sat_mult
    img_hsv[:, :, 2] *= val_mult
    # Saturate on both sides so the uint8 cast below cannot wrap around.
    img_hsv = np.clip(img_hsv, 0, 255)
    return cv2.cvtColor(np.round(img_hsv).astype(np.uint8), cv2.COLOR_HSV2BGR)
|
||
def random_hsv_transform(img, hue_vari=10, sat_vari=0.1, val_vari=0.1):
    """Apply ``hsv_transform`` with randomly drawn parameters.

    Args:
        img: BGR image.
        hue_vari: the hue shift is an integer drawn from
            ``[-hue_vari, hue_vari)``.
        sat_vari: the saturation factor is ``1 + U(-sat_vari, sat_vari)``.
        val_vari: the value factor is ``1 + U(-val_vari, val_vari)``.

    Returns:
        The transformed BGR image.
    """
    hue_shift = np.random.randint(-hue_vari, hue_vari)
    # Drawn in order: saturation first, then value.
    sat_scale, val_scale = (
        1 + np.random.uniform(-spread, spread)
        for spread in (sat_vari, val_vari)
    )
    return hsv_transform(img, hue_shift, sat_scale, val_scale)
|
||
def gamma_transform(img, gamma=1.0):
    """Apply gamma correction to an image through a 256-entry lookup table.

    Args:
        img: image passed to ``cv2.LUT``.
        gamma: exponent applied to intensities normalised to [0, 1].

    Returns:
        The image mapped through the gamma lookup table.
    """
    curve = np.array(
        [np.power(level / 255.0, gamma) * 255.0 for level in range(256)]
    )
    lookup = np.round(curve).astype(np.uint8)
    return cv2.LUT(img, lookup)
|
||
def random_gamma_transform(img, gamma_vari=2.0):
    """Apply ``gamma_transform`` with a randomly drawn gamma.

    Gamma is sampled log-uniformly from ``[1/gamma_vari, gamma_vari)``.

    Args:
        img: image to transform.
        gamma_vari: upper bound of the gamma range (its reciprocal is the
            lower bound).

    Returns:
        The gamma-corrected image.
    """
    bound = np.log(gamma_vari)
    exponent = np.random.uniform(-bound, bound)
    return gamma_transform(img, np.exp(exponent))
|
||
def randomGaussian(image, mean=0.2, sigma=0.3):
    """Add independent Gaussian noise to the first three channels of an image.

    One ``random.gauss(mean, sigma)`` sample is drawn per pixel, channel by
    channel in row-major order. Noisy values are saturated to [0, 255]
    before being stored, so bright pixels can no longer wrap around to dark
    values. The input is not modified; the work is done on a copy.

    Args:
        image: array-like of shape (rows, cols, >=3).
        mean: mean (offset) of the noise.
        sigma: standard deviation of the noise.

    Returns:
        The noisy image as a uint8 ndarray with the same shape as the input.
    """
    # np.array copies, so this works for read-only inputs and leaves the
    # caller's data untouched.
    img = np.array(image)
    rows, cols = img.shape[:2]
    for channel in range(3):
        noise = np.array(
            [random.gauss(mean, sigma) for _ in range(rows * cols)],
            dtype=np.float64,
        ).reshape(rows, cols)
        noisy = img[:, :, channel].astype(np.float64) + noise
        # Saturate in float space; the assignment then truncates toward
        # zero exactly as the per-element update used to.
        img[:, :, channel] = np.clip(noisy, 0, 255)
    return img.astype(np.uint8)
|
||
def randomColor(image):
    """Randomly jitter saturation, brightness, contrast and sharpness.

    :param image: image as an array accepted by ``PIL.Image.fromarray``
    :return: the jittered image as a numpy array
    """
    # (enhancer, low, high): the factor is randint(low, high) / 10, so
    # colour and sharpness span 0.0-3.0, brightness and contrast 1.0-2.0.
    adjustments = (
        (ImageEnhance.Color, 0, 31),
        (ImageEnhance.Brightness, 10, 21),
        (ImageEnhance.Contrast, 10, 21),
        (ImageEnhance.Sharpness, 0, 31),
    )
    picture = Image.fromarray(image)
    for enhancer, low, high in adjustments:
        factor = np.random.randint(low, high) / 10.
        picture = enhancer(picture).enhance(factor)
    return np.array(picture)
|
||
|
||
def augmentation(origin, dest):
    """Write augmented copies of every image found under ``origin``.

    For each sub-directory of ``origin`` a matching sub-directory is created
    in ``dest``. For each readable image ``<stem>.<ext>`` five files are
    written: ``<stem>_origin.bmp``, ``<stem>_hsv.bmp``, ``<stem>_gamma.bmp``,
    ``<stem>_color.bmp`` and ``<stem>_gaussian.bmp``.

    Entries of ``origin`` that are not directories, and files OpenCV cannot
    decode, are skipped instead of aborting the whole run.

    Args:
        origin: directory containing one sub-directory per class.
        dest: output directory (created as needed).
    """
    # Applied in this order, each on its own copy of the source image.
    variants = (
        ("_hsv", random_hsv_transform),        # random HSV shift
        ("_gamma", random_gamma_transform),    # random gamma correction
        ("_color", randomColor),               # colour jitter
        ("_gaussian", randomGaussian),         # Gaussian noise
    )
    for sub in os.listdir(origin):
        subpath = os.path.join(origin, sub)
        if not os.path.isdir(subpath):
            continue
        destpath = os.path.join(dest, sub)
        os.makedirs(destpath, exist_ok=True)
        for fname in os.listdir(subpath):
            filename = os.path.join(subpath, fname)
            img = cv2.imread(filename)
            if img is None:
                # imread signals failure by returning None.
                print("skip unreadable image: {}".format(filename))
                continue
            # splitext handles extensions of any length, unlike fname[:-4].
            stem = os.path.splitext(fname)[0]
            cv2.imwrite(os.path.join(destpath, stem + "_origin.bmp"), img)
            for suffix, transform in variants:
                destname = os.path.join(destpath, stem + suffix + ".bmp")
                cv2.imwrite(destname, transform(img.copy()))
|
||
|
||
|
||
|
||
# Script entry: augment the RGB training set into dataset/rgb_augmentation.
# This runs whenever the module is executed or imported.
origin, dest = "dataset/rgb_train", "dataset/rgb_augmentation"
augmentation(origin, dest)
File renamed without changes.
Binary file not shown.
Oops, something went wrong.