-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathvgg_cbam_model.py
116 lines (99 loc) · 4.97 KB
/
vgg_cbam_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import torch
import torch.nn as nn
import torch.nn.functional as F
class SAM(nn.Module):
    """Spatial attention module (the spatial half of CBAM).

    Collapses the channel axis of a ``(N, C, H, W)`` feature map into two
    single-channel maps (channel-wise max and channel-wise mean), mixes them
    with a 7x7 convolution, squashes the result into ``(0, 1)`` with a
    sigmoid and uses it to gate every channel of the input.

    Args:
        bias: whether the 7x7 convolution carries a bias term.
    """

    def __init__(self, bias=False):
        super(SAM, self).__init__()
        self.bias = bias
        # padding=3 with kernel_size=7 keeps the spatial size unchanged.
        self.conv = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=7, stride=1, padding=3, dilation=1, bias=self.bias)

    def forward(self, x):
        """Return ``x`` scaled by its spatial attention mask.

        Args:
            x: feature map of shape ``(N, C, H, W)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Per-pixel descriptors across channels, each of shape (N, 1, H, W).
        channel_max = torch.max(x, 1)[0].unsqueeze(1)
        channel_avg = torch.mean(x, 1).unsqueeze(1)
        descriptors = torch.cat((channel_max, channel_avg), dim=1)
        # The sigmoid turns the raw conv response into a bounded mask; without
        # it the "attention" could amplify without limit or flip feature signs.
        mask = torch.sigmoid(self.conv(descriptors))
        return mask * x
class CAM(nn.Module):
    """Channel attention module (the channel half of CBAM).

    Summarises each channel of a ``(N, C, H, W)`` feature map by global max
    pooling and global average pooling, feeds both summaries through one
    shared bottleneck MLP, adds the two results, applies a sigmoid and uses
    the resulting per-channel weights to gate the input.

    Args:
        channels: number of channels ``C`` of the incoming feature map.
        r: reduction ratio; the MLP's hidden width is ``channels // r``.
    """

    def __init__(self, channels, r):
        super(CAM, self).__init__()
        self.channels = channels
        self.r = r
        # NOTE: despite the name, this one MLP is shared by the max-pooled and
        # the avg-pooled descriptors. The attribute name is kept so the
        # module's state_dict keys stay the same.
        self.linear_max = nn.Sequential(
            nn.Linear(in_features=self.channels, out_features=self.channels // self.r, bias=True),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=self.channels // self.r, out_features=self.channels, bias=True),
        )

    def forward(self, x):
        """Return ``x`` scaled by its per-channel attention weights.

        Args:
            x: feature map of shape ``(N, C, H, W)`` with ``C == channels``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        b, c, _, _ = x.size()
        # Global descriptors, flattened from (N, C, 1, 1) to (N, C) for the MLP.
        pooled_max = F.adaptive_max_pool2d(x, output_size=1).view(b, c)
        pooled_avg = F.adaptive_avg_pool2d(x, output_size=1).view(b, c)
        # Same MLP for both descriptors; it is registered as ``linear_max``.
        logits = self.linear_max(pooled_max) + self.linear_max(pooled_avg)
        # torch.sigmoid replaces the deprecated F.sigmoid.
        gate = torch.sigmoid(logits).view(b, c, 1, 1)
        return gate * x
class CBAM(nn.Module):
    """Attention block that chains channel attention and spatial attention.

    The input first passes through the channel module (``cam``), the result
    passes through the spatial module (``sam``), and the original input is
    added back as a skip connection.

    Args:
        channels: channel count of the feature map, forwarded to ``CAM``.
        r: reduction ratio, forwarded to ``CAM``.
    """

    def __init__(self, channels, r):
        super().__init__()
        self.channels, self.r = channels, r
        # ``sam`` is created before ``cam`` so sub-module registration order
        # (and therefore state_dict ordering) stays as it was.
        self.sam = SAM(bias=False)
        self.cam = CAM(channels=channels, r=r)

    def forward(self, x):
        """Apply channel then spatial attention and add the input back."""
        refined = self.sam(self.cam(x))
        return refined + x
def conv_block(in_channels, out_channels):
    """Build a 3x3 convolution followed by batch norm and ReLU.

    The convolution uses stride 1 and padding 1, so the spatial size of the
    input is preserved.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the convolution.

    Returns:
        An ``nn.Sequential`` of ``Conv2d -> BatchNorm2d -> ReLU``.
    """
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
    norm = nn.BatchNorm2d(out_channels)
    activation = nn.ReLU()
    return nn.Sequential(conv, norm, activation)
class VGG19(nn.Module):
    """VGG-19-style classifier with a CBAM block at the end of every stage.

    Five convolutional stages (64, 128, 256, 512, 512 channels) each end with
    a ``CBAM`` block and a 2x2 max pool. The result is adaptively pooled to
    7x7, flattened and passed through three fully connected layers.

    Because of the five stride-2 poolings, the input height and width must be
    at least 32.

    Args:
        in_channels: channels of the input image tensor.
        out_channels: size of the final linear layer's output.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Stages 1-4: one un-nested conv/BN/ReLU head, then nested conv_blocks.
        self.conv_block1 = self._stage(self.in_channels, 64, nested=1)
        self.conv_block2 = self._stage(64, 128, nested=1)
        self.conv_block3 = self._stage(128, 256, nested=3)
        self.conv_block4 = self._stage(256, 512, nested=3)
        # Stage 5 has no un-nested head: all four convs are nested conv_blocks.
        self.conv_block5 = self._stage(512, 512, nested=4, head=False)

        self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(7, 7))

        # Classifier head; Dropout sits before ReLU in each hidden layer.
        hidden = 4096
        self.linear1 = nn.Sequential(
            nn.Linear(in_features=7 * 7 * 512, out_features=hidden, bias=True),
            nn.Dropout(0.5),
            nn.ReLU(),
        )
        self.linear2 = nn.Sequential(
            nn.Linear(in_features=hidden, out_features=hidden, bias=True),
            nn.Dropout(0.5),
            nn.ReLU(),
        )
        self.linear3 = nn.Linear(in_features=hidden, out_features=self.out_channels, bias=True)

    @staticmethod
    def _stage(in_channels, out_channels, nested, head=True):
        """Assemble one stage: optional flat conv head, nested conv_blocks, CBAM, max pool."""
        layers = []
        if head:
            # Kept flat (not wrapped in conv_block) so sub-module indices match
            # the layout used by existing state_dict keys.
            layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU())
        for _ in range(nested):
            layers.append(conv_block(out_channels, out_channels))
        layers.append(CBAM(out_channels, r=2))
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the five stages, pool to 7x7, flatten and classify.

        Args:
            x: input tensor of shape ``(N, in_channels, H, W)``.

        Returns:
            Tensor of shape ``(N, out_channels)``; no softmax is applied.
        """
        stages = (
            self.conv_block1,
            self.conv_block2,
            self.conv_block3,
            self.conv_block4,
            self.conv_block5,
        )
        for stage in stages:
            x = stage(x)
        x = self.avg_pool(x)
        batch_size = x.shape[0]
        features = x.view(batch_size, -1)
        for fc in (self.linear1, self.linear2, self.linear3):
            features = fc(features)
        return features