Spaces:
Sleeping
Sleeping
Lodor
committed on
Commit
•
8e518b7
1
Parent(s):
d94bdc5
Initial commit 2
Browse files- output/20220510-091500-045046-edited.jpg +0 -0
- output/20220510-091500-045046.jpg +0 -0
- output/20220510-091516-015613-edited.jpg +0 -0
- output/20220510-091516-015613.jpg +0 -0
- output/20220510-091521-541657-edited.jpg +0 -0
- output/20220510-091521-541657.jpg +0 -0
- output/20220510-154433-175611-edited.jpg +0 -0
- output/20220510-154433-175611.jpg +0 -0
- output/20220510-154448-010789-edited.jpg +0 -0
- output/20220510-154448-010789.jpg +0 -0
- output/20220510-154459-357457-edited.jpg +0 -0
- output/20220510-154459-357457.jpg +0 -0
- output/20220510-154525-151377-edited.jpg +0 -0
- output/20220510-154525-151377.jpg +0 -0
- output/20220510-155557-047130-edited.jpg +0 -0
- output/20220510-155557-047130.jpg +0 -0
- output/20220510-155659-820061-edited.jpg +0 -0
- output/20220510-155659-820061.jpg +0 -0
- output/20220510-160930-702819-edited.jpg +0 -0
- output/20220510-160930-702819.jpg +0 -0
- output/20220510-161047-602957-edited.jpg +0 -0
- output/20220510-161047-602957.jpg +0 -0
- output/20220510-161107-587096-edited.jpg +0 -0
- output/20220510-161107-587096.jpg +0 -0
- src/models/__init__.py +0 -0
- src/models/backbones/__init__.py +0 -10
- src/models/backbones/mobilenetv2.py +0 -199
- src/models/backbones/wrapper.py +0 -82
- src/models/modnet.py +0 -255
output/20220510-091500-045046-edited.jpg
DELETED
Binary file (99.5 kB)
|
|
output/20220510-091500-045046.jpg
DELETED
Binary file (17.8 kB)
|
|
output/20220510-091516-015613-edited.jpg
DELETED
Binary file (99.5 kB)
|
|
output/20220510-091516-015613.jpg
DELETED
Binary file (17.8 kB)
|
|
output/20220510-091521-541657-edited.jpg
DELETED
Binary file (99.5 kB)
|
|
output/20220510-091521-541657.jpg
DELETED
Binary file (17.8 kB)
|
|
output/20220510-154433-175611-edited.jpg
DELETED
Binary file (41.7 kB)
|
|
output/20220510-154433-175611.jpg
DELETED
Binary file (10 kB)
|
|
output/20220510-154448-010789-edited.jpg
DELETED
Binary file (55.8 kB)
|
|
output/20220510-154448-010789.jpg
DELETED
Binary file (10 kB)
|
|
output/20220510-154459-357457-edited.jpg
DELETED
Binary file (84 kB)
|
|
output/20220510-154459-357457.jpg
DELETED
Binary file (76.1 kB)
|
|
output/20220510-154525-151377-edited.jpg
DELETED
Binary file (50.2 kB)
|
|
output/20220510-154525-151377.jpg
DELETED
Binary file (12.3 kB)
|
|
output/20220510-155557-047130-edited.jpg
DELETED
Binary file (720 kB)
|
|
output/20220510-155557-047130.jpg
DELETED
Binary file (621 kB)
|
|
output/20220510-155659-820061-edited.jpg
DELETED
Binary file (99.2 kB)
|
|
output/20220510-155659-820061.jpg
DELETED
Binary file (17.8 kB)
|
|
output/20220510-160930-702819-edited.jpg
DELETED
Binary file (99.2 kB)
|
|
output/20220510-160930-702819.jpg
DELETED
Binary file (17.8 kB)
|
|
output/20220510-161047-602957-edited.jpg
DELETED
Binary file (35.3 kB)
|
|
output/20220510-161047-602957.jpg
DELETED
Binary file (35.3 kB)
|
|
output/20220510-161107-587096-edited.jpg
DELETED
Binary file (99.2 kB)
|
|
output/20220510-161107-587096.jpg
DELETED
Binary file (17.8 kB)
|
|
src/models/__init__.py
DELETED
File without changes
|
src/models/backbones/__init__.py
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
from .wrapper import *
|
2 |
-
|
3 |
-
|
4 |
-
#------------------------------------------------------------------------------
|
5 |
-
# Replaceable Backbones
|
6 |
-
#------------------------------------------------------------------------------
|
7 |
-
|
8 |
-
# Registry of backbone implementations, keyed by architecture name.
# MobileNetV2Backbone is brought into scope by the star import from .wrapper.
SUPPORTED_BACKBONES = {
    'mobilenetv2': MobileNetV2Backbone,
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/models/backbones/mobilenetv2.py
DELETED
@@ -1,199 +0,0 @@
|
|
1 |
-
""" This file is adapted from https://github.com/thuyngch/Human-Segmentation-PyTorch"""
|
2 |
-
|
3 |
-
import math
|
4 |
-
import json
|
5 |
-
from functools import reduce
|
6 |
-
|
7 |
-
import torch
|
8 |
-
from torch import nn
|
9 |
-
|
10 |
-
|
11 |
-
#------------------------------------------------------------------------------
|
12 |
-
# Useful functions
|
13 |
-
#------------------------------------------------------------------------------
|
14 |
-
|
15 |
-
def _make_divisible(v, divisor, min_value=None):
|
16 |
-
if min_value is None:
|
17 |
-
min_value = divisor
|
18 |
-
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
19 |
-
# Make sure that round down does not go down by more than 10%.
|
20 |
-
if new_v < 0.9 * v:
|
21 |
-
new_v += divisor
|
22 |
-
return new_v
|
23 |
-
|
24 |
-
|
25 |
-
def conv_bn(inp, oup, stride):
    """Build a 3x3 convolution (padding 1, no bias) -> BatchNorm2d -> ReLU6 stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the convolution.

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )
|
31 |
-
|
32 |
-
|
33 |
-
def conv_1x1_bn(inp, oup):
    """Build a 1x1 convolution (stride 1, no padding, no bias) -> BatchNorm2d -> ReLU6 stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )
|
39 |
-
|
40 |
-
|
41 |
-
#------------------------------------------------------------------------------
|
42 |
-
# Class of Inverted Residual block
|
43 |
-
#------------------------------------------------------------------------------
|
44 |
-
|
45 |
-
class InvertedResidual(nn.Module):
    """Inverted residual block: optional 1x1 expansion, 3x3 depthwise conv, 1x1 linear projection.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the depthwise convolution; must be 1 or 2.
        expansion: multiplier applied to ``inp`` to obtain the hidden width.
            When it equals 1 the expansion convolution is omitted.
        dilation: dilation of the depthwise convolution.

    The input is added to the output when ``stride == 1`` and ``inp == oup``.
    """

    def __init__(self, inp, oup, stride, expansion, dilation=1):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expansion)
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expansion != 1:
            layers += [
                # pw
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
            ]
        layers += [
            # dw -- padding follows dilation so that a dilated 3x3 kernel keeps
            # the spatial size at stride 1; with a fixed padding of 1 the
            # residual addition in forward() fails for dilation > 1.
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, dilation,
                      groups=hidden_dim, dilation=dilation, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            # pw-linear
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        # Layer order (and therefore the state-dict keys conv.0, conv.1, ...)
        # is the same as before for both the expanded and unexpanded variants.
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the skip connection when it is enabled."""
        if self.use_res_connect:
            return x + self.conv(x)
        return self.conv(x)
|
84 |
-
|
85 |
-
|
86 |
-
#------------------------------------------------------------------------------
|
87 |
-
# Class of MobileNetV2
|
88 |
-
#------------------------------------------------------------------------------
|
89 |
-
|
90 |
-
class MobileNetV2(nn.Module):
    """MobileNetV2 feature extractor with an optional classification head.

    Args:
        in_channels: number of channels of the input tensor.
        alpha: width multiplier applied to the channel counts.
        expansion: expansion factor used by every inverted residual stage
            except the first one (which always uses 1).
        num_classes: size of the classifier output; pass ``None`` to build
            the feature extractor without a classifier.
    """

    def __init__(self, in_channels, alpha=1.0, expansion=6, num_classes=1000):
        super(MobileNetV2, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        input_channel = 32
        last_channel = 1280
        interverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [expansion, 24, 2, 2],
            [expansion, 32, 3, 2],
            [expansion, 64, 4, 2],
            [expansion, 96, 3, 1],
            [expansion, 160, 3, 2],
            [expansion, 320, 1, 1],
        ]

        # building first layer
        input_channel = _make_divisible(input_channel * alpha, 8)
        self.last_channel = _make_divisible(last_channel * alpha, 8) if alpha > 1.0 else last_channel
        layers = [conv_bn(self.in_channels, input_channel, 2)]

        # building inverted residual blocks
        for t, c, n, s in interverted_residual_setting:
            output_channel = _make_divisible(int(c * alpha), 8)
            for i in range(n):
                # only the first block of a stage uses the stage stride
                stride = s if i == 0 else 1
                layers.append(InvertedResidual(input_channel, output_channel, stride, expansion=t))
                input_channel = output_channel

        # building last several layers
        layers.append(conv_1x1_bn(input_channel, self.last_channel))

        # 1 stem + 17 inverted residual blocks + 1 final 1x1 conv = 19 entries
        self.features = nn.Sequential(*layers)

        # building classifier
        if self.num_classes is not None:
            self.classifier = nn.Sequential(
                nn.Dropout(0.2),
                nn.Linear(self.last_channel, num_classes),
            )

        # Initialize weights
        self._init_weights()

    def forward(self, x):
        """Run the feature layers in order, then the classifier if one was built."""
        for layer in self.features:
            x = layer(x)

        # Classification
        if self.num_classes is not None:
            x = x.mean(dim=(2, 3))
            x = self.classifier(x)

        return x

    def _load_pretrained_model(self, pretrained_file):
        """Load matching entries from ``pretrained_file`` into this model.

        Keys of the checkpoint that are not present in this model's state
        dict are reported on stdout and skipped.
        """
        pretrain_dict = torch.load(pretrained_file, map_location='cpu')
        model_dict = {}
        state_dict = self.state_dict()
        print("[MobileNetV2] Loading pretrained model...")
        for k, v in pretrain_dict.items():
            if k in state_dict:
                model_dict[k] = v
            else:
                print(k, "is ignored")
        state_dict.update(model_dict)
        self.load_state_dict(state_dict)

    def _init_weights(self):
        """Initialise Conv2d, BatchNorm2d and Linear submodules in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/models/backbones/wrapper.py
DELETED
@@ -1,82 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
from functools import reduce
|
3 |
-
|
4 |
-
import torch
|
5 |
-
import torch.nn as nn
|
6 |
-
|
7 |
-
from .mobilenetv2 import MobileNetV2
|
8 |
-
|
9 |
-
|
10 |
-
class BaseBackbone(nn.Module):
    """Superclass of replaceable backbone models for semantic estimation.

    Subclasses are expected to set ``self.model`` and ``self.enc_channels``
    and to override ``forward`` and ``load_pretrained_ckpt``.
    """

    def __init__(self, in_channels):
        super(BaseBackbone, self).__init__()
        self.in_channels = in_channels

        # Placeholders to be filled in by concrete backbones.
        self.model = None
        self.enc_channels = []

    def forward(self, x):
        """Not implemented in the base class."""
        raise NotImplementedError

    def load_pretrained_ckpt(self):
        """Not implemented in the base class."""
        raise NotImplementedError
|
26 |
-
|
27 |
-
|
28 |
-
class MobileNetV2Backbone(BaseBackbone):
    """MobileNetV2 backbone that exposes five intermediate feature maps."""

    def __init__(self, in_channels):
        super(MobileNetV2Backbone, self).__init__(in_channels)

        self.model = MobileNetV2(self.in_channels, alpha=1.0, expansion=6, num_classes=None)
        self.enc_channels = [16, 24, 32, 96, 1280]

    def forward(self, x):
        """Run the feature layers and collect the activations after layers 1, 3, 6, 13 and 18.

        Returns:
            ``[enc2x, enc4x, enc8x, enc16x, enc32x]``.
        """
        features = self.model.features
        taps = []
        # Half-open index ranges of the feature layers making up each stage.
        for start, stop in ((0, 2), (2, 4), (4, 7), (7, 14), (14, 19)):
            for idx in range(start, stop):
                x = features[idx](x)
            taps.append(x)
        return taps

    def load_pretrained_ckpt(self):
        """Load the pretrained MobileNetV2 weights from the fixed relative path.

        Raises:
            FileNotFoundError: if the checkpoint file does not exist. (This
                replaces a print followed by the site builtin ``exit()``,
                which ended the process with a success status.)
        """
        # the pre-trained model is provided by https://github.com/thuyngch/Human-Segmentation-PyTorch
        ckpt_path = './pretrained/mobilenetv2_human_seg.ckpt'
        if not os.path.exists(ckpt_path):
            raise FileNotFoundError(
                'cannot find the pretrained mobilenetv2 backbone: ' + ckpt_path)

        # Load onto the CPU so a checkpoint saved from a GPU also works on a
        # CPU-only host; load_state_dict copies the values into the model's
        # existing parameters.
        ckpt = torch.load(ckpt_path, map_location='cpu')
        self.model.load_state_dict(ckpt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/models/modnet.py
DELETED
@@ -1,255 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn as nn
|
3 |
-
import torch.nn.functional as F
|
4 |
-
|
5 |
-
from .backbones import SUPPORTED_BACKBONES
|
6 |
-
|
7 |
-
|
8 |
-
#------------------------------------------------------------------------------
|
9 |
-
# MODNet Basic Modules
|
10 |
-
#------------------------------------------------------------------------------
|
11 |
-
|
12 |
-
class IBNorm(nn.Module):
    """Combine instance norm and batch norm into one layer.

    The first ``int(in_channels / 2)`` channels go through an affine
    ``BatchNorm2d``; the remaining channels go through a non-affine
    ``InstanceNorm2d``. The two results are concatenated along dim 1.
    """

    def __init__(self, in_channels):
        super(IBNorm, self).__init__()
        self.bnorm_channels = int(in_channels / 2)
        self.inorm_channels = in_channels - self.bnorm_channels

        self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
        self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)

    def forward(self, x):
        """Normalise the two channel groups separately and re-join them."""
        split = self.bnorm_channels
        bn_x = self.bnorm(x[:, :split, ...].contiguous())
        in_x = self.inorm(x[:, split:, ...].contiguous())

        return torch.cat((bn_x, in_x), 1)
|
30 |
-
|
31 |
-
|
32 |
-
class Conv2dIBNormRelu(nn.Module):
    """Convolution, optionally followed by IBNorm and ReLU.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, dilation,
        groups, bias: forwarded unchanged to ``nn.Conv2d``.
        with_ibn: append ``IBNorm(out_channels)`` after the convolution.
        with_relu: append an in-place ``nn.ReLU`` as the last layer.
    """

    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=0, dilation=1, groups=1, bias=True,
                 with_ibn=True, with_relu=True):
        super(Conv2dIBNormRelu, self).__init__()

        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size,
                      stride=stride, padding=padding, dilation=dilation,
                      groups=groups, bias=bias)
        ]

        if with_ibn:
            layers.append(IBNorm(out_channels))
        if with_relu:
            layers.append(nn.ReLU(inplace=True))

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the configured layers in order."""
        return self.layers(x)
|
56 |
-
|
57 |
-
|
58 |
-
class SEBlock(nn.Module):
    """SE block proposed in https://arxiv.org/pdf/1709.01507.pdf

    Args:
        in_channels: channel count of the input tensor.
        out_channels: number of gate values produced per sample. It must be
            equal to the input channel count (one gate per channel) or 1
            (a single gate shared by all channels) for the gates to expand
            to the input shape.
        reduction: the hidden width is ``in_channels // reduction``.
    """

    def __init__(self, in_channels, out_channels, reduction=1):
        super(SEBlock, self).__init__()
        hidden = int(in_channels // reduction)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(in_channels, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, out_channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Scale ``x`` by sigmoid gates computed from its spatial average."""
        b, c, _, _ = x.size()
        w = self.pool(x).view(b, c)
        # Reshape by the gate count the fc stack actually produced rather
        # than by the input channel count, which made every configuration
        # with out_channels != in_channels fail in view().
        w = self.fc(w).view(b, -1, 1, 1)

        return x * w.expand_as(x)
|
78 |
-
|
79 |
-
|
80 |
-
#------------------------------------------------------------------------------
|
81 |
-
# MODNet Branches
|
82 |
-
#------------------------------------------------------------------------------
|
83 |
-
|
84 |
-
class LRBranch(nn.Module):
    """Low resolution branch of MODNet.

    Args:
        backbone: module returning a sequence of five feature maps; its
            ``enc_channels`` attribute gives their channel counts.
    """

    def __init__(self, backbone):
        super(LRBranch, self).__init__()

        enc_channels = backbone.enc_channels

        self.backbone = backbone
        self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4)
        self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2)
        self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2)
        self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)

    def forward(self, img, inference):
        """Compute the low-resolution features.

        Args:
            img: input passed to the backbone.
            inference: when truthy, the semantic prediction is skipped.

        Returns:
            ``(pred_semantic, lr8x, [enc2x, enc4x])`` where ``pred_semantic``
            is ``None`` when ``inference`` is truthy.
        """
        # Call the module itself rather than .forward() so that hooks
        # registered on the backbone are run.
        enc_features = self.backbone(img)
        enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4]

        enc32x = self.se_block(enc32x)
        lr16x = F.interpolate(enc32x, scale_factor=2, mode='bilinear', align_corners=False)
        lr16x = self.conv_lr16x(lr16x)
        lr8x = F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False)
        lr8x = self.conv_lr8x(lr8x)

        pred_semantic = None
        if not inference:
            lr = self.conv_lr(lr8x)
            pred_semantic = torch.sigmoid(lr)

        return pred_semantic, lr8x, [enc2x, enc4x]
|
115 |
-
|
116 |
-
|
117 |
-
class HRBranch(nn.Module):
    """High resolution branch of MODNet.

    Args:
        hr_channels: base channel width used inside this branch.
        enc_channels: channel counts of the encoder features; only the first
            two entries are read here.

    NOTE(review): the ``+ 3`` terms below match the channels of the image
    tensors concatenated in ``forward``; an image with a different channel
    count would not fit these layers.
    """

    def __init__(self, hr_channels, enc_channels):
        super(HRBranch, self).__init__()

        self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
        self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)

        self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
        self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)

        self.conv_hr4x = nn.Sequential(
            Conv2dIBNormRelu(3 * hr_channels + 3, 2 * hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
        )

        self.conv_hr2x = nn.Sequential(
            Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
        )

        self.conv_hr = nn.Sequential(
            Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
        )

    def forward(self, img, enc2x, enc4x, lr8x, inference):
        """Compute the high-resolution features.

        Args:
            img: input image tensor.
            enc2x, enc4x: encoder features passed in by the caller.
            lr8x: low-resolution feature passed in by the caller.
            inference: when truthy, the detail prediction is skipped.

        Returns:
            ``(pred_detail, hr2x)`` where ``pred_detail`` is ``None`` when
            ``inference`` is truthy.
        """
        # Downscaled copies of the image that are concatenated with features.
        img2x = F.interpolate(img, scale_factor=1/2, mode='bilinear', align_corners=False)
        img4x = F.interpolate(img, scale_factor=1/4, mode='bilinear', align_corners=False)

        enc2x = self.tohr_enc2x(enc2x)
        hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1))

        enc4x = self.tohr_enc4x(enc4x)
        hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1))

        lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
        hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1))

        hr2x = F.interpolate(hr4x, scale_factor=2, mode='bilinear', align_corners=False)
        hr2x = self.conv_hr2x(torch.cat((hr2x, enc2x), dim=1))

        pred_detail = None
        if not inference:
            hr = F.interpolate(hr2x, scale_factor=2, mode='bilinear', align_corners=False)
            hr = self.conv_hr(torch.cat((hr, img), dim=1))
            pred_detail = torch.sigmoid(hr)

        return pred_detail, hr2x
|
171 |
-
|
172 |
-
|
173 |
-
class FusionBranch(nn.Module):
    """Fusion branch of MODNet.

    Args:
        hr_channels: base channel width used inside this branch.
        enc_channels: channel counts of the encoder features; only index 2
            is read here.

    NOTE(review): the ``+ 3`` term below matches the channels of the image
    tensor concatenated in ``forward``.
    """

    def __init__(self, hr_channels, enc_channels):
        super(FusionBranch, self).__init__()
        self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)

        self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
        self.conv_f = nn.Sequential(
            Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
            Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
        )

    def forward(self, img, lr8x, hr2x):
        """Fuse the low- and high-resolution features.

        Returns:
            ``pred_matte``: the sigmoid of the final convolution output.
        """
        lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
        lr4x = self.conv_lr4x(lr4x)
        lr2x = F.interpolate(lr4x, scale_factor=2, mode='bilinear', align_corners=False)

        f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
        f = F.interpolate(f2x, scale_factor=2, mode='bilinear', align_corners=False)
        f = self.conv_f(torch.cat((f, img), dim=1))
        pred_matte = torch.sigmoid(f)

        return pred_matte
|
198 |
-
|
199 |
-
|
200 |
-
#------------------------------------------------------------------------------
|
201 |
-
# MODNet
|
202 |
-
#------------------------------------------------------------------------------
|
203 |
-
|
204 |
-
class MODNet(nn.Module):
    """Architecture of MODNet.

    Args:
        in_channels: channel count passed to the backbone constructor.
        hr_channels: channel width passed to the high-resolution and fusion
            branches.
        backbone_arch: key into ``SUPPORTED_BACKBONES``.
        backbone_pretrained: when truthy, ``load_pretrained_ckpt()`` is
            called on the backbone after weight initialisation.
    """

    def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
        super(MODNet, self).__init__()

        self.in_channels = in_channels
        self.hr_channels = hr_channels
        self.backbone_arch = backbone_arch
        self.backbone_pretrained = backbone_pretrained

        self.backbone = SUPPORTED_BACKBONES[self.backbone_arch](self.in_channels)

        # The same backbone instance is also registered inside lr_branch.
        self.lr_branch = LRBranch(self.backbone)
        self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
        self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                self._init_conv(m)
            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d)):
                self._init_norm(m)

        # Pretrained backbone weights are loaded after the initialisation
        # pass above.
        if self.backbone_pretrained:
            self.backbone.load_pretrained_ckpt()

    def forward(self, img, inference):
        """Run the three branches.

        Returns:
            ``(pred_semantic, pred_detail, pred_matte)``.
        """
        pred_semantic, lr8x, [enc2x, enc4x] = self.lr_branch(img, inference)
        pred_detail, hr2x = self.hr_branch(img, enc2x, enc4x, lr8x, inference)
        pred_matte = self.f_branch(img, lr8x, hr2x)

        return pred_semantic, pred_detail, pred_matte

    def freeze_norm(self):
        """Put every BatchNorm2d and InstanceNorm2d submodule into eval mode."""
        norm_types = (nn.BatchNorm2d, nn.InstanceNorm2d)
        for m in self.modules():
            if isinstance(m, norm_types):
                m.eval()

    def _init_conv(self, conv):
        """Kaiming-uniform initialise ``conv.weight`` and zero its bias, if any."""
        nn.init.kaiming_uniform_(
            conv.weight, a=0, mode='fan_in', nonlinearity='relu')
        if conv.bias is not None:
            nn.init.constant_(conv.bias, 0)

    def _init_norm(self, norm):
        """Set weight to 1 and bias to 0; layers whose weight is None are left alone."""
        if norm.weight is not None:
            nn.init.constant_(norm.weight, 1)
            nn.init.constant_(norm.bias, 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|