132 lines
4.1 KiB
Python
132 lines
4.1 KiB
Python
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
|
|
from ..base import Decoder
|
|
|
|
|
|
class Conv3x3GNReLU(nn.Module):
    """3x3 convolution, GroupNorm and ReLU, optionally followed by 2x upsampling.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution. The
            normalization uses 32 groups, so this must be divisible by 32.
        upsample: When truthy, the activated output is bilinearly upsampled
            by a factor of 2 (``align_corners=True``).
    """

    def __init__(self, in_channels, out_channels, upsample=False):
        super().__init__()
        self.upsample = upsample

        # The convolution carries no bias term; the GroupNorm that follows
        # has its own learnable affine parameters.
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=(3, 3),
            stride=1,
            padding=1,
            bias=False,
        )
        norm = nn.GroupNorm(num_groups=32, num_channels=out_channels)
        activation = nn.ReLU(inplace=True)

        # Positional Sequential keeps the parameter names `block.0.*` / `block.1.*`.
        self.block = nn.Sequential(conv, norm, activation)

    def forward(self, x):
        """Apply conv -> norm -> ReLU and, if enabled, double the spatial size."""
        out = self.block(x)
        if not self.upsample:
            return out
        return F.interpolate(out, scale_factor=2, mode="bilinear", align_corners=True)
|
|
|
|
|
|
class FPNBlock(nn.Module):
    """Top-down pyramid step: upsample the coarser map and add a projected skip.

    Args:
        pyramid_channels: Channel count produced by the 1x1 skip projection;
            ``x`` passed to :meth:`forward` must have this many channels so
            the element-wise addition is valid.
        skip_channels: Channel count of the incoming skip feature map.
    """

    def __init__(self, pyramid_channels, skip_channels):
        super().__init__()
        self.skip_conv = nn.Conv2d(skip_channels, pyramid_channels, kernel_size=1)

    def forward(self, x, skip=None):
        """Merge the coarser pyramid map ``x`` with the finer ``skip`` map.

        Args:
            x: Coarser feature map with ``pyramid_channels`` channels.
            skip: Finer feature map with ``skip_channels`` channels. Required;
                the ``None`` default exists only for signature compatibility.

        Returns:
            ``x`` upsampled (nearest neighbour) to the spatial size of
            ``skip``, plus the 1x1-projected ``skip``.

        Raises:
            TypeError: If ``skip`` is ``None``.
        """
        if skip is None:
            raise TypeError("FPNBlock.forward() requires a `skip` tensor, got None")

        skip = self.skip_conv(skip)
        # Resize to the skip map's exact spatial size instead of a fixed 2x
        # factor: identical when `skip` is exactly twice as large, and it keeps
        # the addition valid when an odd-sized input made the encoder stages
        # differ by something other than an exact factor of two.
        x = F.interpolate(x, size=skip.shape[-2:], mode="nearest")
        return x + skip
|
|
|
|
|
|
class SegmentationBlock(nn.Module):
    """Stack of ``Conv3x3GNReLU`` stages that upsamples ``n_upsamples`` times.

    Args:
        in_channels: Channel count of the input feature map.
        out_channels: Channel count produced by every stage.
        n_upsamples: Number of 2x upsampling stages. With ``0`` a single
            non-upsampling stage is still created, so the block always
            contains at least one convolution.
    """

    def __init__(self, in_channels, out_channels, n_upsamples=0):
        super().__init__()

        # The first stage changes the channel count; it upsamples only when
        # at least one upsampling step was requested.
        stages = [Conv3x3GNReLU(in_channels, out_channels, upsample=bool(n_upsamples))]

        # Every additional upsampling step gets its own channel-preserving stage.
        n_extra = n_upsamples - 1 if n_upsamples > 1 else 0
        stages += [
            Conv3x3GNReLU(out_channels, out_channels, upsample=True)
            for _ in range(n_extra)
        ]

        self.block = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through all stages in order."""
        return self.block(x)
|
|
|
|
|
|
class MergeBlock(nn.Module):
    """Combine a sequence of feature maps by summation or channel concatenation.

    Args:
        policy: ``"add"`` to sum the inputs element-wise, or ``"cat"`` to
            concatenate them along dimension 1.

    Raises:
        ValueError: If ``policy`` is neither ``"add"`` nor ``"cat"``.
    """

    def __init__(self, policy):
        super().__init__()
        if policy != "add" and policy != "cat":
            message = "`merge_policy` must be one of: ['add', 'cat'], got {}".format(
                policy
            )
            raise ValueError(message)
        self.policy = policy

    def forward(self, x):
        """Merge the iterable of tensors ``x`` according to the stored policy.

        Raises:
            ValueError: If ``self.policy`` was changed to an unsupported value
                after construction.
        """
        if self.policy == 'add':
            return sum(x)
        if self.policy == 'cat':
            return torch.cat(x, dim=1)
        # Only reachable when `policy` was reassigned after __init__ validated it.
        raise ValueError(
            "`merge_policy` must be one of: ['add', 'cat'], got {}".format(self.policy)
        )
|
|
|
|
|
|
class RCNNDecoder(Decoder):
    """FPN-style decoder that fuses two encoder feature sets into one map.

    A four-level top-down pyramid (p5..p2) is built from the four deepest
    fused encoder features. Each level goes through a ``SegmentationBlock``
    (with 3, 2, 1 and 0 upsampling steps respectively) and the four results
    are merged and passed through 2D dropout.

    Args:
        encoder_channels: Per-stage encoder channel counts, ordered from the
            shallowest to the deepest stage (the sequence is reversed here).
        encoder_depth: Number of encoder stages to use; must be at least 3.
        pyramid_channels: Channel count of every pyramid level.
        segmentation_channels: Channel count produced by each segmentation
            block.
        dropout: Probability passed to ``nn.Dropout2d``.
        merge_policy: ``"add"`` or ``"cat"``; forwarded to ``MergeBlock``.
        fusion_form: Name of the fusion strategy handed to
            ``self.aggregation_layer``. NOTE(review): ``FUSION_DIC`` and
            ``aggregation_layer`` are not defined in this file and presumably
            come from ``Decoder`` -- confirm the accepted values there.

    Raises:
        ValueError: If ``encoder_depth`` is less than 3, or (from
            ``MergeBlock``) if ``merge_policy`` is unsupported.
    """

    def __init__(
        self,
        encoder_channels,
        encoder_depth=5,
        pyramid_channels=256,
        segmentation_channels=128,
        dropout=0.2,
        merge_policy="add",
        fusion_form="concat",
    ):
        super().__init__()

        # Concatenating the four pyramid outputs quadruples the channel count.
        if merge_policy == "add":
            self.out_channels = segmentation_channels
        else:
            self.out_channels = segmentation_channels * 4

        if encoder_depth < 3:
            raise ValueError("Encoder depth for RCNN decoder cannot be less than 3, got {}.".format(encoder_depth))

        # Deepest stage first, truncated to the requested depth,
        # e.g. (512, 256, 128, 64, 64, 3).
        channels = encoder_channels[::-1][:encoder_depth + 1]

        # Fusion forms listed under "2to2_fusion" double every channel count.
        self.fusion_form = fusion_form
        if self.fusion_form in self.FUSION_DIC["2to2_fusion"]:
            channels = [ch*2 for ch in channels]

        # Lateral connections, from the deepest level (p5) to the shallowest (p2).
        self.p5 = nn.Conv2d(channels[0], pyramid_channels, kernel_size=1)
        self.p4 = FPNBlock(pyramid_channels, channels[1])
        self.p3 = FPNBlock(pyramid_channels, channels[2])
        self.p2 = FPNBlock(pyramid_channels, channels[3])

        # One segmentation block per pyramid level; deeper levels get more
        # upsampling steps (p5 -> 3, p4 -> 2, p3 -> 1, p2 -> 0).
        seg_blocks = []
        for n_upsamples in (3, 2, 1, 0):
            seg_blocks.append(
                SegmentationBlock(pyramid_channels, segmentation_channels, n_upsamples=n_upsamples)
            )
        self.seg_blocks = nn.ModuleList(seg_blocks)

        self.merge = MergeBlock(merge_policy)
        self.dropout = nn.Dropout2d(p=dropout, inplace=True)

    def forward(self, *features):
        """Decode two encoder feature sets into a single merged feature map.

        Args:
            *features: The first two positional arguments are passed to
                ``self.aggregation_layer``; any further arguments are ignored.

        Returns:
            The merged pyramid output after dropout.
        """
        fused = self.aggregation_layer(
            features[0],
            features[1],
            self.fusion_form,
            ignore_original_img=True,
        )
        # Only the four deepest fused maps feed the pyramid.
        c2, c3, c4, c5 = fused[-4:]

        # Top-down pathway.
        p5 = self.p5(c5)
        p4 = self.p4(p5, c4)
        p3 = self.p3(p4, c3)
        p2 = self.p2(p3, c2)

        levels = (p5, p4, p3, p2)
        feature_pyramid = [
            seg_block(level) for seg_block, level in zip(self.seg_blocks, levels)
        ]
        merged = self.merge(feature_pyramid)
        return self.dropout(merged)
|