为什么我的全卷积自动编码器不对称?
Why is my Fully Convolutional Autoencoder not symmetric?
我正在开发一个全卷积自动编码器,它采用 3 个通道作为输入并输出 2 个通道(输入:LAB,输出:AB)。因为输出应该和输入一样大,所以我用的是Full Convolution。
代码:
import torch.nn as nn
class AE(nn.Module):
    """Convolutional autoencoder mapping a 3-channel input to a 2-channel output.

    The encoder is five 5x5 convolutions with 2x2 max-pooling between them;
    the decoder mirrors it with 5x5 transposed convolutions and bilinear
    2x upsampling.

    Every (transposed) convolution here uses ``padding=1``.  With a 5x5
    kernel that makes each ``Conv2d`` shrink H and W by 2 and each
    ``ConvTranspose2d`` grow them by 2.  Because pooling and upsampling sit
    between those steps, the decoder does not restore the input size: an
    80x80 input produces a 78x78 output.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            # conv 1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 5
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            # conv 6
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            # conv 7
            # align_corners=False is the default behaviour, spelled out explicitly.
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # conv 8
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # conv 9
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # conv 10 out
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=64, out_channels=2, kernel_size=5, stride=1, padding=1),
            # Softmax over the channel dimension.  dim=1 is what the implicit
            # (deprecated) choice resolved to for 4-D input, now stated explicitly.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Run ``x`` through the encoder and then the decoder.

        ``x`` must be a 4-D batch with 3 channels (the ``BatchNorm2d`` layers
        reject other ranks).  Returns a 2-channel tensor whose values sum to
        1 across the channel dimension at every spatial position.
        """
        x = self.encoder(x)
        x = self.decoder(x)
        return x
输出张量的大小应该是:torch.Size([1, 2, 199, 253])
输出张量的实际大小是:torch.Size([1, 2, 190, 238])
我的主要问题是如何把 Conv2d 和 MaxPool2d 结合起来,并在 ConvTranspose2d 中设置正确的参数值。因此,我用 Upsample 来还原 MaxPool2d 的下采样,而 ConvTranspose2d 只用来对应 Conv2d。但是输出仍然有点不对称,我真的不知道为什么。
感谢您的帮助!
有两个问题。
首先是填充不足:kernel_size=5 的卷积在不填充时,每次应用都会把图像缩小 4 个像素(每边 2 个像素),所以所有卷积层都需要 padding=2,而不仅仅是 1。
其次是输入尺寸“不整齐”("uneven")。我的意思是,即使卷积已经被正确填充,还剩下下采样操作,它们每一次都试图把图像分辨率减半。当尺寸不能被 2 整除时,它们只会返回一个较小的结果(整数除法会丢弃余数)。由于你的网络有 4 个连续的 2 倍下采样操作,输入的 H、W 维度必须是 2^4=16 的倍数,这样才能得到与输入形状相同的输出。示例如下:
import torch
import torch.nn as nn
class AE(nn.Module):
    """Convolutional autoencoder mapping a 3-channel input to a 2-channel output.

    The encoder is five 5x5 convolutions with 2x2 max-pooling between them;
    the decoder mirrors it with 5x5 transposed convolutions and bilinear
    2x upsampling.

    Every (transposed) convolution uses ``padding=2``, so with a 5x5 kernel
    and stride 1 it leaves H and W unchanged.  The only size changes come
    from the four 2x poolings and the four 2x upsamplings, so the output has
    the same spatial size as the input whenever H and W are multiples of
    2**4 = 16.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            # conv 1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 5
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            # conv 6
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            # conv 7
            # align_corners=False is the default behaviour, spelled out explicitly.
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # conv 8
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # conv 9
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # conv 10 out
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=64, out_channels=2, kernel_size=5, stride=1, padding=2),
            # Softmax over the channel dimension.  dim=1 is what the implicit
            # (deprecated) choice resolved to for 4-D input, now stated explicitly.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Run ``x`` through the encoder and then the decoder.

        ``x`` must be a 4-D batch with 3 channels (the ``BatchNorm2d`` layers
        reject other ranks).  Returns a 2-channel tensor whose values sum to
        1 across the channel dimension at every spatial position.
        """
        x = self.encoder(x)
        x = self.decoder(x)
        return x
# Demo: H and W are multiples of 16, so the four 2x poolings in AE divide
# evenly and the output shape can be compared directly with the input shape.
# The tensor is named `sample` so that the builtin `input` is not shadowed.
sample = torch.randn(1, 3, 6 * 16, 7 * 16)
output = AE()(sample)
print(sample.shape)
print(output.shape)
我正在开发一个全卷积自动编码器,它采用 3 个通道作为输入并输出 2 个通道(输入:LAB,输出:AB)。因为输出应该和输入一样大,所以我用的是Full Convolution。
代码:
import torch.nn as nn
class AE(nn.Module):
    """Convolutional autoencoder mapping a 3-channel input to a 2-channel output.

    The encoder is five 5x5 convolutions with 2x2 max-pooling between them;
    the decoder mirrors it with 5x5 transposed convolutions and bilinear
    2x upsampling.

    Every (transposed) convolution here uses ``padding=1``.  With a 5x5
    kernel that makes each ``Conv2d`` shrink H and W by 2 and each
    ``ConvTranspose2d`` grow them by 2.  Because pooling and upsampling sit
    between those steps, the decoder does not restore the input size: an
    80x80 input produces a 78x78 output.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            # conv 1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 5
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            # conv 6
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            # conv 7
            # align_corners=False is the default behaviour, spelled out explicitly.
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # conv 8
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # conv 9
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=5, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # conv 10 out
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=64, out_channels=2, kernel_size=5, stride=1, padding=1),
            # Softmax over the channel dimension.  dim=1 is what the implicit
            # (deprecated) choice resolved to for 4-D input, now stated explicitly.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Run ``x`` through the encoder and then the decoder.

        ``x`` must be a 4-D batch with 3 channels (the ``BatchNorm2d`` layers
        reject other ranks).  Returns a 2-channel tensor whose values sum to
        1 across the channel dimension at every spatial position.
        """
        x = self.encoder(x)
        x = self.decoder(x)
        return x
输出张量的大小应该是:torch.Size([1, 2, 199, 253])
输出张量的实际大小是:torch.Size([1, 2, 190, 238])
我的主要问题是如何把 Conv2d 和 MaxPool2d 结合起来,并在 ConvTranspose2d 中设置正确的参数值。因此,我用 Upsample 来还原 MaxPool2d 的下采样,而 ConvTranspose2d 只用来对应 Conv2d。但是输出仍然有点不对称,我真的不知道为什么。
感谢您的帮助!
有两个问题。
首先是填充不足:kernel_size=5 的卷积在不填充时,每次应用都会把图像缩小 4 个像素(每边 2 个像素),所以所有卷积层都需要 padding=2,而不仅仅是 1。
其次是输入尺寸“不整齐”("uneven")。我的意思是,即使卷积已经被正确填充,还剩下下采样操作,它们每一次都试图把图像分辨率减半。当尺寸不能被 2 整除时,它们只会返回一个较小的结果(整数除法会丢弃余数)。由于你的网络有 4 个连续的 2 倍下采样操作,输入的 H、W 维度必须是 2^4=16 的倍数,这样才能得到与输入形状相同的输出。示例如下:
import torch
import torch.nn as nn
class AE(nn.Module):
    """Convolutional autoencoder mapping a 3-channel input to a 2-channel output.

    The encoder is five 5x5 convolutions with 2x2 max-pooling between them;
    the decoder mirrors it with 5x5 transposed convolutions and bilinear
    2x upsampling.

    Every (transposed) convolution uses ``padding=2``, so with a 5x5 kernel
    and stride 1 it leaves H and W unchanged.  The only size changes come
    from the four 2x poolings and the four 2x upsamplings, so the output has
    the same spatial size as the input whenever H and W are multiples of
    2**4 = 16.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            # conv 1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 4
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # conv 5
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            # conv 6
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            # conv 7
            # align_corners=False is the default behaviour, spelled out explicitly.
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # conv 8
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # conv 9
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # conv 10 out
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.ConvTranspose2d(in_channels=64, out_channels=2, kernel_size=5, stride=1, padding=2),
            # Softmax over the channel dimension.  dim=1 is what the implicit
            # (deprecated) choice resolved to for 4-D input, now stated explicitly.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Run ``x`` through the encoder and then the decoder.

        ``x`` must be a 4-D batch with 3 channels (the ``BatchNorm2d`` layers
        reject other ranks).  Returns a 2-channel tensor whose values sum to
        1 across the channel dimension at every spatial position.
        """
        x = self.encoder(x)
        x = self.decoder(x)
        return x
# Demo: H and W are multiples of 16, so the four 2x poolings in AE divide
# evenly and the output shape can be compared directly with the input shape.
# The tensor is named `sample` so that the builtin `input` is not shadowed.
sample = torch.randn(1, 3, 6 * 16, 7 * 16)
output = AE()(sample)
print(sample.shape)
print(output.shape)