Why does UNet have classes?
import torch
import torch.nn as nn
import torch.nn.functional as F

class double_conv(nn.Module):
    '''(conv => BN => ReLU) * 2'''
    def __init__(self, in_ch, out_ch):
        super(double_conv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.conv(x)
        return x

class inconv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(inconv, self).__init__()
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x):
        x = self.conv(x)
        return x

class down(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(down, self).__init__()
        self.mpconv = nn.Sequential(
            nn.MaxPool2d(2),
            double_conv(in_ch, out_ch)
        )

    def forward(self, x):
        x = self.mpconv(x)
        return x

class up(nn.Module):
    def __init__(self, in_ch, out_ch, bilinear=True):
        super(up, self).__init__()
        # it would be a nice idea if the upsampling could be learned too,
        # but my machine does not have enough memory to handle all those weights
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # pad the skip connection so it matches the upsampled feature map
        # (F.pad takes (left, right, top, bottom) for the last two dims)
        diffY = x1.size()[2] - x2.size()[2]   # height difference
        diffX = x1.size()[3] - x2.size()[3]   # width difference
        x2 = F.pad(x2, (diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2))
        x = torch.cat([x2, x1], dim=1)
        x = self.conv(x)
        return x

class outconv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(outconv, self).__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, 1)

    def forward(self, x):
        x = self.conv(x)
        return x

class UNet(nn.Module):
    def __init__(self, n_channels, n_classes):
        super(UNet, self).__init__()
        self.inc = inconv(n_channels, 64)
        self.down1 = down(64, 128)
        self.down2 = down(128, 256)
        self.down3 = down(256, 512)
        self.down4 = down(512, 512)
        self.up1 = up(1024, 256)
        self.up2 = up(512, 128)
        self.up3 = up(256, 64)
        self.up4 = up(128, 64)
        self.outc = outconv(64, n_classes)

    def forward(self, x):
        self.x1 = self.inc(x)
        self.x2 = self.down1(self.x1)
        self.x3 = self.down2(self.x2)
        self.x4 = self.down3(self.x3)
        self.x5 = self.down4(self.x4)
        self.x6 = self.up1(self.x5, self.x4)
        self.x7 = self.up2(self.x6, self.x3)
        self.x8 = self.up3(self.x7, self.x2)
        self.x9 = self.up4(self.x8, self.x1)
        self.y = self.outc(self.x9)
        return self.y
When reading through the UNet architecture, I noticed that it has n_classes as an output.

class UNet(nn.Module):
    def __init__(self, n_channels, n_classes):

But why does it have n_classes if it is used for image segmentation?

I am trying to use this code for image denoising, but I can't figure out what the n_classes parameter should be, because I don't have any classes.

Does n_classes signify multiclass segmentation? If so, what is the output of binary UNet segmentation?
Answer
Does n_classes signify multiclass segmentation?
Yes, if you specify n_classes=4 it will output a tensor of shape (batch, 4, width, height), where each pixel can be segmented into one of 4 classes. You should also use torch.nn.CrossEntropyLoss for training.
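For instance, a minimal sketch of one multiclass training step might look like this (assuming the imports and the UNet class above; the batch size, image size, and random stand-in data are arbitrary, not from the original post):

# Hypothetical multiclass sketch: 4 output channels, one score per class per pixel.
model = UNet(n_channels=3, n_classes=4)
criterion = torch.nn.CrossEntropyLoss()

images = torch.randn(8, 3, 64, 64)            # (batch, channels, height, width)
targets = torch.randint(0, 4, (8, 64, 64))    # one class index per pixel

logits = model(images)                        # shape: (8, 4, 64, 64)
loss = criterion(logits, targets)             # CrossEntropyLoss expects raw logits
loss.backward()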
If so, what is the output of binary UNet segmentation?
If you want binary segmentation, you would specify n_classes=1 (0 for black or 1 for white) and use torch.nn.BCEWithLogitsLoss.
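In the same spirit, a hedged sketch of the binary case (again with random stand-in data, not from the original post):

# Hypothetical binary-segmentation sketch: one output channel, BCE on raw logits.
model = UNet(n_channels=3, n_classes=1)
criterion = torch.nn.BCEWithLogitsLoss()

images = torch.randn(8, 3, 64, 64)
targets = torch.randint(0, 2, (8, 1, 64, 64)).float()  # 0 = black, 1 = white

logits = model(images)                  # shape: (8, 1, 64, 64)
loss = criterion(logits, targets)       # sigmoid is applied inside the loss
loss.backward()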
I am trying to use this code for image denoising and I couldn't figure out what the n_classes parameter should be
It should be equal to n_channels, usually 3 for RGB or 1 for grayscale. If you want to teach this model to denoise images, you should (a minimal sketch follows this list):

- add some noise to the image (e.g. using torchvision.transforms)
- use a sigmoid activation at the end, as pixel values lie between 0 and 1 (unless normalized)
- use torch.nn.MSELoss for training
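Putting those three points together, a rough, hypothetical sketch of one denoising step could look like the following (the noise level 0.1 and the random stand-in data are assumptions, not part of the original code):

# Hypothetical denoising sketch: n_classes equals n_channels,
# sigmoid on the output, MSE against the clean image.
model = UNet(n_channels=3, n_classes=3)
criterion = torch.nn.MSELoss()

clean = torch.rand(8, 3, 64, 64)                             # pixels already in [0, 1]
noisy = (clean + 0.1 * torch.randn_like(clean)).clamp(0, 1)  # add Gaussian noise

denoised = torch.sigmoid(model(noisy))        # squash logits into [0, 1]
loss = criterion(denoised, clean)
loss.backward()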
Why sigmoid?

Because the [0, 255] pixel range is represented as [0, 1] pixel values (at least without normalization). sigmoid does exactly that: it squashes values into the [0, 1] range, whereas a linear output (logits) can range anywhere from -inf to +inf.
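A quick illustration of that squashing (the values are arbitrary; assumes import torch from above):

# Arbitrary logits from the whole real line mapped into [0, 1] by sigmoid.
logits = torch.tensor([-100.0, -2.0, 0.0, 2.0, 100.0])
print(torch.sigmoid(logits))
# tensor([0.0000, 0.1192, 0.5000, 0.8808, 1.0000])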
Why not a linear output and a clamp?
For the output of a linear layer to land in the [0, 1] range after clamping, the linear layer's possible output values would have to be greater than 0 (a logit range of [0, +inf] to fit the target).
Why not a linear output without a clamp?
The output logits would have to be in the [0, 1] range.
Why not some other method?
You could do that, but the idea behind sigmoid is:

- it helps the neural network, since any logit value can be output
- the first derivative of sigmoid is a bell-shaped curve resembling the standard normal density, hence it models the probability of many phenomena occurring in real life (see here for more info)
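If you want to check that bell shape yourself, the derivative sigma'(x) = sigma(x) * (1 - sigma(x)) is easy to evaluate numerically (assumes import torch from above):

# First derivative of sigmoid: sigma(x) * (1 - sigma(x)).
# It peaks at 0.25 at x = 0 and decays symmetrically, giving the bell shape.
x = torch.linspace(-6.0, 6.0, 13)
s = torch.sigmoid(x)
print(s * (1 - s))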