Pytorch: The size of tensor a (24) must match the size of tensor b (48) at non-singleton dimension 3

The code below works fine and produces the correct results.

import torch
import torch.nn as nn
import torch.nn.functional as F

from modules import ConvLSTMCell, Sign


class EncoderCell(nn.Module):
    def __init__(self):
        super(EncoderCell, self).__init__()

        self.conv = nn.Conv2d(
            3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.rnn1 = ConvLSTMCell(
            64,
            256,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            256,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):
        x = self.conv(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]

        return x, hidden1, hidden2, hidden3


class Binarizer(nn.Module):
    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        feat = self.conv(input)
        x = F.tanh(feat)
        return self.sign(x)


class DecoderCell(nn.Module):
    def __init__(self):
        super(DecoderCell, self).__init__()

        self.conv1 = nn.Conv2d(
            32, 512, kernel_size=1, stride=1, padding=0, bias=False)
        self.rnn1 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            128,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            128,
            256,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.rnn4 = ConvLSTMCell(
            64,
            128,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.conv2 = nn.Conv2d(
            32, 3, kernel_size=1, stride=1, padding=0, bias=False)

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        x = self.conv1(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        x = F.pixel_shuffle(x, 2)

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        x = F.pixel_shuffle(x, 2)

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        x = F.pixel_shuffle(x, 2)

        hidden4 = self.rnn4(x, hidden4)
        x = hidden4[0]
        x = F.pixel_shuffle(x, 2)

        x = F.tanh(self.conv2(x)) / 2
        return x, hidden1, hidden2, hidden3, hidden4

Now I have changed self.conv and added a pretrained ResNet layer. After this change it throws a tensor mismatch error during training. Everything else is the same; I only added these lines to the code. I have marked the changed lines with a ** comment.

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models


from modules import ConvLSTMCell, Sign


class EncoderCell(nn.Module):
    def __init__(self):
        super(EncoderCell, self).__init__()

        #self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)

        # ** changed lines **
        resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv = resConv.layer4


        self.rnn1 = ConvLSTMCell(
            64,
            256,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            256,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):

        x = self.conv(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]

        return x, hidden1, hidden2, hidden3


class Binarizer(nn.Module):
    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        feat = self.conv(input)
        x = F.tanh(feat)
        return self.sign(x)


class DecoderCell(nn.Module):
    def __init__(self):
        super(DecoderCell, self).__init__()

        # ** changed lines **
        resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(32, 512, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv1 = resConv.layer4

        self.rnn1 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            128,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            128,
            256,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.rnn4 = ConvLSTMCell(
            64,
            128,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)

        # ** changed lines **
        resConv2 = models.resnet50(pretrained=True)
        resConv2.layer4 = nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv2 = resConv2.layer4

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        x = self.conv1(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        x = F.pixel_shuffle(x, 2)

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        x = F.pixel_shuffle(x, 2)

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        x = F.pixel_shuffle(x, 2)

        hidden4 = self.rnn4(x, hidden4)
        x = hidden4[0]
        x = F.pixel_shuffle(x, 2)

        x = F.tanh(self.conv2(x)) / 2
        return x, hidden1, hidden2, hidden3, hidden4

You are doing it wrong. Some explanation:

    resConv = models.resnet50(pretrained=True) # you are reading a model

Now you are replacing a layer in that model with a newly initialized layer. Secondly, layer4 in resnet50 is a sequential block containing multiple layers; use print to see the exact layers in the model.

    resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)

Here you are using the new, untrained layer:

    self.conv = resConv.layer4
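
To see concretely that layer4 is a whole block rather than a single conv layer, you can print it (a quick check using torchvision's resnet50):

import torchvision.models as models

resConv = models.resnet50(pretrained=True)
print(type(resConv.layer4))   # <class 'torch.nn.modules.container.Sequential'>
print(resConv.layer4)         # a Sequential of Bottleneck blocks, each containing several conv layers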

Regarding your query about using the pretrained layer, you should do it like this:

resConv = models.resnet50(pretrained=True)
print(resConv)  # see the layer you want to use
self.conv = resConv.conv1  # replace conv1 with that layer
# note: conv1 is the name of the first conv layer in resnet
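
As a quick sanity check (a sketch with a dummy input, nothing specific to your data), you can confirm that conv1 still produces the 64-channel feature map that rnn1 expects:

import torch
import torchvision.models as models

resConv = models.resnet50(pretrained=True)
x = torch.randn(1, 3, 32, 32)   # dummy image batch
y = resConv.conv1(x)            # Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
print(y.shape)                  # torch.Size([1, 64, 16, 16])

Like the original self.conv, this layer downsamples by a factor of two, so the hidden-state sizes further down the encoder stay the same.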

Apart from this, I would also suggest fetching and assigning that layer (or its weights and biases) outside of the object initialization. Something like:

enc = EncoderCell()
resnet50 = models.resnet50(pretrained=True)

and then

enc.conv = resnet50.conv1

or, more ideally,

enc.conv.load_state_dict(resnet50.conv1.state_dict())

The reason is that calling state_dict() on an nn.Module creates a clone of the parameters (the weights and biases in this case), which can then be loaded via the nn.Module.load_state_dict() method, as long as the two instances of nn.Module share the same shapes. So you get the pretrained weights, completely detached from the pretrained model. You can then get rid of the pretrained model, since it can be quite large in memory.

del resnet50
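
A minimal sketch of that idea, using two standalone Conv2d layers with matching shapes purely for illustration:

import torch
import torch.nn as nn

src = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)   # stands in for the pretrained layer
dst = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)   # stands in for your own layer

dst.load_state_dict(src.state_dict())        # copies the parameters, since keys and shapes match
print(torch.equal(dst.weight, src.weight))   # True

del src                                      # the copy is independent, so the source can be freed
print(dst.weight.shape)                      # torch.Size([64, 3, 3, 3])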

I submitted a potential improvement to the other answer, but to also address the error you are getting I am answering here as well. If the code ran before your edit, and the layer you are trying to change has the same shape as the previous one, then my guess is that it may have to do with the computational graph that is formed by creating the resnet50 object. I would recommend the approach I mentioned in my edit to the other answer, but I will state it here again (note, this assumes you keep the code as it is):

# instantiate you encoder (repeat these steps with the decoder as well)
enc = EncoderCell()
# get the pretrained model
resnet = models.resnet50(pretrained=True)
# load the state dict into the regular conv layer
enc.conv.load_state_dict(resnet.layer4.state_dict())

This should load the pretrained weights and biases from the resnet50 model into your conv layer. The same can be done for the decoder conv layers, as long as they all share the same shapes.
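
If you want to verify that the shapes line up before calling load_state_dict() (which raises a RuntimeError on any mismatch), a small helper along these lines would do. Note that can_copy is a hypothetical function written for this sketch, not part of the original code, and it assumes the EncoderCell defined above can be constructed:

import torchvision.models as models

def can_copy(dst, src):
    # True only if src's state dict can be loaded into dst (same keys and tensor shapes).
    dst_sd, src_sd = dst.state_dict(), src.state_dict()
    return (dst_sd.keys() == src_sd.keys()
            and all(dst_sd[k].shape == src_sd[k].shape for k in dst_sd))

enc = EncoderCell()
resnet = models.resnet50(pretrained=True)

if can_copy(enc.conv, resnet.layer4):
    enc.conv.load_state_dict(resnet.layer4.state_dict())
else:
    print("Shapes differ; these weights cannot be copied directly.")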

To do more testing on the mismatch error, I would suggest using a debugger or print statements in the models' forward() methods to see the shape of the tensors after each layer is applied, something like this:

def forward(self, input, hidden1, hidden2, hidden3, hidden4):
    print(input.size())
    x = self.conv1(input)
    print(x.size())
    hidden1 = self.rnn1(x, hidden1)
    x = hidden1[0]
    x = F.pixel_shuffle(x, 2)

    hidden2 = self.rnn2(x, hidden2)
    x = hidden2[0]
    x = F.pixel_shuffle(x, 2)

    hidden3 = self.rnn3(x, hidden3)
    x = hidden3[0]
    x = F.pixel_shuffle(x, 2)

    hidden4 = self.rnn4(x, hidden4)
    x = hidden4[0]
    x = F.pixel_shuffle(x, 2)

    x = F.tanh(self.conv2(x)) / 2
    return x, hidden1, hidden2, hidden3, hidden4

Of course, you can place print statements anywhere else in the forward method. I also highly recommend a debugger; PyCharm makes this really easy, and also makes it easy to inspect the state of variables in scientific mode alongside the Python console it provides. It may be worth looking up how to calculate the size a variable has after it passes through certain layers, such as convolutional layers. This is well understood, and there are formulas to calculate the size of the dimensions based on the input size, filter size, stride, and padding.
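
For reference, the standard formula for a convolution's spatial output size is out = floor((in + 2*padding - kernel) / stride) + 1. A short sketch that cross-checks it against an actual layer (the sizes here are illustrative, not taken from your data):

import torch
import torch.nn as nn

def conv_out_size(in_size, kernel_size, stride=1, padding=0):
    # out = floor((in + 2*padding - kernel_size) / stride) + 1
    return (in_size + 2 * padding - kernel_size) // stride + 1

# The encoder's first conv (kernel 3, stride 2, padding 1) halves a 32x32 input to 16x16.
print(conv_out_size(32, kernel_size=3, stride=2, padding=1))   # 16

conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
print(conv(torch.randn(1, 3, 32, 32)).shape)                   # torch.Size([1, 64, 16, 16])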