PyTorch: RuntimeError: The size of tensor a (224) must match the size of tensor b (244) at non-singleton dimension 3


I want to create and train an AutoEncoder to extract features and use those features for a clustering algorithm. Right now I am getting an error when computing the loss:

RuntimeError: The size of tensor a (224) must match the size of tensor b (244) at non-singleton dimension 3

and a warning:

UserWarning: Using a target size (torch.Size([1, 3, 224, 244])) that is different to the input size (torch.Size([1, 3, 224, 224])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.

I am using PyTorch.
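The error is easy to reproduce in isolation. The following minimal sketch (independent of my model, just nn.MSELoss fed the two shapes from the warning) triggers the same warning and the same RuntimeError:

import torch
import torch.nn as nn

criterion = nn.MSELoss()
output = torch.rand(1, 3, 224, 224)  # same shape as my model output
target = torch.rand(1, 3, 224, 244)  # same shape as my input image
loss = criterion(output, target)     # UserWarning, then RuntimeError at dimension 3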

Can anyone tell me what is wrong here? In the warning and the error the input and output sizes look the same to me, yet they are reported as different. The full shapes of the input and output images are as follows:

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 16, 112, 112]             448
              ReLU-2         [-1, 16, 112, 112]               0
            Conv2d-3           [-1, 32, 56, 56]           4,640
              ReLU-4           [-1, 32, 56, 56]               0
            Conv2d-5           [-1, 64, 18, 18]         100,416
              ReLU-6           [-1, 64, 18, 18]               0
            Conv2d-7            [-1, 128, 3, 3]         401,536
              ReLU-8            [-1, 128, 3, 3]               0
            Conv2d-9            [-1, 256, 1, 1]         295,168
  ConvTranspose2d-10            [-1, 128, 3, 3]         295,040
             ReLU-11            [-1, 128, 3, 3]               0
  ConvTranspose2d-12           [-1, 64, 12, 12]         401,472
             ReLU-13           [-1, 64, 12, 12]               0
  ConvTranspose2d-14           [-1, 24, 28, 28]          75,288
             ReLU-15           [-1, 24, 28, 28]               0
  ConvTranspose2d-16           [-1, 16, 56, 56]           3,472
             ReLU-17           [-1, 16, 56, 56]               0
  ConvTranspose2d-18          [-1, 8, 111, 111]           1,160
             ReLU-19          [-1, 8, 111, 111]               0
  ConvTranspose2d-20          [-1, 3, 224, 224]             603
          Sigmoid-21          [-1, 3, 224, 224]               0
================================================================
Total params: 1,579,243
Trainable params: 1,579,243
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 9.94
Params size (MB): 6.02
Estimated Total Size (MB): 16.54
----------------------------------------------------------------
Min Value of input Image =  tensor(0.0627)
Max Value of input Image =  tensor(0.5098)
Input Image shape =  torch.Size([1, 3, 224, 244])
Output Image shape =  torch.Size([1, 3, 224, 224])

My autoencoder class is:

import torch
import torch.nn as nn

class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=2, padding=1),    # b, 16, 112, 112
            nn.ReLU(True),
            nn.Conv2d(16, 32, 3, stride=2, padding=1),   # b, 32, 56, 56
            nn.ReLU(True),
            nn.Conv2d(32, 64, 7, stride=3, padding=1),   # b, 64, 18, 18
            nn.ReLU(True),
            nn.Conv2d(64, 128, 7, stride=5, padding=1),  # b, 128, 3, 3
            nn.ReLU(True),
            nn.Conv2d(128, 256, 3, stride=5, padding=1)  # b, 256, 1, 1
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 3),                                        # b, 128, 3, 3
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 7, stride=3, padding=1, output_padding=1),  # b, 64, 12, 12
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 24, 7, stride=2, padding=1, output_padding=1),   # b, 24, 28, 28
            nn.ReLU(True),
            nn.ConvTranspose2d(24, 16, 3, stride=2, padding=1, output_padding=1),   # b, 16, 56, 56
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 8, 3, stride=2, padding=1),                      # b, 8, 111, 111
            nn.ReLU(True),
            nn.ConvTranspose2d(8, 3, 5, stride=2, padding=1, output_padding=1),     # b, 3, 224, 224
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
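To sanity-check the shapes reported by the summary, the layer sizes can be recomputed by hand with the standard output-size formulas for Conv2d and ConvTranspose2d from the PyTorch docs (a verification sketch, assuming square inputs and dilation=1):

def conv_out(size, kernel, stride=1, pad=0):
    # Conv2d: out = floor((in + 2*pad - kernel) / stride) + 1
    return (size + 2 * pad - kernel) // stride + 1

def deconv_out(size, kernel, stride=1, pad=0, out_pad=0):
    # ConvTranspose2d: out = (in - 1)*stride - 2*pad + kernel + out_pad
    return (size - 1) * stride - 2 * pad + kernel + out_pad

size = 224
for k, s, p in [(3, 2, 1), (3, 2, 1), (7, 3, 1), (7, 5, 1), (3, 5, 1)]:
    size = conv_out(size, k, s, p)        # 112, 56, 18, 3, 1

for k, s, p, op in [(3, 1, 0, 0), (7, 3, 1, 1), (7, 2, 1, 1),
                    (3, 2, 1, 1), (3, 2, 1, 0), (5, 2, 1, 1)]:
    size = deconv_out(size, k, s, p, op)  # 3, 12, 28, 56, 111, 224

print(size)  # 224 -- a 224x224 input comes back out as 224x224

So the network itself maps 224x224 to 224x224; the 244 in the error has to come from somewhere else.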

The training code is as follows:

from torchsummary import summary
from torchvision.utils import save_image

dataset = DatasetLoader('E:/DAL/Dataset/Images', get_transform(train=True))

torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
# split off the last 50 images as a test set before re-wrapping the train set
dataset_test = torch.utils.data.Subset(dataset, indices[-50:])
dataset = torch.utils.data.Subset(dataset, indices[:-50])

data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=0)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0)


model = autoencoder().cuda()
summary(model, (3, 224, 224))

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

total_loss = 0
for epoch in range(num_epochs):
    for data in data_loader:
        img = data.cuda()  # Variable() is deprecated; tensors work directly
        print("Min Value of input Image = ", torch.min(img))
        print("Max Value of input Image = ", torch.max(img))
        # ===================forward=====================
        output = model(img)
        print("Input Image shape = ", img.shape)
        print("Output Image shape = ", output.shape)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    total_loss += loss.item()  # .data is deprecated; .item() returns a Python float
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, num_epochs, total_loss))
    if epoch % 10 == 0:
        pic = to_img(output.cpu().data)
        save_image(pic, './dc_img/image_{}.png'.format(epoch))

torch.save(model.state_dict(), './conv_autoencoder.pth')
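Once training works, the plan for the clustering step is roughly this (a sketch only; it assumes scikit-learn's KMeans and uses the 256-dimensional encoder output as the feature vector, with n_clusters as a placeholder):

from sklearn.cluster import KMeans

model.eval()
features = []
with torch.no_grad():
    for img in data_loader_test:
        z = model.encoder(img.cuda())        # [1, 256, 1, 1]
        features.append(z.flatten(1).cpu())  # [1, 256]
features = torch.cat(features).numpy()

kmeans = KMeans(n_clusters=5, random_state=0).fit(features)  # n_clusters=5 is a placeholder
print(kmeans.labels_)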

The data loader class and transform function are as follows:

import os

import torch
from PIL import Image
import torchvision.transforms as T


def get_transform(train):
    transforms = []
    transforms.append(T.Resize((224,244)))
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
        transforms.append(T.RandomVerticalFlip(0.5))
    transforms.append(T.ToTensor())
    return T.Compose(transforms)


class DatasetLoader(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(root)))

    def __getitem__(self, idx):
        img_path = os.path.join(self.root, self.imgs[idx])
        img = Image.open(img_path).convert("RGB")
        if self.transforms is not None:
            img = self.transforms(img)
        return img

    def __len__(self):
        return len(self.imgs)
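To see what this pipeline actually produces, a single transformed sample can be inspected directly (a quick debugging check against the image folder):

ds = DatasetLoader('E:/DAL/Dataset/Images', get_transform(train=True))
print(ds[0].shape)  # torch.Size([3, 224, 244])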

I am pretty sure there is a typo in your get_transform function:

transforms.append(T.Resize((224,244)))

You probably wanted to resize to (224, 224) instead of (224, 244).
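T.Resize takes (height, width), so (224, 244) produces images that are 244 pixels wide, which is exactly the stray 244 in the error message. Changing it to a square size makes the target match the model output:

transforms.append(T.Resize((224, 224)))  # (height, width), both 224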