Custom layer caused the batch dimension mismatch in pytorch. How to fix this problem?

I am trying to train a GCN model. I defined the custom layer I need, but when I do batched training it causes a dimension mismatch. The code is as follows:

import math
import numpy as np
import torch

import torch.nn as nn
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

# =============================================================================
# model define
# =============================================================================
class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, adj):
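        # torch.mm and torch.spmm only accept 2D tensors, so a batched 3D input fails here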
        support = torch.mm(input, self.weight)
        output = torch.spmm(adj, support)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'
               
          
class GCN(nn.Module):
    def __init__(self, nfeat, nhid, nclass):
        super(GCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.linear = nn.Linear(nclass, 1)
        # self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        # x = F.dropout(x, self.dropout, training=self.training)
        x =  F.relu(self.gc2(x, adj))
        x = self.linear(x)
        return x



def train(dataloader, model, loss_fn, optimizer, adj):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X,adj)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn, adj):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X,adj)
            test_loss += loss_fn(pred, y).item()
            # correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    # correct /= size
    # Accuracy: {(100*correct):>0.1f}%,
    print(f"Test Error: \n  Avg loss: {test_loss:>8f} \n")
    
    

When I run the code:

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

model = GCN(1,1,1).to(device)
print(model)
# model(X).shape

loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

    
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer,Adjacency_matrix)
    test(test_dataloader, model, loss_fn,Adjacency_matrix)
print("Done!")

I get an error:

When I looked closer, I found that the model runs fine when I drop the batch-size dimension. What do I need to do to tell the model that this dimension is the batch size and should not be part of the computation?
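A minimal sketch of the failing call, assuming the batched input has shape (batch, num_nodes, nfeat) and the layer weight is 2D (these shapes are illustrative, not taken from the actual data):

import torch

batch_size, num_nodes, nfeat, nhid = 4, 10, 1, 1   # illustrative values only
x = torch.randn(batch_size, num_nodes, nfeat)      # 3D batched input
weight = torch.randn(nfeat, nhid)                  # 2D layer weight

# torch.mm only accepts 2D tensors, so passing the 3D batched input
# raises a RuntimeError complaining that the input is not a matrix
support = torch.mm(x, weight)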

The error you are seeing is because you are trying to matrix-multiply a 3D tensor (your input) by a 2D weight.

To fix this, you can simply reshape the data, since we only really care about the last dim when performing matmuls:

def forward(self, input, adj):
    b_size = input.size(0)
    input = input.view(-1, input.shape[-1])
    support = torch.mm(input, self.weight)
    output = torch.spmm(adj, support)
    output = output.view(b_size,-1,output.shape[-1])
    if self.bias is not None:
        return output + self.bias
    else:
        return output
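
If adj is a dense tensor, an alternative sketch is to let torch.matmul broadcast over the batch dimension instead of reshaping (this does not cover the sparse case that spmm handles, so it only applies to a dense adjacency matrix):

def forward(self, input, adj):
    # input: (batch, num_nodes, in_features), adj: (num_nodes, num_nodes), both dense
    support = torch.matmul(input, self.weight)   # broadcasts the 2D weight over the batch
    output = torch.matmul(adj, support)          # (N, N) @ (B, N, out) -> (B, N, out)
    if self.bias is not None:
        return output + self.bias
    return output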