自定义层导致 pytorch 中的批次尺寸不匹配。如何解决这个问题?
Custom layer caused the batch dimension mismatch in pytorch. How to fix this problem?
我尝试训练 GCN model.I 定义了我需要的自定义层。但是,当我进行一些批量训练时,它会导致一些维度不匹配。
代码如下:
import math
import numpy as np
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
# =============================================================================
# model define
# =============================================================================
class GraphConvolution(Module):
"""
Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
"""
def __init__(self, in_features, out_features, bias=True):
super(GraphConvolution, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.weight = Parameter(torch.FloatTensor(in_features, out_features))
if bias:
self.bias = Parameter(torch.FloatTensor(out_features))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
stdv = 1. / math.sqrt(self.weight.size(1))
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.uniform_(-stdv, stdv)
def forward(self, input, adj):
support = torch.mm(input, self.weight)
output = torch.spmm(adj, support)
if self.bias is not None:
return output + self.bias
else:
return output
def __repr__(self):
return self.__class__.__name__ + ' (' \
+ str(self.in_features) + ' -> ' \
+ str(self.out_features) + ')'
class GCN(nn.Module):
def __init__(self, nfeat, nhid, nclass):
super(GCN, self).__init__()
self.gc1 = GraphConvolution(nfeat, nhid)
self.gc2 = GraphConvolution(nhid, nclass)
self.linear = nn.Linear(nclass, 1)
# self.dropout = dropout
def forward(self, x, adj):
x = F.relu(self.gc1(x, adj))
# x = F.dropout(x, self.dropout, training=self.training)
x = F.relu(self.gc2(x, adj))
x = self.linear(x)
return x
def train(dataloader, model, loss_fn, optimizer,adj):
size = len(dataloader.dataset)
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X,adj)
loss = loss_fn(pred, y)
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn,adj):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X,adj)
test_loss += loss_fn(pred, y).item()
# correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
# correct /= size
# Accuracy: {(100*correct):>0.1f}%,
print(f"Test Error: \n Avg loss: {test_loss:>8f} \n")
当我 运行 代码时:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
model = GCN(1,1,1).to(device)
print(model)
# model(X).shape
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
epochs = 10
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer,Adjacency_matrix)
test(test_dataloader, model, loss_fn,Adjacency_matrix)
print("Done!")
我收到错误:
当我仔细查看时,我发现当我降低批量大小的维度时模型运行良好。我需要怎么做才能告诉模型这个维度是不需要计算的批量大小?
您看到的错误是由于您试图通过 2D 权重对 3d 张量(您的输入)进行矩阵化。
要解决这个问题,您可以简单地重塑数据,因为我们在执行 matmuls 时只真正关心最后一个 dim:
def forward(self, input, adj):
b_size = input.size(0)
input = input.view(-1, input.shape[-1])
support = torch.mm(input, self.weight)
output = torch.spmm(adj, support)
output = output.view(b_size,-1,output.shape[-1])
if self.bias is not None:
return output + self.bias
else:
return output
我尝试训练 GCN model.I 定义了我需要的自定义层。但是,当我进行一些批量训练时,它会导致一些维度不匹配。 代码如下:
import math
import numpy as np
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
# =============================================================================
# model define
# =============================================================================
class GraphConvolution(Module):
"""
Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
"""
def __init__(self, in_features, out_features, bias=True):
super(GraphConvolution, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.weight = Parameter(torch.FloatTensor(in_features, out_features))
if bias:
self.bias = Parameter(torch.FloatTensor(out_features))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
stdv = 1. / math.sqrt(self.weight.size(1))
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.uniform_(-stdv, stdv)
def forward(self, input, adj):
support = torch.mm(input, self.weight)
output = torch.spmm(adj, support)
if self.bias is not None:
return output + self.bias
else:
return output
def __repr__(self):
return self.__class__.__name__ + ' (' \
+ str(self.in_features) + ' -> ' \
+ str(self.out_features) + ')'
class GCN(nn.Module):
def __init__(self, nfeat, nhid, nclass):
super(GCN, self).__init__()
self.gc1 = GraphConvolution(nfeat, nhid)
self.gc2 = GraphConvolution(nhid, nclass)
self.linear = nn.Linear(nclass, 1)
# self.dropout = dropout
def forward(self, x, adj):
x = F.relu(self.gc1(x, adj))
# x = F.dropout(x, self.dropout, training=self.training)
x = F.relu(self.gc2(x, adj))
x = self.linear(x)
return x
def train(dataloader, model, loss_fn, optimizer,adj):
size = len(dataloader.dataset)
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X,adj)
loss = loss_fn(pred, y)
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn,adj):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X,adj)
test_loss += loss_fn(pred, y).item()
# correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
# correct /= size
# Accuracy: {(100*correct):>0.1f}%,
print(f"Test Error: \n Avg loss: {test_loss:>8f} \n")
当我 运行 代码时:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
model = GCN(1,1,1).to(device)
print(model)
# model(X).shape
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
epochs = 10
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer,Adjacency_matrix)
test(test_dataloader, model, loss_fn,Adjacency_matrix)
print("Done!")
我收到错误:
当我仔细查看时,我发现当我降低批量大小的维度时模型运行良好。我需要怎么做才能告诉模型这个维度是不需要计算的批量大小?
您看到的错误是由于您试图通过 2D 权重对 3d 张量(您的输入)进行矩阵化。
要解决这个问题,您可以简单地重塑数据,因为我们在执行 matmuls 时只真正关心最后一个 dim:
def forward(self, input, adj):
b_size = input.size(0)
input = input.view(-1, input.shape[-1])
support = torch.mm(input, self.weight)
output = torch.spmm(adj, support)
output = output.view(b_size,-1,output.shape[-1])
if self.bias is not None:
return output + self.bias
else:
return output