RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 512, 3, 3], but got 2-dimensional input of size [32, 2048] instead
I want to train a classifier based on a pretrained network with PyTorch. What I need to do is take a pretrained model (I'm trying with ResNet50), add some layers at the end (I have to, because the project specification requires it) and train only the layers I added. This is what I tried:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import models
from guitar_dataset import GuitarDataset
from tqdm import tqdm
device = ("cuda" if torch.cuda.is_available() else "cpu")
transformations = transforms.Compose([
transforms.Resize((200, 200))
])
num_epochs = 10
learning_rate = 0.001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1
dataset = GuitarDataset(f"../chords_data/cropped/train", transform=transformations)
train_set, validation_set = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)),
                                                                    len(dataset) - int(0.8 * len(dataset))])
train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
                          pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
                               pin_memory=pin_memory)
testset = GuitarDataset(f"../chords_data/cropped/test", transform=transformations)
test_loader = DataLoader(dataset=testset, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
                         pin_memory=pin_memory)

model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

model.fc = nn.Sequential(
    nn.Conv2d(512, 64, (3, 3)),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(64, 64, (3, 3)),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Dropout(0.5),
    nn.Flatten(),
    nn.Linear(147456, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, 64),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(64, 7)
)
model.to(device)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=learning_rate)

PATH = f"./saved_models/mynet.pth"


def check_accuracy(loader, model):
    if loader == train_loader:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on validation data")

    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            # predictions = torch.tensor([1.0 if i >= 0.5 else 0.0 for i in scores]).to(device)
            predictions = scores.argmax(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

        print(
            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
        )
    return f"{float(num_correct) / float(num_samples) * 100:.2f}"


def train():
    model.train()
    for epoch in range(num_epochs + 1):
        loop = tqdm(train_loader, total=len(train_loader), leave=True)
        # if epoch % 2 == 0:
        loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
        if epoch == num_epochs:
            break
        for imgs, labels in loop:
            labels = torch.nn.functional.one_hot(labels, num_classes=7).float()
            imgs = imgs.to(device)
            labels = labels.to(device)

            outputs = model(imgs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loop.set_description(f"Epoch [{epoch + 1}/{num_epochs}]")
            loop.set_postfix(loss=loss.item())

    torch.save(model.state_dict(), PATH)


def test():
    model.load_state_dict(torch.load(PATH))
    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


if __name__ == "__main__":
    print(f"Working on {data_type}")
    train()
    test()
But as soon as the training phase starts I get the error in the title. Shouldn't the downloaded model be ready to use?
Full stack trace:
Traceback (most recent call last):
File "/home/deffo/Documents/Unimore/Magistrale/Computer Vision and Cognitive Systems/Guitar_Fingering_&_Chords_Recognition/ChordsClassification/train_ResNetChord.py", line 139, in <module>
train()
File "/home/deffo/Documents/Unimore/Magistrale/Computer Vision and Cognitive Systems/Guitar_Fingering_&_Chords_Recognition/ChordsClassification/train_ResNetChord.py", line 99, in train
loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
File "/home/deffo/Documents/Unimore/Magistrale/Computer Vision and Cognitive Systems/Guitar_Fingering_&_Chords_Recognition/ChordsClassification/train_ResNetChord.py", line 83, in check_accuracy
scores = model(x)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torchvision/models/resnet.py", line 249, in forward
return self._forward_impl(x)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torchvision/models/resnet.py", line 244, in _forward_impl
x = self.fc(x)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/container.py", line 119, in forward
input = module(input)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 399, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/home/deffo/anaconda3/envs/ComputerVision/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 395, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 512, 3, 3], but got 2-dimensional input of size [32, 2048] instead
You can't replace the fc of resnet50 with a convolutional network. ResNet's feature extractor is a CNN that outputs a 2048-long tensor, so the layers that come after it should be fully-connected layers.

Your network design is wrong: you shouldn't append convolutional layers after ResNet50's feature extractor. Put some Linear layers there instead:
model.fc = nn.Sequential(
    # It has to start from 2048
    nn.Linear(2048, 1024), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, 256), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 7)  # 7 is the number of classes
)
model.fc has to start from 2048 units, because that is what ResNet50's feature extractor produces.
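If you'd rather not hardcode the 2048, you can read it off the pretrained model before swapping the head; a minimal sketch (the variable name num_features is just illustrative):

import torch.nn as nn
from torchvision import models

model = models.resnet50(pretrained=True)

# The original head is a single Linear layer; its in_features tells us how
# many units the replacement head must accept (2048 for ResNet50).
num_features = model.fc.in_features

model.fc = nn.Sequential(
    nn.Linear(num_features, 1024), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, 256), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 7)  # 7 classes
)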
The error is basically saying that it expected a 4D input (because your model.fc starts with a Conv2d layer) but got a tensor of shape (batch_size, 2048), because that is what ResNet50's feature extractor produces.
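As a quick sanity check (a small sketch, assuming 3-channel 200x200 inputs as in your transforms), you can push a dummy batch through the modified model and confirm the shapes line up before starting training:

import torch
import torch.nn as nn
from torchvision import models

model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False  # freeze the backbone, train only the new head

model.fc = nn.Sequential(
    nn.Linear(2048, 1024), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, 256), nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 7)
)
model.eval()

dummy = torch.randn(32, 3, 200, 200)  # (batch_size, channels, height, width)
with torch.no_grad():
    out = model(dummy)
print(out.shape)  # torch.Size([32, 7]) -> one score per class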