How can I solve this pytorch two devices error
I'm running into a problem with PyTorch:

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_addmm)
model = nn.Sequential(
    nn.Linear(622, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 5),
).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
train_loader = Data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)
test_loader = Data.DataLoader(
    dataset=test_dataset,
    batch_size=100,
    shuffle=True,
    num_workers=0,
)
best_acc = 0
best_model = model.cpu().state_dict().copy()
# train_acc = 0
# test_acc = 0
for epoch in range(20):
    for step, (batch_x, batch_y) in enumerate(train_loader):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        print(batch_x)
        print(batch_x.device, 0)
        out = model(batch_x.to(device)).cuda()
        print(out.device, 1)
        loss = loss_fn(out, batch_y.long())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_acc = np.mean((torch.argmax(out, 1) == batch_y).cpu().numpy())
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            print(batch_x.device, 2)
            out = model(batch_x)
            print(batch_x.device, 3)
            test_acc = np.mean((torch.argmax(out, 1) == batch_y).cpu().numpy())
    if test_acc > best_acc:
        best_acc = test_acc
        best_model = model.cpu().state_dict().copy()
Can anyone help explain this? I've been stuck on it all day...
Note that .to() behaves differently when applied to an nn.Module and to a torch.Tensor: Module.to() (like Module.cpu() and Module.cuda()) moves the module's parameters in place and returns the module itself, while Tensor.to() returns a new tensor and leaves the original on its current device.
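For example, a minimal sketch of the difference (the tensor t and module m here are made up for illustration, and a CUDA device is assumed to be available):

import torch
import torch.nn as nn

t = torch.zeros(2)
t.cuda()                             # Tensor.cuda() returns a new tensor; t itself is unchanged
print(t.device)                      # cpu

m = nn.Linear(2, 2)
m.cuda()                             # Module.cuda() moves the parameters in place
print(next(m.parameters()).device)   # cuda:0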
In your code, you move the model to the CPU:
best_model = model.cpu().state_dict().copy()
Because Module.cpu() moves the parameters in place, once this line runs the model stays on the CPU while your batches are still being sent to cuda:0, which is exactly the mismatch the error reports. Make sure to move the model back to device after moving it to the CPU.
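For example, a minimal sketch of one way to fix the checkpointing step, assuming device is the CUDA device used elsewhere in your script (copy.deepcopy is used so the saved weights do not alias the live model):

import copy

if test_acc > best_acc:
    best_acc = test_acc
    # snapshot the weights on the CPU ...
    best_model = copy.deepcopy(model.cpu().state_dict())
    # ... then move the model back to the GPU before the next batch
    model.to(device)

Alternatively, you can avoid moving the model at all by copying the parameter tensors one by one, e.g. best_model = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}.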