RuntimeError: Expected object of device type cuda but got device type cpu for argument #3 'index' in call to _th_index_select site:stackoverflow.com
RuntimeError: Expected object of device type cuda but got device type cpu for argument #3 'index' in call to _th_index_select site:stackoverflow.com
我正在 SMILE 数据集上使用 bert。我写了下面的代码,你能指导我哪里出错了吗?
我已经编写了正确评估的训练代码,但是当我尝试 运行 评估代码以进行验证时,它给出了错误。我试图将参数直接传递给cuda。我仍然面临这个问题
'''
def evaluate(dataloader_val):
print("in evaluate")
model.eval()
loss_val_total = 0
predictions, true_value = [],[]
for batch in dataloader_val:
print("in for loop of dataloader")
barch = tuple(b.to(device) for b in batch)
inputs = {
'input_ids': batch[0],
'attention_mask': batch[1],
'labels' : batch[2],
}
with torch.no_grad():
outputs = model(**inputs)
loss = outputs[0]
logits = outputs[1]
loss_val_total += loss.item()
print("before logit")
logits = logits.to(device)
print("in the for batch evaluate: ",logits)
label_ids = inputs['labels'].to(device)
true_vals.append(label_ids)
loss_val_avg = loss_val_total/len(dataloader_val)
predictions = np.concatenate(predictions, axis = 0)
true_vals = np.concatenate(true_vals,axis = 0)
return loss_val_avg, predictions, true_vals
'''
另一个函数是
'''
for epoch in tqdm(range(1, epochs+1)):
model.train()
loss_train_total = 0
progress_bar = tqdm(dataloader_train,
desc = 'Epoch {:1d}'.format(epoch),
leave = False,
disable = False)
for batch in progress_bar:
model.zero_grad()
batch = tuple(b.to(device) for b in batch)
inputs = {
'input_ids' : batch[0],
'attention_mask' : batch[1],
'labels' : batch[2]
}
outputs = model(**inputs)
loss = outputs[0]
loss_train_total += loss.item()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
scheduler.step()
progress_bar.set_postfix({'training_loss' : '{:.3f}'.format(loss.item()/len(batch))})
torch.save(model.state_dict(), f'/content/drive/My Drive/Bert/Coursera/SMILE/Bert_ft_epoch{epoch}.model')
tqdm.write(f'\n Epoch {epoch}')
loss_train_avg = loss_train_total / len(dataloader_train)
tqdm.write(f'Training Loss: {loss_train_avg}')
val_loss, predictions, true_vals = evaluate(dataloader_val)
val_f1 = f1_score_func(predictions, true_vals)
tqdm.write(f'Validation loss : {val_loss}')
tqdm.write(f'F1 score(weighted): {val_f1}')
'''
你的评价函数有错别字:
barch = tuple(b.to(device) for b in batch)
您将 gpu 数据分配给 barch
而不是 batch
。
我正在 SMILE 数据集上使用 bert。我写了下面的代码,你能指导我哪里出错了吗? 我已经编写了正确评估的训练代码,但是当我尝试 运行 评估代码以进行验证时,它给出了错误。我试图将参数直接传递给cuda。我仍然面临这个问题
'''
def evaluate(dataloader_val):
print("in evaluate")
model.eval()
loss_val_total = 0
predictions, true_value = [],[]
for batch in dataloader_val:
print("in for loop of dataloader")
barch = tuple(b.to(device) for b in batch)
inputs = {
'input_ids': batch[0],
'attention_mask': batch[1],
'labels' : batch[2],
}
with torch.no_grad():
outputs = model(**inputs)
loss = outputs[0]
logits = outputs[1]
loss_val_total += loss.item()
print("before logit")
logits = logits.to(device)
print("in the for batch evaluate: ",logits)
label_ids = inputs['labels'].to(device)
true_vals.append(label_ids)
loss_val_avg = loss_val_total/len(dataloader_val)
predictions = np.concatenate(predictions, axis = 0)
true_vals = np.concatenate(true_vals,axis = 0)
return loss_val_avg, predictions, true_vals
'''
另一个函数是
'''
for epoch in tqdm(range(1, epochs+1)):
model.train()
loss_train_total = 0
progress_bar = tqdm(dataloader_train,
desc = 'Epoch {:1d}'.format(epoch),
leave = False,
disable = False)
for batch in progress_bar:
model.zero_grad()
batch = tuple(b.to(device) for b in batch)
inputs = {
'input_ids' : batch[0],
'attention_mask' : batch[1],
'labels' : batch[2]
}
outputs = model(**inputs)
loss = outputs[0]
loss_train_total += loss.item()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
scheduler.step()
progress_bar.set_postfix({'training_loss' : '{:.3f}'.format(loss.item()/len(batch))})
torch.save(model.state_dict(), f'/content/drive/My Drive/Bert/Coursera/SMILE/Bert_ft_epoch{epoch}.model')
tqdm.write(f'\n Epoch {epoch}')
loss_train_avg = loss_train_total / len(dataloader_train)
tqdm.write(f'Training Loss: {loss_train_avg}')
val_loss, predictions, true_vals = evaluate(dataloader_val)
val_f1 = f1_score_func(predictions, true_vals)
tqdm.write(f'Validation loss : {val_loss}')
tqdm.write(f'F1 score(weighted): {val_f1}')
'''
你的评价函数有错别字:
barch = tuple(b.to(device) for b in batch)
您将 gpu 数据分配给 barch
而不是 batch
。