PyTorch nn.CrossEntropyLoss IndexError: Target 2 is out of bounds
PyTorch nn.CrossEntropyLoss IndexError: Target 2 is out of bounds
我正在使用 BERT 创建一个简单的二分类情感分类器,但遇到了一个与输出和标签大小相关的错误,我找不出自己哪里做错了。以下是相关的代码片段。
我的自定义数据集类:
class AmazonReviewsDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding DistilBERT-tokenized reviews with their labels.

    ``df`` must expose a ``'reviews'`` column of strings and a ``'sentiment'``
    column of values convertible to ``int``; both are read positionally
    through ``.iloc``.
    """

    def __init__(self, df):
        self.df = df
        # Every review is padded or truncated to this many tokens.
        self.maxlen = 256
        self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return the tokenized review and label stored at position ``index``.

        Returns:
            A dict with ``'input_ids'`` and ``'attention_mask'`` (tokenizer
            outputs flattened to 1-D) and ``'labels'`` (a 0-dim ``torch.long``
            tensor holding the raw sentiment value).
        """
        # split() followed by join() collapses any run of whitespace into a
        # single space.
        review = ' '.join(self.df['reviews'].iloc[index].split())
        # NOTE: the label is forwarded unchanged. nn.CrossEntropyLoss requires
        # class indices in [0, C-1]; a value outside that range raises
        # "IndexError: Target ... is out of bounds".
        sentiment = int(self.df['sentiment'].iloc[index])
        encodings = self.tokenizer.encode_plus(
            review,
            add_special_tokens=True,
            max_length=self.maxlen,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            # return_tensors='pt' adds a leading batch axis; flatten() drops it
            # so the DataLoader can stack samples itself.
            'input_ids': encodings.input_ids.flatten(),
            'attention_mask': encodings.attention_mask.flatten(),
            'labels': torch.tensor(sentiment, dtype=torch.long)
        }
dataloader 的输出:
# Inspect the first batch only: print the tensor shapes and the raw labels.
for batch in train_loader:
    print(batch['input_ids'].shape)
    print(batch['attention_mask'].shape)
    print(batch['labels'])
    print(batch['labels'].shape)
    break  # one batch is enough for a sanity check
torch.Size([32, 256])
torch.Size([32, 256])
tensor([2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2,
2, 2, 2, 2, 2, 1, 1, 2])
torch.Size([32])
我的神经网络(nn)模型:
# Multi-class loss: takes raw logits plus integer class-index targets in [0, C-1].
criterion = nn.CrossEntropyLoss().to(device)
class SentimentClassifier(nn.Module):
    """DistilBERT encoder followed by a two-layer feed-forward head with 2 outputs."""

    def __init__(self):
        super().__init__()
        self.distilbert = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.drop0 = nn.Dropout(0.25)
        # 3072 input features: forward() concatenates four hidden vectors
        # (4 x 768, assuming DistilBERT-base's hidden size of 768).
        self.linear1 = nn.Linear(3072, 512)
        self.relu1 = nn.ReLU()
        self.drop1 = nn.Dropout(0.25)
        # Two output units, one per class.
        self.linear2 = nn.Linear(512, 2)
        self.relu2 = nn.ReLU()

    def forward(self, input_ids, attention_mask):
        """Return one 2-element score vector per input sequence."""
        outputs = self.distilbert(input_ids, attention_mask)
        last_hidden_state = outputs[0]
        # Concatenate the hidden vectors found at the last four positions of
        # axis 1 along the feature axis.
        pooled_output = torch.cat(
            [last_hidden_state[:, i] for i in (-4, -3, -2, -1)], dim=-1
        )
        x = self.drop0(pooled_output)
        x = self.relu1(self.linear1(x))
        x = self.drop1(x)
        # NOTE(review): ReLU is applied to the final layer as well, so the
        # scores handed to the loss are never negative — confirm this is
        # intended.
        x = self.relu2(self.linear2(x))
        return x
训练循环:
# One optimisation step per batch.
for batch in loop:
    # Clear gradients accumulated by the previous step.
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    labels = batch['labels'].to(device)
    output = model(input_ids, attention_mask)
    print(output.size(), labels.size())
    loss = criterion(output, labels)  # ERROR: IndexError is raised here
    loss.backward()
    optimizer.step()
错误:
torch.Size([32, 2]) torch.Size([32])
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-19-6268781f396e> in <module>()
12 print(output.size(), labels.size())
13 # output_class = torch.argmax(results, dim=1)
---> 14 loss = criterion(output, labels)
15 train_loss += loss
16 loss.backward()
2 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
1119 def forward(self, input: Tensor, target: Tensor) -> Tensor:
1120 return F.cross_entropy(input, target, weight=self.weight,
-> 1121 ignore_index=self.ignore_index, reduction=self.reduction)
1122
1123
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2822 if size_average is not None or reduce is not None:
2823 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2824 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2825
2826
IndexError: Target 2 is out of bounds.
我读过一篇教程,其中说因为我有 2 个类别,所以在使用 nn.CrossEntropyLoss 时不要使用 softmax。谁能指出问题出在哪里?谢谢!
您有两个类别,这意味着最大的目标标签是 1 而不是 2,因为类别索引从 0 开始。您只需从 labels 张量中减去 1(例如 labels = labels - 1),这样第 1 类对应的值为 0,第 2 类对应的值为 1。
这样一来,您打印的批次标签将如下所示:
tensor([1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 0, 0, 1])
我正在使用 BERT 创建一个简单的二分类情感分类器,但遇到了一个与输出和标签大小相关的错误,我找不出自己哪里做错了。以下是相关的代码片段。
我的自定义数据集类:
class AmazonReviewsDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding DistilBERT-tokenized reviews with their labels.

    ``df`` must expose a ``'reviews'`` column of strings and a ``'sentiment'``
    column of values convertible to ``int``; both are read positionally
    through ``.iloc``.
    """

    def __init__(self, df):
        self.df = df
        # Every review is padded or truncated to this many tokens.
        self.maxlen = 256
        self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return the tokenized review and label stored at position ``index``.

        Returns:
            A dict with ``'input_ids'`` and ``'attention_mask'`` (tokenizer
            outputs flattened to 1-D) and ``'labels'`` (a 0-dim ``torch.long``
            tensor holding the raw sentiment value).
        """
        # split() followed by join() collapses any run of whitespace into a
        # single space.
        review = ' '.join(self.df['reviews'].iloc[index].split())
        # NOTE: the label is forwarded unchanged. nn.CrossEntropyLoss requires
        # class indices in [0, C-1]; a value outside that range raises
        # "IndexError: Target ... is out of bounds".
        sentiment = int(self.df['sentiment'].iloc[index])
        encodings = self.tokenizer.encode_plus(
            review,
            add_special_tokens=True,
            max_length=self.maxlen,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            # return_tensors='pt' adds a leading batch axis; flatten() drops it
            # so the DataLoader can stack samples itself.
            'input_ids': encodings.input_ids.flatten(),
            'attention_mask': encodings.attention_mask.flatten(),
            'labels': torch.tensor(sentiment, dtype=torch.long)
        }
dataloader 的输出:
# Inspect the first batch only: print the tensor shapes and the raw labels.
for batch in train_loader:
    print(batch['input_ids'].shape)
    print(batch['attention_mask'].shape)
    print(batch['labels'])
    print(batch['labels'].shape)
    break  # one batch is enough for a sanity check
torch.Size([32, 256])
torch.Size([32, 256])
tensor([2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2,
2, 2, 2, 2, 2, 1, 1, 2])
torch.Size([32])
我的神经网络(nn)模型:
# Multi-class loss: takes raw logits plus integer class-index targets in [0, C-1].
criterion = nn.CrossEntropyLoss().to(device)
class SentimentClassifier(nn.Module):
    """DistilBERT encoder followed by a two-layer feed-forward head with 2 outputs."""

    def __init__(self):
        super().__init__()
        self.distilbert = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.drop0 = nn.Dropout(0.25)
        # 3072 input features: forward() concatenates four hidden vectors
        # (4 x 768, assuming DistilBERT-base's hidden size of 768).
        self.linear1 = nn.Linear(3072, 512)
        self.relu1 = nn.ReLU()
        self.drop1 = nn.Dropout(0.25)
        # Two output units, one per class.
        self.linear2 = nn.Linear(512, 2)
        self.relu2 = nn.ReLU()

    def forward(self, input_ids, attention_mask):
        """Return one 2-element score vector per input sequence."""
        outputs = self.distilbert(input_ids, attention_mask)
        last_hidden_state = outputs[0]
        # Concatenate the hidden vectors found at the last four positions of
        # axis 1 along the feature axis.
        pooled_output = torch.cat(
            [last_hidden_state[:, i] for i in (-4, -3, -2, -1)], dim=-1
        )
        x = self.drop0(pooled_output)
        x = self.relu1(self.linear1(x))
        x = self.drop1(x)
        # NOTE(review): ReLU is applied to the final layer as well, so the
        # scores handed to the loss are never negative — confirm this is
        # intended.
        x = self.relu2(self.linear2(x))
        return x
训练循环:
# One optimisation step per batch.
for batch in loop:
    # Clear gradients accumulated by the previous step.
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    labels = batch['labels'].to(device)
    output = model(input_ids, attention_mask)
    print(output.size(), labels.size())
    loss = criterion(output, labels)  # ERROR: IndexError is raised here
    loss.backward()
    optimizer.step()
错误:
torch.Size([32, 2]) torch.Size([32])
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-19-6268781f396e> in <module>()
12 print(output.size(), labels.size())
13 # output_class = torch.argmax(results, dim=1)
---> 14 loss = criterion(output, labels)
15 train_loss += loss
16 loss.backward()
2 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
1119 def forward(self, input: Tensor, target: Tensor) -> Tensor:
1120 return F.cross_entropy(input, target, weight=self.weight,
-> 1121 ignore_index=self.ignore_index, reduction=self.reduction)
1122
1123
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2822 if size_average is not None or reduce is not None:
2823 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2824 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2825
2826
IndexError: Target 2 is out of bounds.
我读过一篇教程,其中说因为我有 2 个类别,所以在使用 nn.CrossEntropyLoss 时不要使用 softmax。谁能指出问题出在哪里?谢谢!
您有两个类别,这意味着最大的目标标签是 1 而不是 2,因为类别索引从 0 开始。您只需从 labels 张量中减去 1(例如 labels = labels - 1),这样第 1 类对应的值为 0,第 2 类对应的值为 1。
这样一来,您打印的批次标签将如下所示:
tensor([1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 0, 0, 1])