Mismatching dims in GRU for classification
I'm trying to complete an assignment by writing a simple RNN. Here is the class:
import torch
import torch.nn as nn

class RNNBaseline(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        self.rnn = nn.GRU(input_size=embedding_dim, hidden_size=hidden_dim)  # RNN(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)  # YOUR CODE GOES HERE
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths, hidden=None):
        # text = [sent len, batch size]
        embedded = self.embedding(text)
        # embedded = [sent len, batch size, emb dim]

        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)

        # cell arg for LSTM, remove for GRU
        # packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        # output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)

        # output = [sent len, batch size, hid dim * num directions]
        # output over padding tokens are zero tensors
        # hidden = [num layers * num directions, batch size, hid dim]
        # cell = [num layers * num directions, batch size, hid dim]

        # concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers
        # and apply dropout
        output, hidden = self.rnn(packed_embedded, hidden)
        # hidden = None # concatenate
        # hidden = [batch size, hid dim * num directions] or [batch_size, hid dim * num directions]
        return self.fc(hidden)
For now I'm not using an LSTM or trying to make the RNN bidirectional; I just want the plain GRU to train without errors. Here is the training loop:
import numpy as np
import torch
from tqdm import tqdm

min_loss = np.inf
cur_patience = 0

for epoch in range(1, max_epochs + 1):
    train_loss = 0.0
    model.train()
    pbar = tqdm(enumerate(train_iter), total=len(train_iter), leave=False)
    pbar.set_description(f"Epoch {epoch}")
    for it, ((text, txt_len), label) in pbar:
        # YOUR CODE GOES HERE
        opt.zero_grad()
        input = text.to(device)
        labels = label.to(device)
        output = model(input, txt_len.type(torch.int64).cpu())
        train_loss = loss_func(output, labels)
        train_loss.backward()
        opt.step()
    train_loss /= len(train_iter)

    val_loss = 0.0
    model.eval()
    pbar = tqdm(enumerate(valid_iter), total=len(valid_iter), leave=False)
    pbar.set_description(f"Epoch {epoch}")
    for it, ((text, txt_len), label) in pbar:
        # YOUR CODE GOES HERE
        input = text.to(device)
        labels = label.to(device)
        output = model(input, txt_len.type(torch.int64).cpu())
        val_loss = loss_func(output, labels)
    val_loss /= len(valid_iter)

    if val_loss < min_loss:
        min_loss = val_loss
        best_model = model.state_dict()
    else:
        cur_patience += 1
        if cur_patience == patience:
            cur_patience = 0
            break

    print('Epoch: {}, Training Loss: {}, Validation Loss: {}'.format(epoch, train_loss, val_loss))

model.load_state_dict(best_model)
And some variables:
vocab_size = len(TEXT.vocab)
emb_dim = 100
hidden_dim = 256
output_dim = 1
n_layers = 2
bidirectional = False
dropout = 0.2
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
patience=3
opt = torch.optim.Adam(model.parameters())
loss_func = nn.BCEWithLogitsLoss()
max_epochs = 1
But I get this error:
ValueError: Target size (torch.Size([64])) must be the same as input size (torch.Size([1, 64, 1]))
...on this line:
---> 18 train_loss = loss_func(output, labels)
What am I doing wrong?
nn.BCEWithLogitsLoss expects both the outputs and the targets (or labels, in your case) to be of size [b, d], where b is the batch size and d is the number of classes (or whatever dimension you are predicting). Currently, your outputs are of size [b, d, 1] and your targets are of size [d]. Two fixes are needed, and both are quite simple:

1. Add a batch dimension to your targets (labels). This is a common mistake when working with a dataset that returns individual data elements, because such a dataset generally does not add a batch dimension. Wrapping your dataset class in a PyTorch DataLoader takes care of this, but if you don't want to do that, you can simply add an unsqueeze() operation (see the DataLoader sketch after this list). Note that the unsqueeze approach only really works when the batch size is 1; otherwise a DataLoader is probably the better option.

2. Your output has an empty third dimension, which can easily be flattened with a squeeze() operation. Both unsqueeze and squeeze are differentiable, so there should be no problems with backpropagation.
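As a side note on the DataLoader option in point 1, here is a minimal, purely illustrative sketch (the toy dataset and every name in it are assumptions, not taken from the question); the default collate function stacks the individual items and thereby adds the batch dimension:

import torch
from torch.utils.data import Dataset, DataLoader

# Hypothetical toy dataset: __getitem__ returns one (sequence, label) pair
# with no batch dimension, just like the situation described above.
class ToyTextDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences  # list of 1-D LongTensors of equal length
        self.labels = labels        # list of scalar float labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.sequences[idx], self.labels[idx]

data = ToyTextDataset(
    [torch.randint(0, 100, (10,)) for _ in range(8)],
    [float(i % 2) for i in range(8)],
)
loader = DataLoader(data, batch_size=4)
for seqs, labels in loader:
    print(seqs.shape, labels.shape)  # torch.Size([4, 10]) torch.Size([4])

If you stay with the unsqueeze() route instead, the fix in your validation loop looks like this: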
... code before here
for it, ((text, txt_len), label) in pbar:
    # YOUR CODE GOES HERE
    input = text.to(device)
    labels = label.to(device).unsqueeze(0)  # added unsqueeze operation
    output = model(input, txt_len.type(torch.int64).cpu())
    output = output.squeeze(-1)             # added squeeze on last dim
    val_loss = loss_func(output, labels)
... code after here
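For reference, a minimal standalone check of the shapes involved (the batch size of 64 comes from the error message; the tensors are random stand-ins, not your data):

import torch
import torch.nn as nn

loss_func = nn.BCEWithLogitsLoss()

output = torch.randn(1, 64, 1)               # like the model output: [1, batch size, output_dim]
labels = torch.randint(0, 2, (64,)).float()  # like the targets: [batch size]

# loss_func(output, labels)  # raises the ValueError from the question

fixed_output = output.squeeze(-1)   # [1, 64]
fixed_labels = labels.unsqueeze(0)  # [1, 64]
print(loss_func(fixed_output, fixed_labels))  # shapes now match, loss is a scalar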