Pytorch 中的 LSTM:如何 add/change 序列长度维度?
LSTM in Pytorch: how to add/change sequence length dimension?
我在 PyTorch 中运行 LSTM,但据我所知,它只接受序列长度 = 1。当我将序列长度重塑为 4 或其他数字时,我会收到输入和目标长度不匹配的错误。如果我同时重塑输入和目标,那么模型会抱怨它不接受多目标标签。
我的训练数据集有66512行和16839列,目标中有3 categories/classes。我想使用批处理大小 200 和序列长度 4,即在一个序列中使用 4 行数据。
请告知如何调整我的模型和/或数据,以便能够以各种序列长度(例如 4)运行模型。
batch_size = 200
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
# Build the integer class-index target from the one-hot label columns.
# Use torch.argmax (not np.argmax) so the target stays a torch.LongTensor,
# which is the dtype nn.CrossEntropyLoss expects for class targets.
train_target = torch.tensor(train_data[['Label1','Label2','Label3']].values.astype(np.float32))
train_target = torch.argmax(train_target, dim=1)
train = torch.tensor(train_data.drop(['Label1','Label2','Label3'], axis=1).values.astype(np.float32))
# unsqueeze(1) inserts a seq_len dimension of 1: (N, 16839) -> (N, 1, 16839),
# matching the (batch, seq_len, input_dim) layout of a batch_first LSTM.
train_tensor = TensorDataset(train.unsqueeze(1), train_target)
train_loader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=True)
print(train.shape)
print(train_target.shape)
torch.Size([66512, 16839])
torch.Size([66512])
import torch.nn as nn
class LSTMModel(nn.Module):
    """Many-to-one LSTM classifier.

    Expects batch-first input of shape (batch, seq_len, input_dim) and
    returns logits of shape (batch, output_dim), computed from the hidden
    state of the last time step.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        # Number of stacked LSTM layers
        self.layer_dim = layer_dim
        # batch_first=True -> input/output tensors are (batch, seq, feature)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        # Readout layer: final hidden state -> class logits
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Zero-initialize hidden and cell states on the same device as the
        # input (the original read a global `device` variable, which breaks
        # if the model is used before/without that global). Initial states
        # are constants, so they do not need requires_grad.
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        out, (hn, cn) = self.lstm(x, (h0, c0))
        # Classify using only the last time step's hidden state.
        out = self.fc(out[:, -1, :])
        return out
# Model/hyper-parameters: input_dim must equal the number of feature
# columns, so each data row is one time step of a sequence.
input_dim = 16839
hidden_dim = 100
output_dim = 3
layer_dim = 1
num_epochs = 1
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Sanity check: print every learnable parameter tensor's shape.
# (The original rebuilt list(model.parameters()) on each loop iteration
# and indexed by range(len(...)); it also had a no-op
# `batch_size = batch_size` assignment, removed here.)
params = list(model.parameters())
print(len(params))
for p in params:
    print(p.size())
6
torch.Size([400, 16839])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([3, 100])
torch.Size([3])
for epoch in range(num_epochs):
    # Renamed loop variables: the original reused `train`/`train_target`,
    # shadowing the full dataset tensors defined above.
    for i, (inputs, targets) in enumerate(train_loader):
        # Input data does not need requires_grad_(); gradients flow to the
        # model parameters regardless.
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        # Forward pass to get logits
        outputs = model(inputs)
        # CrossEntropyLoss applies log-softmax internally
        loss = criterion(outputs, targets)
        # Gradients w.r.t. parameters
        loss.backward()
        # Updating parameters
        optimizer.step()
    # Accuracy of the last batch. The original printed an undefined
    # `accuracy` name, which raised a NameError on the first epoch.
    accuracy = (outputs.argmax(dim=1) == targets).float().mean().item()
    print('Epoch: {}. Loss: {}. Accuracy: {}'.format(epoch, np.around(loss.item(), 4), np.around(accuracy, 4)))
您已设置 input_dim = 16839
,因此您的模型需要形状为 (batch_size, seq_len, 16839)
的输入。
您从中提取批次的 train_tensor
的形状为 (66512, 1, 16839)
。所以你的批次的形状是 (batch_size, 1, 16839)
。这是可行的,因为它满足了上述要求。
但是,如果您尝试重塑相同的训练张量以使 seq_len
= 4,您的 input_dim
维度将不再是 16839,因此将不符合模型的预期,这就是您会收到尺寸不匹配错误的原因。
这就是最终起作用的方法 - 将输入数据重塑为 4 个序列,每个序列有一个目标值,为此我根据我的问题逻辑选择了目标序列中的最后一个值。现在看起来很容易,但当时非常棘手。发布的其余代码相同。
# Group every 4 consecutive rows into one sequence, taking the label of the
# LAST row of each group as the sequence's target: iloc[3::4] selects rows
# 3, 7, 11, ... Use torch.argmax so the target stays a LongTensor for
# nn.CrossEntropyLoss.
train_target = torch.tensor(train_data[['Label1','Label2','Label3']].iloc[3::4].values.astype(np.float32))
train_target = torch.argmax(train_target, dim=1)
# Reshape features to (num_sequences, seq_len=4, input_dim=16839).
# NOTE: the row count must be divisible by 4 (66512 / 4 = 16628 here).
train = torch.tensor(train_data.drop(['Label1','Label2','Label3'], axis=1).values.reshape(-1, 4, 16839).astype(np.float32))
train_tensor = TensorDataset(train, train_target)
train_loader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=True)
print(train.shape)
print(train_target.shape)
torch.Size([16628, 4, 16839])
torch.Size([16628])
我在 PyTorch 中运行 LSTM,但据我所知,它只接受序列长度 = 1。当我将序列长度重塑为 4 或其他数字时,我会收到输入和目标长度不匹配的错误。如果我同时重塑输入和目标,那么模型会抱怨它不接受多目标标签。
我的训练数据集有66512行和16839列,目标中有3 categories/classes。我想使用批处理大小 200 和序列长度 4,即在一个序列中使用 4 行数据。
请告知如何调整我的模型和/或数据,以便能够以各种序列长度(例如 4)运行模型。
batch_size = 200
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
# Build the integer class-index target from the one-hot label columns.
# Use torch.argmax (not np.argmax) so the target stays a torch.LongTensor,
# which is the dtype nn.CrossEntropyLoss expects for class targets.
train_target = torch.tensor(train_data[['Label1','Label2','Label3']].values.astype(np.float32))
train_target = torch.argmax(train_target, dim=1)
train = torch.tensor(train_data.drop(['Label1','Label2','Label3'], axis=1).values.astype(np.float32))
# unsqueeze(1) inserts a seq_len dimension of 1: (N, 16839) -> (N, 1, 16839),
# matching the (batch, seq_len, input_dim) layout of a batch_first LSTM.
train_tensor = TensorDataset(train.unsqueeze(1), train_target)
train_loader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=True)
print(train.shape)
print(train_target.shape)
torch.Size([66512, 16839])
torch.Size([66512])
import torch.nn as nn
class LSTMModel(nn.Module):
    """Many-to-one LSTM classifier.

    Expects batch-first input of shape (batch, seq_len, input_dim) and
    returns logits of shape (batch, output_dim), computed from the hidden
    state of the last time step.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        # Number of stacked LSTM layers
        self.layer_dim = layer_dim
        # batch_first=True -> input/output tensors are (batch, seq, feature)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        # Readout layer: final hidden state -> class logits
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Zero-initialize hidden and cell states on the same device as the
        # input (the original read a global `device` variable, which breaks
        # if the model is used before/without that global). Initial states
        # are constants, so they do not need requires_grad.
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        out, (hn, cn) = self.lstm(x, (h0, c0))
        # Classify using only the last time step's hidden state.
        out = self.fc(out[:, -1, :])
        return out
# Model/hyper-parameters: input_dim must equal the number of feature
# columns, so each data row is one time step of a sequence.
input_dim = 16839
hidden_dim = 100
output_dim = 3
layer_dim = 1
num_epochs = 1
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Sanity check: print every learnable parameter tensor's shape.
# (The original rebuilt list(model.parameters()) on each loop iteration
# and indexed by range(len(...)); it also had a no-op
# `batch_size = batch_size` assignment, removed here.)
params = list(model.parameters())
print(len(params))
for p in params:
    print(p.size())
6
torch.Size([400, 16839])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([3, 100])
torch.Size([3])
for epoch in range(num_epochs):
    # Renamed loop variables: the original reused `train`/`train_target`,
    # shadowing the full dataset tensors defined above.
    for i, (inputs, targets) in enumerate(train_loader):
        # Input data does not need requires_grad_(); gradients flow to the
        # model parameters regardless.
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        # Forward pass to get logits
        outputs = model(inputs)
        # CrossEntropyLoss applies log-softmax internally
        loss = criterion(outputs, targets)
        # Gradients w.r.t. parameters
        loss.backward()
        # Updating parameters
        optimizer.step()
    # Accuracy of the last batch. The original printed an undefined
    # `accuracy` name, which raised a NameError on the first epoch.
    accuracy = (outputs.argmax(dim=1) == targets).float().mean().item()
    print('Epoch: {}. Loss: {}. Accuracy: {}'.format(epoch, np.around(loss.item(), 4), np.around(accuracy, 4)))
您已设置 input_dim = 16839
,因此您的模型需要形状为 (batch_size, seq_len, 16839)
的输入。
您从中提取批次的 train_tensor
的形状为 (66512, 1, 16839)
。所以你的批次的形状是 (batch_size, 1, 16839)
。这是可行的,因为它满足了上述要求。
但是,如果您尝试重塑相同的训练张量以使 seq_len
= 4,您的 input_dim
维度将不再是 16839,因此将不符合模型的预期,这就是您会收到尺寸不匹配错误的原因。
这就是最终起作用的方法 - 将输入数据重塑为 4 个序列,每个序列有一个目标值,为此我根据我的问题逻辑选择了目标序列中的最后一个值。现在看起来很容易,但当时非常棘手。发布的其余代码相同。
# Group every 4 consecutive rows into one sequence, taking the label of the
# LAST row of each group as the sequence's target: iloc[3::4] selects rows
# 3, 7, 11, ... Use torch.argmax so the target stays a LongTensor for
# nn.CrossEntropyLoss.
train_target = torch.tensor(train_data[['Label1','Label2','Label3']].iloc[3::4].values.astype(np.float32))
train_target = torch.argmax(train_target, dim=1)
# Reshape features to (num_sequences, seq_len=4, input_dim=16839).
# NOTE: the row count must be divisible by 4 (66512 / 4 = 16628 here).
train = torch.tensor(train_data.drop(['Label1','Label2','Label3'], axis=1).values.reshape(-1, 4, 16839).astype(np.float32))
train_tensor = TensorDataset(train, train_target)
train_loader = DataLoader(dataset=train_tensor, batch_size=batch_size, shuffle=True)
print(train.shape)
print(train_target.shape)
torch.Size([16628, 4, 16839])
torch.Size([16628])