Problems with LSTM model
I'm trying to implement an LSTM model in PyTorch and I've run into the following problem: the loss doesn't decrease.

My task: I have sessions described by different features. The session length is fixed and equal to 20, and my goal is to predict whether the last item of a session is skipped.

I tried scaling the input features, and I also tried passing the target itself in as a feature (in case the provided features carry no information at all; I expected the model to overfit and the loss to get close to 0), but the loss curve always looks like the one shown at the top of the question. Here are the data shapes:
print(X.shape)
#(82770, 20, 31) where 82770 is count of sessions, 20 is seq_len, 31 is count of features
print(y.shape)
#(82770, 20)
I also defined a get_batches function. Yes, I'm aware of the problem with the last (incomplete) batch in this generator:
def get_batches(X, y, batch_size):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
    '''
    assert X.shape[0] == y.shape[0]
    assert X.shape[1] == y.shape[1]
    assert len(X.shape) == 3
    assert len(y.shape) == 2

    n_batches = X.shape[0] // batch_size
    for batch_number in range(n_batches):
        batch_x = X[batch_number*batch_size:(batch_number+1)*batch_size, :, :]
        batch_y = y[batch_number*batch_size:(batch_number+1)*batch_size, :]
        if batch_x.shape[0] == batch_size:
            yield batch_x, batch_y
        else:
            # the incomplete last batch is dropped
            print('batch_x shape: {}'.format(batch_x.shape))
            break
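As a quick sanity check of the generator (a minimal sketch using the shapes above; batch_size=1000 is just an example value, and first_x / first_y are illustration names), the first batch should come out with shapes (1000, 20, 31) and (1000, 20):

first_x, first_y = next(get_batches(X, y, batch_size=1000))
print(first_x.shape)  # expected: (1000, 20, 31)
print(first_y.shape)  # expected: (1000, 20)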
Here is my RNN:
import torch
import torch.nn as nn


class BaseRNN(nn.Module):
    def __init__(self, n_features, hidden_size, n_layers, drop_p=0.3, lr=0.001, last_items=10):
        super(BaseRNN, self).__init__()
        # constants
        self.n_features = n_features
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.drop_p = drop_p
        self.lr = lr
        self.last_items = last_items

        # layers
        self.lstm = nn.LSTM(
            n_features, hidden_size, n_layers,
            dropout=drop_p, batch_first=True
        )
        self.dropout = nn.Dropout(self.drop_p)
        self.linear_layer = nn.Linear(self.hidden_size, 1)
        self.sigm = nn.Sigmoid()

    def forward(self, x, hidden):
        out, hidden = self.lstm(x, hidden)
        batch_size = x.shape[0]
        out = self.dropout(out)
        # apply the linear layer to every timestep
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.linear_layer(out)
        out = self.sigm(out)
        # use only the last element of each sequence
        out = out.view(batch_size, -1)
        out = out[:, -1]
        return out, hidden

    def init_hidden(self, batch_size):
        # initialize hidden and cell states with zeros
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_size).zero_(),
                  weight.new(self.n_layers, batch_size, self.hidden_size).zero_())
        return hidden
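A quick shape check on the model (a sketch with made-up sizes and names, not part of the original training code) confirms that the forward pass returns one probability per sequence in the batch:

check_net = BaseRNN(n_features=31, hidden_size=100, n_layers=1, drop_p=0.1, last_items=1)
check_hidden = check_net.init_hidden(batch_size=4)
check_out, check_hidden = check_net(torch.randn(4, 20, 31), check_hidden)
print(check_out.shape)  # expected: torch.Size([4]), one sigmoid output per sequence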
Here is my training function:
def train(net, X, y,
          n_epochs=10, batch_size=10, clip=5):
    '''Train the network and return the list of per-batch losses.'''
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=net.lr)
    criterion = nn.BCELoss()
    counter = 0
    losses = []
    for e in range(n_epochs):
        h = net.init_hidden(batch_size)
        for batch_x, batch_y in get_batches(X=X, y=y, batch_size=batch_size):
            counter += 1
            h = net.init_hidden(batch_size)
            inputs = torch.from_numpy(batch_x).float()
            targets = torch.from_numpy(batch_y.astype(int))
            # keep only the last `last_items` targets of each sequence
            targets = targets[:, -net.last_items:].float().view(net.last_items*batch_size)
            # detach the hidden state from the previous graph
            h = tuple([each.data for each in h])
            net.zero_grad()
            output, h = net(inputs, h)
            loss = criterion(output.view(net.last_items*batch_size), targets)
            losses.append(loss.item())
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
    return losses
Run the training:
import matplotlib.pyplot as plt

n_hidden = 100
n_layers = 1
n_features = X.shape[2]

net = BaseRNN(n_features, n_hidden, n_layers,
              lr=0.01, drop_p=0.1, last_items=1)
losses = train(net, X, y, n_epochs=5, batch_size=1000, clip=5)
plt.plot(losses)
After all these steps I get the plot shown at the top of the question. I think I've made some huge mistake somewhere, because even when I put the target variable into the features the loss still doesn't decrease.

Where am I going wrong?
P.S. How did I generate the sample data? I take the real y data and add some noise:
import numpy as np

Y = np.array([[0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1],
              [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              [1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1],
              [0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0],
              [0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              [0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1],
              [1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1]])
print(Y.shape)
# (10, 20)

# add 5 features with random noise
random_noise = np.random.randn(10*20*5).reshape(10, 20, 5)
X = np.concatenate((Y.reshape(10, 20, 1), random_noise), axis=2)
print(X.shape)
# (10, 20, 6)
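With this toy data the first feature of X is literally the target, so if the pipeline is wired correctly the loss should clearly decrease. For example, something along these lines should show it (a rough sketch; the sizes are chosen to match the 10 toy sessions, and toy_net / toy_losses are just illustration names):

toy_net = BaseRNN(n_features=6, hidden_size=100, n_layers=1, drop_p=0.1, last_items=1)
toy_losses = train(toy_net, X, Y, n_epochs=50, batch_size=5)
plt.plot(toy_losses)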
My fault: I had forgotten to scale the input features. After scaling them, it works fine now.
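For reference, the scaling can be done roughly like this (a sketch using sklearn's StandardScaler; the scaler expects 2D input, so the (sessions, seq_len, features) array is flattened before fitting and reshaped back afterwards):

from sklearn.preprocessing import StandardScaler

n_sessions, seq_len, n_features = X.shape
scaler = StandardScaler()
# fit on all timesteps of all sessions, then restore the 3D shape
X_scaled = scaler.fit_transform(X.reshape(-1, n_features)).reshape(n_sessions, seq_len, n_features)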