LSTM output is just a variation of the input data
I am building an LSTM and I want to predict s_max from the variable q_max, but the network seems to simply produce a slightly altered copy of the input data as its output. I have tried increasing the hidden size and the number of epochs without success. I assume there is a problem either with how I structure the data or with how the network is set up.
Here is a plot of the predictions my model produces:
At this point I am really just trying to fit the training data, so that I know the network can learn a simple problem.
Here is my model:
import glob
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

class LSTM(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        h_out = h_out.view(-1, self.hidden_size)
        out = self.fc(h_out)
        return out
Data preprocessing:
def data_manipulator(data):
    df = pd.read_hdf(data)
    df = df.iloc[:, [1, 4]]
    scaler = MinMaxScaler()
    df = scaler.fit_transform(df)
    return pd.DataFrame(df)
def sliding_windows(data, seq_length):
    y = np.ones([len(data)-seq_length-1, 1])
    x = np.ones([len(data)-seq_length-1, seq_length, 1])
    for i in range(len(data)-seq_length-1):
        x[i] = np.array(data.iloc[i:i + seq_length, 0]).reshape(-1, 1)  # ex. [1406, 5, 1]
        y[i] = data.iloc[i + seq_length, 1]                             # ex. [1406, 1]
    return torch.tensor(x, dtype=torch.float), torch.tensor(y, dtype=torch.float)
Setup, training, and plotting:
data_files = glob.glob('data/*.hdf')
seq_length = 5
df = data_manipulator(data_files[0])
x, y = sliding_windows(df, seq_length)

lstm = LSTM(num_classes=1, input_size=1, hidden_size=1, num_layers=1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001)
num_epochs = 2000

for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = lstm(x)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

lstm.eval()
output2 = lstm(x).detach().numpy()
plt.plot(df[0], label='q_max train')
plt.plot(df[1], label='s_max train')
plt.plot(output2, label='s_max output with q_max train as input')
plt.legend()
plt.show()
Training output:
Epoch: 0, loss: 0.52164
Epoch: 100, loss: 0.10143
Epoch: 200, loss: 0.04956
Epoch: 300, loss: 0.02736
Epoch: 400, loss: 0.02732
Epoch: 500, loss: 0.02727
Epoch: 600, loss: 0.02722
Epoch: 700, loss: 0.02714
Epoch: 800, loss: 0.02704
Epoch: 900, loss: 0.02689
Epoch: 1000, loss: 0.02663
After talking with my project supervisor, there were a couple of things I had not considered. First, the forward pass returns h_out rather than the predictions ula. Second, my function sliding_windows(data, seq_length) sets the problem up as a "many-to-one" network, whereas what he is after is a "many-to-many" network, which is better suited to this application, so I will work on changing the data input and output architecture. A sketch of the first change follows below.
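For reference, here is a rough sketch of how the forward pass could use ula instead of h_out to get one prediction per time step (many-to-many). This is only an illustration of the idea, not my final code, and it assumes the targets are reshaped to [batch, seq_length, 1] so they line up with the per-step outputs:

def forward(self, x):
    # Initial states can be created with torch.zeros directly; Variable is deprecated since PyTorch 0.4.
    h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
    c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
    ula, (h_out, _) = self.lstm(x, (h_0, c_0))  # ula: [batch, seq_length, hidden_size]
    out = self.fc(ula)                          # apply the linear layer at every time step
    return out                                  # [batch, seq_length, num_classes]

For the original many-to-one setup, the equivalent change would be out = self.fc(ula[:, -1, :]), i.e. using the LSTM output at the last time step rather than the hidden state reshaped across layers.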