无法让这个 TensorFlow LSTM 模型的损失下降
Can't get loss to decrease on this tensorflow lstm model
我正在尝试使用 TensorFlow 的 LSTM 模块来预测序列,但一直没有成功。我找不出问题所在,希望有人能帮我一把。这是我的代码:
首先,我在主程序中创建合成数据,并准备数据加载器:
# Synthetic 1-D signal: 500 evenly spaced samples of x on [0, 30] and the
# target curve y evaluated at those samples.
x = np.linspace(0, 30., 500)
y = x * np.sin(x) + 2 * np.sin(5 * x)
nb_steps = 20  # number of consecutive samples in one training window


def load_batch(batch_size=32):
    """Draw random windows of the signal for next-sample prediction.

    Args:
        batch_size: number of windows to sample.

    Returns:
        A tuple ``(x_b, y_b)``.

        ``x_b`` has shape ``(nb_steps, batch_size, 1)``; column ``i`` holds
        ``x[ind:ind + nb_steps]`` for the i-th sampled start index (time runs
        along axis 0).

        ``y_b`` has shape ``(nb_steps * batch_size, 1)``; window ``i`` occupies
        rows ``i * nb_steps`` .. ``(i + 1) * nb_steps - 1`` and holds ``y``
        taken one sample ahead of the corresponding ``x_b`` values.
    """
    x_b = np.zeros((nb_steps, batch_size, 1))
    y_b = np.zeros((nb_steps * batch_size, 1))
    # Exclusive upper bound for the window start. Derived from the data length
    # instead of the hard-coded 479 (= 500 - 20 - 1), so the label slice
    # y[ind + 1:ind + nb_steps + 1] always stays inside the array.
    max_start = len(x) - nb_steps - 1
    inds = np.random.randint(0, max_start, (batch_size))
    for i, ind in enumerate(inds):
        x_b[:, i, 0] = x[ind:ind + nb_steps]
        y_b[i * nb_steps:(i + 1) * nb_steps, 0] = y[ind + 1:ind + nb_steps + 1]
    return x_b, y_b
一些简写(别名):
# Short aliases for the TensorFlow symbols used when building the model,
# plus the number of windows fed per training step.
batch_size = 64
lstm = tf.contrib.rnn.LSTMCell
layers = tf.layers
dense = tf.layers.dense
adam = tf.train.AdamOptimizer
然后是我创建模型的部分
with tf.variable_scope('data'):
    # Time-major input (matches time_major=True below): steps, batch, features
    x_p = tf.placeholder(tf.float32, shape=[nb_steps, None, 1], name='x')
    # One scalar target per row; load_batch lays the rows out sample by sample
    # (window i occupies rows i*nb_steps .. (i+1)*nb_steps - 1).
    y_p = tf.placeholder(tf.float32, shape=[None, 1], name='labels')

with tf.variable_scope('network'):
    cell = lstm(num_units=100)
    outputs, states = tf.nn.dynamic_rnn(cell, x_p, dtype=tf.float32, time_major=True)
    # With time_major=True `outputs` is [steps, batch, 100]. Reshaping it
    # directly would order the rows step by step, while the labels are ordered
    # sample by sample, so each prediction would be compared with the wrong
    # target. Swap the first two axes so row k of the projection lines up with
    # row k of the labels.
    batch_major_outputs = tf.transpose(outputs, [1, 0, 2])
    reshaped_outputs = tf.reshape(batch_major_outputs, [-1, 100])
    # Shared linear layer mapping each 100-dim LSTM output to one prediction.
    projection = dense(reshaped_outputs, 1, activation=None, name='projection')
以上是我最不确定的部分。我把 LSTM 在每个时间步的输出重塑后沿第一个轴堆叠起来(或者说,我真的做到了吗?),然后把整个矩阵送入一个线性层。
with tf.variable_scope('training'):
    # Mean squared error between the per-row predictions and the targets.
    loss = tf.reduce_mean(tf.square(projection - y_p))
    train_lstm = adam(1e-3).minimize(loss)

epochs = 1000
batch_size = 64
log_every = 100  # print the running mean loss once per this many iterations
f, ax = plt.subplots(2, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    mean_loss = 0.
    # Each "epoch" is a single optimisation step on one freshly sampled batch.
    for epoch in range(1, epochs + 1):
        x_b, y_b = load_batch(batch_size)
        batch_loss, _ = sess.run([loss, train_lstm], feed_dict={x_p: x_b, y_p: y_b})
        mean_loss += batch_loss
        if epoch % log_every == 0:
            print('Epoch: {} | Loss: {:.6f}'.format(epoch, mean_loss / log_every))
            mean_loss = 0.

    # Interactive inspection: draw one random window, plot targets against
    # predictions, wait for Enter, then clear and repeat (stop with Ctrl+C).
    while True:
        x_b, y_b = load_batch(1)
        pred = sess.run(projection, feed_dict={x_p: x_b}).reshape(-1)
        # Targets and predictions belong to the sample one step AFTER each
        # input, so shift the abscissa by one grid spacing; otherwise they are
        # drawn one step to the left of the reference curve.
        x_next = x_b.reshape(-1) + (x[1] - x[0])
        ax[0].plot(x, y, label='Real')
        ax[0].plot(x_next, y_b.reshape(-1), label='Real batch')
        ax[0].plot(x_next, pred, label='Pred')
        ax[1].scatter(x_next, y_b.reshape(-1), label='Real')
        ax[1].scatter(x_next, pred, label='Pred')
        for a in ax:
            a.legend()
        plt.pause(0.1)
        input()
        for a in ax:
            a.clear()
非常感谢!
每个 LSTM 单元会产生 100 个输出,因此在执行 tf.nn.dynamic_rnn 之后,您需要将该输出展平。我更建议使用:
reshaped_outputs = tf.layers.Flatten()(outputs)
在这一行之后:
outputs, states = tf.nn.dynamic_rnn(cell, x_p, dtype = tf.float32, time_major = True)
而不是这一行:
reshaped_outputs = tf.reshape(outputs, [-1,100])
希望对您有所帮助:)
编辑:我没有注意到您使用了 time_major = True。我稍微更改了您的代码,使用 time_major = False,因为它使用起来更简单。
我假设您想预测 nb_steps 个输出。
代码:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Synthetic 1-D signal: 500 evenly spaced samples of x on [0, 30] and the
# target curve y evaluated at those samples.
x = np.linspace(0, 30., 500)
y = x * np.sin(x) + 2 * np.sin(5 * x)
nb_steps = 20  # number of consecutive samples in one training window


def load_batch(batch_size=32):
    """Draw random windows of the signal for next-sample prediction.

    Args:
        batch_size: number of windows to sample.

    Returns:
        A tuple ``(x_b, y_b)`` of arrays, both of shape
        ``(batch_size, nb_steps)``. Row ``i`` of ``x_b`` is
        ``x[ind:ind + nb_steps]`` and row ``i`` of ``y_b`` is ``y`` taken one
        sample ahead, ``y[ind + 1:ind + nb_steps + 1]``.
    """
    x_b = np.zeros((batch_size, nb_steps))
    y_b = np.zeros((batch_size, nb_steps))
    # Exclusive upper bound for the window start. Derived from the data length
    # instead of the hard-coded 479 (= 500 - 20 - 1), so the label slice
    # always stays inside the array.
    max_start = len(x) - nb_steps - 1
    inds = np.random.randint(0, max_start, (batch_size))
    for i, ind in enumerate(inds):
        x_b[i] = x[ind:ind + nb_steps]
        y_b[i] = y[ind + 1:ind + nb_steps + 1]
    return x_b, y_b
# Short aliases for the TensorFlow symbols used when building the model,
# plus the number of windows fed per training step.
batch_size = 64
lstm = tf.contrib.rnn.LSTMCell
layers = tf.layers
dense = tf.layers.dense
adam = tf.train.AdamOptimizer
with tf.variable_scope('data'):
    # One window per row: batch, steps
    x_p = tf.placeholder(tf.float32, shape=[None, nb_steps], name='x')
    # Add a trailing feature axis for the RNN.
    x_rnn = tf.expand_dims(x_p, 2)  # batch, steps, features
    # One target per step of every window: batch, steps
    y_p = tf.placeholder(tf.float32, shape=[None, nb_steps], name='labels')

with tf.variable_scope('network'):
    cell = lstm(num_units=100)
    outputs, states = tf.nn.dynamic_rnn(cell, x_rnn, dtype=tf.float32, time_major=False)
    # Collapse every axis after the batch axis so a single dense layer sees
    # the LSTM outputs of all steps of a window at once.
    reshaped_outputs = tf.layers.Flatten()(outputs)
    # Emit nb_steps predictions per window, matching the shape of y_p.
    projection = dense(reshaped_outputs, nb_steps, activation=None, name='projection')

with tf.variable_scope('training'):
    # Mean squared error over all windows and steps.
    loss = tf.reduce_mean(tf.square(projection - y_p))
    train_lstm = adam(1e-3).minimize(loss)
epochs = 1000
batch_size = 64
log_every = 100  # print the running mean loss once per this many iterations
f, ax = plt.subplots(2, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    mean_loss = 0.
    # Each "epoch" is a single optimisation step on one freshly sampled batch.
    for epoch in range(1, epochs + 1):
        x_b, y_b = load_batch(batch_size)
        batch_loss, _ = sess.run([loss, train_lstm], feed_dict={x_p: x_b, y_p: y_b})
        mean_loss += batch_loss
        if epoch % log_every == 0:
            print('Epoch: {} | Loss: {:.6f}'.format(epoch, mean_loss / log_every))
            mean_loss = 0.
我正在尝试使用 TensorFlow 的 LSTM 模块来预测序列,但一直没有成功。我找不出问题所在,希望有人能帮我一把。这是我的代码:
首先,我在主程序中创建合成数据,并准备数据加载器:
# Synthetic 1-D signal: 500 evenly spaced samples of x on [0, 30] and the
# target curve y evaluated at those samples.
x = np.linspace(0, 30., 500)
y = x * np.sin(x) + 2 * np.sin(5 * x)
nb_steps = 20  # number of consecutive samples in one training window


def load_batch(batch_size=32):
    """Draw random windows of the signal for next-sample prediction.

    Args:
        batch_size: number of windows to sample.

    Returns:
        A tuple ``(x_b, y_b)``.

        ``x_b`` has shape ``(nb_steps, batch_size, 1)``; column ``i`` holds
        ``x[ind:ind + nb_steps]`` for the i-th sampled start index (time runs
        along axis 0).

        ``y_b`` has shape ``(nb_steps * batch_size, 1)``; window ``i`` occupies
        rows ``i * nb_steps`` .. ``(i + 1) * nb_steps - 1`` and holds ``y``
        taken one sample ahead of the corresponding ``x_b`` values.
    """
    x_b = np.zeros((nb_steps, batch_size, 1))
    y_b = np.zeros((nb_steps * batch_size, 1))
    # Exclusive upper bound for the window start. Derived from the data length
    # instead of the hard-coded 479 (= 500 - 20 - 1), so the label slice
    # y[ind + 1:ind + nb_steps + 1] always stays inside the array.
    max_start = len(x) - nb_steps - 1
    inds = np.random.randint(0, max_start, (batch_size))
    for i, ind in enumerate(inds):
        x_b[:, i, 0] = x[ind:ind + nb_steps]
        y_b[i * nb_steps:(i + 1) * nb_steps, 0] = y[ind + 1:ind + nb_steps + 1]
    return x_b, y_b
一些简写(别名):
# Short aliases for the TensorFlow symbols used when building the model,
# plus the number of windows fed per training step.
batch_size = 64
lstm = tf.contrib.rnn.LSTMCell
layers = tf.layers
dense = tf.layers.dense
adam = tf.train.AdamOptimizer
然后是我创建模型的部分
with tf.variable_scope('data'):
    # Time-major input (matches time_major=True below): steps, batch, features
    x_p = tf.placeholder(tf.float32, shape=[nb_steps, None, 1], name='x')
    # One scalar target per row; load_batch lays the rows out sample by sample
    # (window i occupies rows i*nb_steps .. (i+1)*nb_steps - 1).
    y_p = tf.placeholder(tf.float32, shape=[None, 1], name='labels')

with tf.variable_scope('network'):
    cell = lstm(num_units=100)
    outputs, states = tf.nn.dynamic_rnn(cell, x_p, dtype=tf.float32, time_major=True)
    # With time_major=True `outputs` is [steps, batch, 100]. Reshaping it
    # directly would order the rows step by step, while the labels are ordered
    # sample by sample, so each prediction would be compared with the wrong
    # target. Swap the first two axes so row k of the projection lines up with
    # row k of the labels.
    batch_major_outputs = tf.transpose(outputs, [1, 0, 2])
    reshaped_outputs = tf.reshape(batch_major_outputs, [-1, 100])
    # Shared linear layer mapping each 100-dim LSTM output to one prediction.
    projection = dense(reshaped_outputs, 1, activation=None, name='projection')
以上是我最不确定的部分。我把 LSTM 在每个时间步的输出重塑后沿第一个轴堆叠起来(或者说,我真的做到了吗?),然后把整个矩阵送入一个线性层。
with tf.variable_scope('training'):
    # Mean squared error between the per-row predictions and the targets.
    loss = tf.reduce_mean(tf.square(projection - y_p))
    train_lstm = adam(1e-3).minimize(loss)

epochs = 1000
batch_size = 64
log_every = 100  # print the running mean loss once per this many iterations
f, ax = plt.subplots(2, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    mean_loss = 0.
    # Each "epoch" is a single optimisation step on one freshly sampled batch.
    for epoch in range(1, epochs + 1):
        x_b, y_b = load_batch(batch_size)
        batch_loss, _ = sess.run([loss, train_lstm], feed_dict={x_p: x_b, y_p: y_b})
        mean_loss += batch_loss
        if epoch % log_every == 0:
            print('Epoch: {} | Loss: {:.6f}'.format(epoch, mean_loss / log_every))
            mean_loss = 0.

    # Interactive inspection: draw one random window, plot targets against
    # predictions, wait for Enter, then clear and repeat (stop with Ctrl+C).
    while True:
        x_b, y_b = load_batch(1)
        pred = sess.run(projection, feed_dict={x_p: x_b}).reshape(-1)
        # Targets and predictions belong to the sample one step AFTER each
        # input, so shift the abscissa by one grid spacing; otherwise they are
        # drawn one step to the left of the reference curve.
        x_next = x_b.reshape(-1) + (x[1] - x[0])
        ax[0].plot(x, y, label='Real')
        ax[0].plot(x_next, y_b.reshape(-1), label='Real batch')
        ax[0].plot(x_next, pred, label='Pred')
        ax[1].scatter(x_next, y_b.reshape(-1), label='Real')
        ax[1].scatter(x_next, pred, label='Pred')
        for a in ax:
            a.legend()
        plt.pause(0.1)
        input()
        for a in ax:
            a.clear()
非常感谢!
每个 LSTM 单元会产生 100 个输出,因此在执行 tf.nn.dynamic_rnn 之后,您需要将该输出展平。我更建议使用:
reshaped_outputs = tf.layers.Flatten()(outputs)
在这一行之后:
outputs, states = tf.nn.dynamic_rnn(cell, x_p, dtype = tf.float32, time_major = True)
而不是这一行:
reshaped_outputs = tf.reshape(outputs, [-1,100])
希望对您有所帮助:)
编辑:我没有注意到您使用了 time_major = True。我稍微更改了您的代码,使用 time_major = False,因为它使用起来更简单。
我假设您想预测 nb_steps 个输出。
代码:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Synthetic 1-D signal: 500 evenly spaced samples of x on [0, 30] and the
# target curve y evaluated at those samples.
x = np.linspace(0, 30., 500)
y = x * np.sin(x) + 2 * np.sin(5 * x)
nb_steps = 20  # number of consecutive samples in one training window


def load_batch(batch_size=32):
    """Draw random windows of the signal for next-sample prediction.

    Args:
        batch_size: number of windows to sample.

    Returns:
        A tuple ``(x_b, y_b)`` of arrays, both of shape
        ``(batch_size, nb_steps)``. Row ``i`` of ``x_b`` is
        ``x[ind:ind + nb_steps]`` and row ``i`` of ``y_b`` is ``y`` taken one
        sample ahead, ``y[ind + 1:ind + nb_steps + 1]``.
    """
    x_b = np.zeros((batch_size, nb_steps))
    y_b = np.zeros((batch_size, nb_steps))
    # Exclusive upper bound for the window start. Derived from the data length
    # instead of the hard-coded 479 (= 500 - 20 - 1), so the label slice
    # always stays inside the array.
    max_start = len(x) - nb_steps - 1
    inds = np.random.randint(0, max_start, (batch_size))
    for i, ind in enumerate(inds):
        x_b[i] = x[ind:ind + nb_steps]
        y_b[i] = y[ind + 1:ind + nb_steps + 1]
    return x_b, y_b
# Short aliases for the TensorFlow symbols used when building the model,
# plus the number of windows fed per training step.
batch_size = 64
lstm = tf.contrib.rnn.LSTMCell
layers = tf.layers
dense = tf.layers.dense
adam = tf.train.AdamOptimizer
with tf.variable_scope('data'):
    # One window per row: batch, steps
    x_p = tf.placeholder(tf.float32, shape=[None, nb_steps], name='x')
    # Add a trailing feature axis for the RNN.
    x_rnn = tf.expand_dims(x_p, 2)  # batch, steps, features
    # One target per step of every window: batch, steps
    y_p = tf.placeholder(tf.float32, shape=[None, nb_steps], name='labels')

with tf.variable_scope('network'):
    cell = lstm(num_units=100)
    outputs, states = tf.nn.dynamic_rnn(cell, x_rnn, dtype=tf.float32, time_major=False)
    # Collapse every axis after the batch axis so a single dense layer sees
    # the LSTM outputs of all steps of a window at once.
    reshaped_outputs = tf.layers.Flatten()(outputs)
    # Emit nb_steps predictions per window, matching the shape of y_p.
    projection = dense(reshaped_outputs, nb_steps, activation=None, name='projection')

with tf.variable_scope('training'):
    # Mean squared error over all windows and steps.
    loss = tf.reduce_mean(tf.square(projection - y_p))
    train_lstm = adam(1e-3).minimize(loss)
epochs = 1000
batch_size = 64
log_every = 100  # print the running mean loss once per this many iterations
f, ax = plt.subplots(2, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    mean_loss = 0.
    # Each "epoch" is a single optimisation step on one freshly sampled batch.
    for epoch in range(1, epochs + 1):
        x_b, y_b = load_batch(batch_size)
        batch_loss, _ = sess.run([loss, train_lstm], feed_dict={x_p: x_b, y_p: y_b})
        mean_loss += batch_loss
        if epoch % log_every == 0:
            print('Epoch: {} | Loss: {:.6f}'.format(epoch, mean_loss / log_every))
            mean_loss = 0.