为什么手动构建 timestep-unfold LSTM 与使用 static_rnn 的输出不同?
Why does manually building a timestep-unrolled LSTM produce different outputs from using static_rnn?
这是我手动构建 LSTM 的代码:
import tensorflow as tf
import numpy as np
# Manually unrolled LSTM: calls one BasicLSTMCell once per time step and
# collects each call's (output, state) result in `outputs`.
batch_size = 1
hidden_size = 4
num_steps = 3
input_dim = 5
# No NumPy random numbers are drawn below; the seed only matters if the
# constant input is later replaced by random data.
np.random.seed(123)
# Constant all-ones batch of shape [batch_size, num_steps, input_dim].
# Named `input_data` so the builtin `input` is not shadowed.
input_data = np.ones([batch_size, num_steps, input_dim], dtype=int)
x = tf.placeholder(
    dtype=tf.float32,
    shape=[batch_size, num_steps, input_dim],
    name='input_x',
)
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_size)
initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
outputs = []
with tf.variable_scope('for_loop', initializer=tf.ones_initializer):
    for i in range(num_steps):
        if i > 0:
            # Share the cell's weights across steps instead of creating new ones.
            tf.get_variable_scope().reuse_variables()
        # NOTE: every step is fed `initial_state`, never the state returned by
        # the previous step, so the steps are independent of one another and
        # produce identical results. This is the behaviour the question asks
        # about, so it is kept here on purpose.
        output = lstm_cell(x[:, i, :], initial_state)
        outputs.append(output)
with tf.Session() as sess:
    # tf.initialize_all_variables() is deprecated in favour of
    # tf.global_variables_initializer().
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    result = sess.run(outputs, feed_dict={x: input_data})
    print(result)
输出:
[(array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32), LSTMStateTuple(c=array([[0.99321693, 0.99321693, 0.99321693, 0.99321693]], dtype=float32), h=array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32))),
(array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32), LSTMStateTuple(c=array([[0.99321693, 0.99321693, 0.99321693, 0.99321693]], dtype=float32), h=array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32))),
(array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32), LSTMStateTuple(c=array([[0.99321693, 0.99321693, 0.99321693, 0.99321693]], dtype=float32), h=array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32)))]
而这是使用 static_rnn 的代码:
import tensorflow as tf
import numpy as np
# Reference version: lets tf.nn.static_rnn unroll the same BasicLSTMCell over
# the time steps and return the per-step outputs plus the final state.
batch_size = 1
hidden_size = 4
num_steps = 3
input_dim = 5
# No NumPy random numbers are drawn below; the seed only matters if the
# constant input is later replaced by random data.
np.random.seed(123)
# Constant all-ones batch of shape [batch_size, num_steps, input_dim].
# Named `input_data` so the builtin `input` is not shadowed.
input_data = np.ones([batch_size, num_steps, input_dim], dtype=int)
x = tf.placeholder(
    dtype=tf.float32,
    shape=[batch_size, num_steps, input_dim],
    name='input_x',
)
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_size)
initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
# Split along the time axis (axis 1) into one tensor per step, which is the
# list-of-inputs form passed to static_rnn below.
y = tf.unstack(x, axis=1)
with tf.variable_scope('static_rnn', initializer=tf.ones_initializer):
    output, state = tf.nn.static_rnn(lstm_cell, y, initial_state=initial_state)
with tf.Session() as sess:
    # tf.initialize_all_variables() is deprecated in favour of
    # tf.global_variables_initializer().
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    result = sess.run([output, state], feed_dict={x: input_data})
    print(result)
输出:
[[array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32),
array([[0.9631945, 0.9631945, 0.9631945, 0.9631945]], dtype=float32),
array([[0.9948382, 0.9948382, 0.9948382, 0.9948382]], dtype=float32)], LSTMStateTuple(c=array([[2.9925175, 2.9925175, 2.9925175, 2.9925175]], dtype=float32), h=array([[0.9948382, 0.9948382, 0.9948382, 0.9948382]], dtype=float32))]
第一个时间步的输出完全相同,但从第二个时间步开始,手动构建的版本似乎与前后时间步没有任何关联——3 个时间步的输出完全一样。我认为手动编写的代码有误,但我找不到把各次 BasicLSTMCell 调用串联起来的方法。求助!
感谢@Susmit Agrawal,我将代码更改为:
# Corrected unrolling loop: thread the recurrent state from step to step.
# Each entry of `outputs` is the (output, state) pair returned by the cell.
for i in range(num_steps):
    if i > 0:
        # Feed the state produced by the previous step (element 1 of its pair).
        output = lstm_cell(x[:, i, :], outputs[i - 1][1])
    else:
        # The first step starts from the zero state, which the script above
        # names `initial_state` (no `z_state` is defined there).
        output = lstm_cell(x[:, i, :], initial_state)
    outputs.append(output)
这会产生与 static_rnn 相同的正确输出。
这是我手动构建 LSTM 的代码:
import tensorflow as tf
import numpy as np
# Manually unrolled LSTM: calls one BasicLSTMCell once per time step and
# collects each call's (output, state) result in `outputs`.
batch_size = 1
hidden_size = 4
num_steps = 3
input_dim = 5
# No NumPy random numbers are drawn below; the seed only matters if the
# constant input is later replaced by random data.
np.random.seed(123)
# Constant all-ones batch of shape [batch_size, num_steps, input_dim].
# Named `input_data` so the builtin `input` is not shadowed.
input_data = np.ones([batch_size, num_steps, input_dim], dtype=int)
x = tf.placeholder(
    dtype=tf.float32,
    shape=[batch_size, num_steps, input_dim],
    name='input_x',
)
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_size)
initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
outputs = []
with tf.variable_scope('for_loop', initializer=tf.ones_initializer):
    for i in range(num_steps):
        if i > 0:
            # Share the cell's weights across steps instead of creating new ones.
            tf.get_variable_scope().reuse_variables()
        # NOTE: every step is fed `initial_state`, never the state returned by
        # the previous step, so the steps are independent of one another and
        # produce identical results. This is the behaviour the question asks
        # about, so it is kept here on purpose.
        output = lstm_cell(x[:, i, :], initial_state)
        outputs.append(output)
with tf.Session() as sess:
    # tf.initialize_all_variables() is deprecated in favour of
    # tf.global_variables_initializer().
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    result = sess.run(outputs, feed_dict={x: input_data})
    print(result)
输出:
[(array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32), LSTMStateTuple(c=array([[0.99321693, 0.99321693, 0.99321693, 0.99321693]], dtype=float32), h=array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32))),
(array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32), LSTMStateTuple(c=array([[0.99321693, 0.99321693, 0.99321693, 0.99321693]], dtype=float32), h=array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32))),
(array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32), LSTMStateTuple(c=array([[0.99321693, 0.99321693, 0.99321693, 0.99321693]], dtype=float32), h=array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32)))]
而这是使用 static_rnn 的代码:
import tensorflow as tf
import numpy as np
# Reference version: lets tf.nn.static_rnn unroll the same BasicLSTMCell over
# the time steps and return the per-step outputs plus the final state.
batch_size = 1
hidden_size = 4
num_steps = 3
input_dim = 5
# No NumPy random numbers are drawn below; the seed only matters if the
# constant input is later replaced by random data.
np.random.seed(123)
# Constant all-ones batch of shape [batch_size, num_steps, input_dim].
# Named `input_data` so the builtin `input` is not shadowed.
input_data = np.ones([batch_size, num_steps, input_dim], dtype=int)
x = tf.placeholder(
    dtype=tf.float32,
    shape=[batch_size, num_steps, input_dim],
    name='input_x',
)
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_size)
initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
# Split along the time axis (axis 1) into one tensor per step, which is the
# list-of-inputs form passed to static_rnn below.
y = tf.unstack(x, axis=1)
with tf.variable_scope('static_rnn', initializer=tf.ones_initializer):
    output, state = tf.nn.static_rnn(lstm_cell, y, initial_state=initial_state)
with tf.Session() as sess:
    # tf.initialize_all_variables() is deprecated in favour of
    # tf.global_variables_initializer().
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    result = sess.run([output, state], feed_dict={x: input_data})
    print(result)
输出:
[[array([[0.7536526, 0.7536526, 0.7536526, 0.7536526]], dtype=float32),
array([[0.9631945, 0.9631945, 0.9631945, 0.9631945]], dtype=float32),
array([[0.9948382, 0.9948382, 0.9948382, 0.9948382]], dtype=float32)], LSTMStateTuple(c=array([[2.9925175, 2.9925175, 2.9925175, 2.9925175]], dtype=float32), h=array([[0.9948382, 0.9948382, 0.9948382, 0.9948382]], dtype=float32))]
第一个时间步的输出完全相同,但从第二个时间步开始,手动构建的版本似乎与前后时间步没有任何关联——3 个时间步的输出完全一样。我认为手动编写的代码有误,但我找不到把各次 BasicLSTMCell 调用串联起来的方法。求助!
感谢@Susmit Agrawal,我将代码更改为:
# Corrected unrolling loop: thread the recurrent state from step to step.
# Each entry of `outputs` is the (output, state) pair returned by the cell.
for i in range(num_steps):
    if i > 0:
        # Feed the state produced by the previous step (element 1 of its pair).
        output = lstm_cell(x[:, i, :], outputs[i - 1][1])
    else:
        # The first step starts from the zero state, which the script above
        # names `initial_state` (no `z_state` is defined there).
        output = lstm_cell(x[:, i, :], initial_state)
    outputs.append(output)
这会产生与 static_rnn 相同的正确输出。