在 TensorFlow 2.0 中,如何将 LSTM 模型在前一个时间步的输出作为输入传递到下一个时间步?
In TensorFlow 2.0, how can the output of an LSTM model at the previous time-step be passed as the input to the next time-step?
我想构建一个 LSTM 模型,其中第 (n+1) 个时间步的输入是第 n 个时间步输出的函数。我没有找到在当前框架中实现这一点的方法。很多人提到可以使用 raw_rnn,但我认为它在 TensorFlow 2.0 中已被弃用。谁能帮我解决这个问题?以下是我目前的实现:
class RNN(tf.keras.Model):
    """Two-layer LSTM whose next input is computed from its previous output.

    Each iteration of ``call`` runs batch normalisation, two stacked LSTM
    layers and a dense projection, then turns the projection into the next
    input with ``process_output_to_input``. The final hidden/cell states of
    both LSTM layers are carried over to the following iteration through
    ``initial_state``.

    NOTE(review): ``sequence_length`` and ``process_output_to_input`` are
    read from the enclosing module and are not defined in the visible
    snippet -- confirm they exist where this class is used.

    Args:
        rnn_units: Number of units in each LSTM layer (default 16, the value
            that was previously hard-coded).
        state_dim: Feature size used for the first LSTM's ``input_shape``
            and for the dense output layer (default 4, the value that was
            previously hard-coded).
    """

    def __init__(self, rnn_units=16, state_dim=4):
        super(RNN, self).__init__()
        self.bn_layer = tf.keras.layers.BatchNormalization(
            momentum=0.99,
            epsilon=1e-6,
            beta_initializer=tf.random_normal_initializer(0.0, stddev=0.1),
            gamma_initializer=tf.random_uniform_initializer(0.1, 0.5),
        )
        # return_state=True makes each LSTM return (sequence, h, c) so that
        # call() can hand the states back in on the next iteration.
        self.lstm1 = tf.keras.layers.LSTM(
            rnn_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
            input_shape=[None, state_dim],
        )
        self.lstm2 = tf.keras.layers.LSTM(
            rnn_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.dense = tf.keras.layers.Dense(state_dim)

    def call(self, x, training=None):
        """Run ``sequence_length`` feedback iterations starting from ``x``.

        Args:
            x: Initial input tensor for the first iteration.
            training: Forwarded to the batch-normalisation layer. ``None``
                lets Keras decide from its own learning-phase context.

        Returns:
            The dense projection produced by the last iteration.
        """
        # Passing initial_state=None on the first iteration lets each LSTM
        # start from its default state.
        state1 = None
        state2 = None
        for _ in range(sequence_length):
            x = self.bn_layer(x, training=training)
            lstm_output, new_h1, new_c1 = self.lstm1(x, initial_state=state1)
            lstm_output, new_h2, new_c2 = self.lstm2(
                lstm_output, initial_state=state2
            )
            output = self.dense(lstm_output)
            # Feed a function of this iteration's output back in as the
            # next iteration's input.
            x = process_output_to_input(output)
            state1 = [new_h1, new_c1]
            state2 = [new_h2, new_c2]
        return output
我找到了一个利用 LSTM 层的 stateful 属性的解决方案,可以参考 https://adgefficiency.com/tf2-lstm-hidden/ 。我的实现如下:
class SingleStepLSTM(tf.keras.Model):
    """Batch-norm -> LSTM -> LSTM -> dense model meant to be called in a loop.

    Both LSTM layers are constructed with ``stateful=True``; per the Keras
    documentation a stateful RNN layer reuses the final state of one call as
    the initial state of the next, so the caller does not pass states
    explicitly.

    Args:
        config: Object exposing ``state_dim`` (feature size of the input and
            of the dense output) and ``rnn_units`` (units per LSTM layer).
    """

    def __init__(self, config):
        super(SingleStepLSTM, self).__init__()
        feature_dim = config.state_dim
        hidden_units = config.rnn_units

        beta_init = tf.random_normal_initializer(0.0, stddev=0.1)
        gamma_init = tf.random_uniform_initializer(0.1, 0.5)
        self.bn_layer = tf.keras.layers.BatchNormalization(
            momentum=0.99,
            epsilon=1e-6,
            beta_initializer=beta_init,
            gamma_initializer=gamma_init,
        )

        # Settings shared by both recurrent layers.
        recurrent_kwargs = dict(
            return_sequences=True,
            recurrent_initializer='glorot_uniform',
            stateful=True,
        )
        self.lstm1 = tf.keras.layers.LSTM(
            hidden_units,
            input_shape=[None, feature_dim],
            **recurrent_kwargs
        )
        self.lstm2 = tf.keras.layers.LSTM(hidden_units, **recurrent_kwargs)
        self.dense = tf.keras.layers.Dense(feature_dim)

    def call(self, x, training=True):
        """Apply one forward pass and return the dense projection.

        Args:
            x: Input tensor for this step.
            training: Forwarded to the batch-normalisation layer. Defaults
                to ``True``, so pass ``training=False`` explicitly for
                inference.

        Returns:
            Output of the final dense layer.
        """
        normalized = self.bn_layer(x, training)
        first_pass = self.lstm1(normalized)
        second_pass = self.lstm2(first_pass)
        return self.dense(second_pass)
这是一个单步 LSTM 模型,在训练或测试时可以循环调用它:
# Build the stateful single-step model once, then call it repeatedly; the
# LSTM layers are stateful, so each call continues from the previous one.
single_lstm_step = SingleStepLSTM(config)
# NOTE(review): `input` shadows the Python builtin. The name is kept because
# its initial value is bound outside this snippet -- consider renaming it at
# the definition site.
for _ in range(num_seqs):
    output = single_lstm_step(input)
    # Turn this step's output into the next step's input.
    input = process_to_input(output)
我认为问题中给出的实现也是可行的,但在我看来,使用有状态(stateful)RNN 是更优雅的解决方案。
我想构建一个 LSTM 模型,其中第 (n+1) 个时间步的输入是第 n 个时间步输出的函数。我没有找到在当前框架中实现这一点的方法。很多人提到可以使用 raw_rnn,但我认为它在 TensorFlow 2.0 中已被弃用。谁能帮我解决这个问题?以下是我目前的实现:
class RNN(tf.keras.Model):
    """Two-layer LSTM whose next input is computed from its previous output.

    Each iteration of ``call`` runs batch normalisation, two stacked LSTM
    layers and a dense projection, then turns the projection into the next
    input with ``process_output_to_input``. The final hidden/cell states of
    both LSTM layers are carried over to the following iteration through
    ``initial_state``.

    NOTE(review): ``sequence_length`` and ``process_output_to_input`` are
    read from the enclosing module and are not defined in the visible
    snippet -- confirm they exist where this class is used.

    Args:
        rnn_units: Number of units in each LSTM layer (default 16, the value
            that was previously hard-coded).
        state_dim: Feature size used for the first LSTM's ``input_shape``
            and for the dense output layer (default 4, the value that was
            previously hard-coded).
    """

    def __init__(self, rnn_units=16, state_dim=4):
        super(RNN, self).__init__()
        self.bn_layer = tf.keras.layers.BatchNormalization(
            momentum=0.99,
            epsilon=1e-6,
            beta_initializer=tf.random_normal_initializer(0.0, stddev=0.1),
            gamma_initializer=tf.random_uniform_initializer(0.1, 0.5),
        )
        # return_state=True makes each LSTM return (sequence, h, c) so that
        # call() can hand the states back in on the next iteration.
        self.lstm1 = tf.keras.layers.LSTM(
            rnn_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
            input_shape=[None, state_dim],
        )
        self.lstm2 = tf.keras.layers.LSTM(
            rnn_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.dense = tf.keras.layers.Dense(state_dim)

    def call(self, x, training=None):
        """Run ``sequence_length`` feedback iterations starting from ``x``.

        Args:
            x: Initial input tensor for the first iteration.
            training: Forwarded to the batch-normalisation layer. ``None``
                lets Keras decide from its own learning-phase context.

        Returns:
            The dense projection produced by the last iteration.
        """
        # Passing initial_state=None on the first iteration lets each LSTM
        # start from its default state.
        state1 = None
        state2 = None
        for _ in range(sequence_length):
            x = self.bn_layer(x, training=training)
            lstm_output, new_h1, new_c1 = self.lstm1(x, initial_state=state1)
            lstm_output, new_h2, new_c2 = self.lstm2(
                lstm_output, initial_state=state2
            )
            output = self.dense(lstm_output)
            # Feed a function of this iteration's output back in as the
            # next iteration's input.
            x = process_output_to_input(output)
            state1 = [new_h1, new_c1]
            state2 = [new_h2, new_c2]
        return output
我找到了一个利用 LSTM 层的 stateful 属性的解决方案,可以参考 https://adgefficiency.com/tf2-lstm-hidden/ 。我的实现如下:
class SingleStepLSTM(tf.keras.Model):
    """Batch-norm -> LSTM -> LSTM -> dense model meant to be called in a loop.

    Both LSTM layers are constructed with ``stateful=True``; per the Keras
    documentation a stateful RNN layer reuses the final state of one call as
    the initial state of the next, so the caller does not pass states
    explicitly.

    Args:
        config: Object exposing ``state_dim`` (feature size of the input and
            of the dense output) and ``rnn_units`` (units per LSTM layer).
    """

    def __init__(self, config):
        super(SingleStepLSTM, self).__init__()
        feature_dim = config.state_dim
        hidden_units = config.rnn_units

        beta_init = tf.random_normal_initializer(0.0, stddev=0.1)
        gamma_init = tf.random_uniform_initializer(0.1, 0.5)
        self.bn_layer = tf.keras.layers.BatchNormalization(
            momentum=0.99,
            epsilon=1e-6,
            beta_initializer=beta_init,
            gamma_initializer=gamma_init,
        )

        # Settings shared by both recurrent layers.
        recurrent_kwargs = dict(
            return_sequences=True,
            recurrent_initializer='glorot_uniform',
            stateful=True,
        )
        self.lstm1 = tf.keras.layers.LSTM(
            hidden_units,
            input_shape=[None, feature_dim],
            **recurrent_kwargs
        )
        self.lstm2 = tf.keras.layers.LSTM(hidden_units, **recurrent_kwargs)
        self.dense = tf.keras.layers.Dense(feature_dim)

    def call(self, x, training=True):
        """Apply one forward pass and return the dense projection.

        Args:
            x: Input tensor for this step.
            training: Forwarded to the batch-normalisation layer. Defaults
                to ``True``, so pass ``training=False`` explicitly for
                inference.

        Returns:
            Output of the final dense layer.
        """
        normalized = self.bn_layer(x, training)
        first_pass = self.lstm1(normalized)
        second_pass = self.lstm2(first_pass)
        return self.dense(second_pass)
这是一个单步 LSTM 模型,在训练或测试时可以循环调用它:
# Build the stateful single-step model once, then call it repeatedly; the
# LSTM layers are stateful, so each call continues from the previous one.
single_lstm_step = SingleStepLSTM(config)
# NOTE(review): `input` shadows the Python builtin. The name is kept because
# its initial value is bound outside this snippet -- consider renaming it at
# the definition site.
for _ in range(num_seqs):
    output = single_lstm_step(input)
    # Turn this step's output into the next step's input.
    input = process_to_input(output)
我认为问题中给出的实现也是可行的,但在我看来,使用有状态(stateful)RNN 是更优雅的解决方案。