Get the internal states of an LSTM layer after training, and initialize the LSTM layer with the saved internal states after prediction or before the next training step?
I have created a model with an LSTM layer as shown below and want to get the internal states (hidden state and cell state) after the training step and save them. After the training step, I will use the network for predictions and want to re-initialize the LSTM with the saved internal states before the next training step, so that I can continue from the same point after each training step. I have not been able to find anything helpful for the current version of TensorFlow, i.e. 2.x.
import tensorflow as tf

class LTSMNetwork(object):
    def __init__(self, num_channels, num_hidden_neurons, learning_rate, time_steps, batch_size):
        self.num_channels = num_channels
        self.num_hidden_neurons = num_hidden_neurons
        self.learning_rate = learning_rate
        self.time_steps = time_steps
        self.batch_size = batch_size

    def lstm_model(self):
        self.model = tf.keras.Sequential()
        self.model.add(tf.keras.layers.LSTM(batch_input_shape=(self.batch_size, self.time_steps, self.num_channels),
                                            units=self.num_hidden_neurons[0],
                                            activation='tanh', recurrent_activation='sigmoid',
                                            return_sequences=True, stateful=True))
        #self.model.add(tf.keras.layers.LSTM(units=self.num_hidden_neurons[1], stateful=True))
        hidden_layer = tf.keras.layers.Dense(units=self.num_hidden_neurons[1], activation=tf.nn.sigmoid)
        self.model.add(hidden_layer)
        self.model.add(tf.keras.layers.Dense(units=self.num_channels, name="output_layer", activation=tf.nn.tanh))
        self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=self.learning_rate),
                           loss='mse', metrics=['binary_accuracy'])
        return self.model


if __name__ == '__main__':
    num_channels = 3
    num_hidden_neurons = [150, 100]
    learning_rate = 0.001
    time_steps = 1
    batch_size = 1

    lstm_network = LTSMNetwork(num_channels=num_channels, num_hidden_neurons=num_hidden_neurons,
                               learning_rate=learning_rate, time_steps=time_steps, batch_size=batch_size)
    model = lstm_network.lstm_model()
    model.summary()
For example, you can subclass Callback and save the hidden and cell states at every epoch. Afterwards, you can choose which epoch to take the states from and set them as the initial states again with lstm_layer.reset_states(*):
import tensorflow as tf

class LTSMNetwork(object):
    def __init__(self, num_channels, num_hidden_neurons, learning_rate, time_steps, batch_size):
        self.num_channels = num_channels
        self.num_hidden_neurons = num_hidden_neurons
        self.learning_rate = learning_rate
        self.time_steps = time_steps
        self.batch_size = batch_size

    def lstm_model(self):
        self.model = tf.keras.Sequential()
        self.model.add(tf.keras.layers.LSTM(batch_input_shape=(self.batch_size, self.time_steps, self.num_channels),
                                            units=self.num_hidden_neurons[0],
                                            activation='tanh', recurrent_activation='sigmoid',
                                            return_sequences=True, stateful=True))
        hidden_layer = tf.keras.layers.Dense(units=self.num_hidden_neurons[1], activation=tf.nn.sigmoid)
        self.model.add(hidden_layer)
        self.model.add(tf.keras.layers.Dense(units=self.num_channels, name="output_layer", activation=tf.nn.tanh))
        self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=self.learning_rate),
                           loss='mse', metrics=['binary_accuracy'])
        return self.model

states = {}

class CustomCallback(tf.keras.callbacks.Callback):
    def __init__(self, lstm_layer):
        self.lstm_layer = lstm_layer

    def on_epoch_end(self, epoch, logs=None):
        # Note: lstm_layer.states is a list of tf.Variables that the layer
        # updates in place, so every entry below references the same objects;
        # see the copying approach further down for true per-epoch snapshots.
        states[epoch] = self.lstm_layer.states

num_channels = 3
num_hidden_neurons = [150, 100]
learning_rate = 0.001
time_steps = 1
batch_size = 1

lstm_network = LTSMNetwork(num_channels=num_channels, num_hidden_neurons=num_hidden_neurons,
                           learning_rate=learning_rate, time_steps=time_steps, batch_size=batch_size)
model = lstm_network.lstm_model()
lstm_layer = model.layers[0]

x = tf.random.normal((1, 1, 3))
y = tf.random.normal((1, 1, 3))
model.fit(x, y, epochs=5, callbacks=[CustomCallback(lstm_layer)])
model.summary()
lstm_layer.reset_states(states[0])  # Restore the hidden and cell states saved for the first epoch.
states now holds the internal states saved at the end of each of the 5 epochs.
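As a usage sketch (my addition, assuming the model, lstm_layer, x and y from the snippet above), you can restore the states stored for a given epoch and resume training from that point. Keep in mind that states holds references to the live state variables, so all entries reflect the latest values; the copying approach in the next answer takes real per-epoch snapshots:

# Restore the states stored under epoch 0 and continue training from there.
lstm_layer.reset_states([s.numpy() for s in states[0]])
model.fit(x, y, epochs=1, callbacks=[CustomCallback(lstm_layer)])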
I have managed to save the internal states of the LSTM after the training step and to re-initialize the LSTM with the saved internal states before the next training step. You can create a variable and set its value to the values currently stored in the state variables, see How can I copy a variable in tensorflow:
import numpy as np

states_ = {}

# Save the hidden state: copy its current value into a fresh variable.
internal_state_h = lstm_layer.states[0]
v1 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))  # (batch_size, units)
copy_state_h = v1.assign(internal_state_h)

# Save the cell state the same way.
internal_state_c = lstm_layer.states[1]
v2 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))
copy_state_c = v2.assign(internal_state_c)

# Create a tuple and add it to the dictionary.
states_[0] = (copy_state_h, copy_state_c)

# Reset the internal states to the saved copies.
lstm_layer.reset_states(states_[0])
A prediction call changes the internal states, but by following these steps you can restore the internal states of the RNN to what they were before the prediction.
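Putting both answers together, here is a minimal sketch of the full cycle (snapshot the states, predict, restore, continue training). It assumes the model, lstm_layer, x and y defined above and uses .numpy() snapshots instead of tf.Variable copies; either way captures the values:

# Snapshot the current hidden and cell state values as plain arrays.
saved_h = lstm_layer.states[0].numpy()
saved_c = lstm_layer.states[1].numpy()

# A prediction advances the internal states of the stateful LSTM.
model.predict(x)

# Roll the states back to the snapshot and continue training from there.
lstm_layer.reset_states([saved_h, saved_c])
model.fit(x, y, epochs=1)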