How to apply Monte Carlo Dropout, in tensorflow, for an LSTM if batch normalization is part of the model?
I have a model consisting of 3 LSTM layers, followed by a batch normalization layer, and finally a dense layer. Here is the code:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

def build_uncomplied_model(hparams):
    inputs = tf.keras.Input(shape=(None, hparams["n_features"]))
    # return_RNN maps the "rnn_type" string to the matching Keras RNN layer class
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_1"], return_sequences=True, recurrent_dropout=hparams['dropout'])(inputs)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_2"], return_sequences=True)(x)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_3"], return_sequences=True)(x)
    x = layers.BatchNormalization()(x)
    outputs = layers.TimeDistributed(layers.Dense(hparams["n_features"]))(x)
    model = tf.keras.Model(inputs, outputs, name=hparams["rnn_type"] + "_model")
    return model
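For reference, the return_RNN factory is not shown in the question; a minimal sketch, assuming it simply maps a type string to the corresponding Keras layer class (the exact mapping is an assumption, not code from the question):

# Hypothetical sketch of the return_RNN factory used above; the mapping
# below is an assumption, not code from the original question.
def return_RNN(rnn_type):
    rnn_layers = {
        "LSTM": tf.keras.layers.LSTM,
        "GRU": tf.keras.layers.GRU,
        "SimpleRNN": tf.keras.layers.SimpleRNN,
    }
    return rnn_layers[rnn_type]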
Now, I know that to apply MC Dropout we can run the following:
y_predict = np.stack([my_model(X_test, training=True) for _ in range(100)])
y_proba = y_predict.mean(axis=0)
However, setting training = True forces the batch normalization layer to overfit the testing dataset (it normalizes with the statistics of each test batch instead of the moving averages learned during training). Moreover, building a custom Dropout layer that hard-codes training to True is not a solution for me, since I am using the dropout built into the LSTM layers:
class MCDropout(tf.keras.layers.Dropout):
    def call(self, inputs):
        return super().call(inputs, training=True)
Any help is much appreciated!
One possible solution is to create a custom LSTM layer that overrides the call method to force the training flag to True:
class MCLSTM(tf.keras.layers.LSTM):
    def __init__(self, units, **kwargs):
        super(MCLSTM, self).__init__(units, **kwargs)

    def call(self, inputs, mask=None, training=None, initial_state=None):
        # Ignore the incoming training flag so that dropout and
        # recurrent_dropout stay active at inference time
        return super(MCLSTM, self).call(
            inputs,
            mask=mask,
            training=True,
            initial_state=initial_state,
        )
Then you can use it in your code:
def build_uncomplied_model(hparams):
    inputs = tf.keras.Input(shape=(None, hparams["n_features"]))
    x = MCLSTM(hparams["cell_size_1"], return_sequences=True, recurrent_dropout=hparams['dropout'])(inputs)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_2"], return_sequences=True)(x)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_3"], return_sequences=True)(x)
    x = layers.BatchNormalization()(x)
    outputs = layers.TimeDistributed(layers.Dense(hparams["n_features"]))(x)
    model = tf.keras.Model(inputs, outputs, name=hparams["rnn_type"] + "_model")
    return model
or add it to your return_RNN factory (a more elegant way):
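A minimal sketch of that factory variant, where the mc_dropout flag is a hypothetical parameter (not part of the original factory) that selects the Monte Carlo layer:

# Hypothetical factory extension: the mc_dropout flag is an assumption,
# not part of the original return_RNN.
def return_RNN(rnn_type, mc_dropout=False):
    if rnn_type == "LSTM" and mc_dropout:
        return MCLSTM
    rnn_layers = {
        "LSTM": tf.keras.layers.LSTM,
        "GRU": tf.keras.layers.GRU,
        "SimpleRNN": tf.keras.layers.SimpleRNN,
    }
    return rnn_layers[rnn_type]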
===== 编辑 =====
Another solution is to set the training flag when building the model, something like this:
def build_uncomplied_model(hparams):
    inputs = tf.keras.Input(shape=(None, hparams["n_features"]))
    # This is the Monte Carlo LSTM: training=True is fixed at call time
    x = layers.LSTM(hparams["cell_size_1"], return_sequences=True, recurrent_dropout=hparams['dropout'])(inputs, training=True)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_2"], return_sequences=True)(x)
    x = return_RNN(hparams["rnn_type"])(hparams["cell_size_3"], return_sequences=True)(x)
    x = layers.BatchNormalization()(x)
    outputs = layers.TimeDistributed(layers.Dense(hparams["n_features"]))(x)
    model = tf.keras.Model(inputs, outputs, name=hparams["rnn_type"] + "_model")
    return model
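Either way, Monte Carlo sampling no longer requires the global training=True flag, so the BatchNormalization layer keeps using its moving statistics at inference. A minimal inference sketch (the sample count of 100 and the variable names are illustrative):

# MC inference: the first LSTM's dropout stays active (training=True was
# baked in at build time), while BatchNormalization runs in inference
# mode because the model itself is called without training=True.
y_samples = np.stack([model(X_test) for _ in range(100)])
y_mean = y_samples.mean(axis=0)  # MC estimate of the prediction
y_std = y_samples.std(axis=0)    # per-output uncertainty estimate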