Freeze sublayers in tensorflow 2
I have a model made up of custom layers. Each custom layer contains many tf.keras.layers. The problem is that if I want to freeze those layers after defining the model, the loop:
for i, layer in enumerate(model.layers):
    print(i, layer.name)
only prints the "outer" custom layers, not the layers that live inside them. Is there any way to access the inner layers so that I can freeze them?
The custom layer example from the official tf docs:
class MLPBlock(layers.Layer):
    def __init__(self):
        super(MLPBlock, self).__init__()
        self.linear_1 = Linear(32)
        self.linear_2 = Linear(32)
        self.linear_3 = Linear(1)

    def call(self, inputs):
        x = self.linear_1(inputs)
        x = tf.nn.relu(x)
        x = self.linear_2(x)
        x = tf.nn.relu(x)
        return self.linear_3(x)
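To reproduce the symptom, here is a minimal runnable sketch. Note that the docs' custom Linear layer is swapped for tf.keras.layers.Dense here (my assumption, just so the snippet runs standalone):

import tensorflow as tf

layers = tf.keras.layers

class MLPBlock(layers.Layer):
    def __init__(self):
        super(MLPBlock, self).__init__()
        # stand-ins for the docs' custom Linear layer
        self.linear_1 = layers.Dense(32)
        self.linear_2 = layers.Dense(32)
        self.linear_3 = layers.Dense(1)

    def call(self, inputs):
        x = tf.nn.relu(self.linear_1(inputs))
        x = tf.nn.relu(self.linear_2(x))
        return self.linear_3(x)

model = tf.keras.Sequential([tf.keras.Input(shape=(64,)), MLPBlock()])

# prints a single entry for the whole block (e.g. "0 mlp_block");
# the three Dense layers inside it never show up
for i, layer in enumerate(model.layers):
    print(i, layer.name)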
OK, I came up with a solution.
An "update" function has to be implemented inside the custom layer; it updates the inner layers so that they become non-trainable.
Here is some sample code:
import tensorflow as tf
import numpy as np

layers = tf.keras.layers
seq_model = tf.keras.models.Sequential

class MDBlock(layers.Layer):
    def __init__(self):
        super(MDBlock, self).__init__()
        self.dense1 = layers.Dense(784, name="first")
        self.dense2 = layers.Dense(32, name="second")
        self.dense3 = layers.Dense(32, name="third")
        self.dense4 = layers.Dense(1, activation='sigmoid', name="outp")

    def call(self, inputs):
        x = self.dense1(inputs)
        x = tf.nn.relu(x)
        x = self.dense2(x)
        x = tf.nn.relu(x)
        x = self.dense3(x)
        x = tf.nn.relu(x)
        x = self.dense4(x)
        return x

    def updt(self):
        self.dense1.trainable = False

    def __str__(self):
        return "\nd1:{0}\nd2:{1}\nd3:{2}\nd4:{3}".format(self.dense1.trainable, self.dense2.trainable,
                                                         self.dense3.trainable, self.dense4.trainable)

# define layer block
layer = MDBlock()

model = seq_model()
model.add(layers.Input(shape=(784,)))
model.add(layer)

# Use updt function to make layers non-trainable
for i, layer in enumerate(model.layers):
    layer.updt()

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Generate dummy data
data = np.random.random((1000, 784))
labels = np.random.randint(2, size=(1000, 1))

# Train the model, iterating on the data in batches of 32 samples
model.fit(data, labels, epochs=10, batch_size=32)

# print block's layers state
for i, layer in enumerate(model.layers):
    print(i, layer)
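After training, the final loop prints the block's state via __str__, showing d1:False while d2, d3 and d4 stay True. As an extra sanity check (not part of the original answer), the freeze can also be verified through the model's weight collections:

# dense1's two variables (kernel and bias) are now reported as
# non-trainable; the six variables of dense2..dense4 stay trainable
print(len(model.trainable_weights))      # -> 6
print(len(model.non_trainable_weights))  # -> 2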
What your update function does is replace the first Dense() layer with another Dense() layer, this time created with trainable=False. While that works, I would write the 'update' function as follows, since setting the attribute in place keeps the existing layer (and its learned weights) instead of discarding them:
def updt(self):
    self.dense1.trainable = False
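A quick way to see the difference (a hypothetical check, reusing the MDBlock from the answer above): with the in-place version, the kernel variable that existed before the freeze survives it, which would not be the case if the layer were replaced by a fresh Dense():

block = MDBlock()
block(tf.zeros((1, 784)))        # call once so dense1 creates its weights
kernel_before = block.dense1.kernel

block.updt()                     # in-place freeze

# same Variable object, so any learned weights are kept
assert block.dense1.kernel is kernel_before
assert not block.dense1.trainable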
You can use a Keras callback. If you want to freeze your first layer after a certain number of epochs, add this callback:
class FreezeCallback(tf.keras.callbacks.Callback):
    def __init__(self, n_epochs=10):
        super().__init__()
        self.n_epochs = n_epochs

    def on_epoch_end(self, epoch, logs=None):
        if epoch == self.n_epochs:
            l = self.model.get_layer('first')
            l.trainable = False
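Two caveats worth noting. First, in the setup above 'first' is nested inside MDBlock, so self.model.get_layer('first') will raise; the nested layer has to be reached through the block itself (e.g. self.model.layers[0].dense1). Second, the Keras transfer-learning guide notes that trainable values are taken into account when compile() is called, so a freeze performed in the middle of fit() may not take effect until the model is recompiled (which also resets optimizer state). A sketch of a variant handling both points, assuming the same compile settings as above:

class FreezeAndRecompileCallback(tf.keras.callbacks.Callback):
    def __init__(self, n_epochs=10):
        super().__init__()
        self.n_epochs = n_epochs

    def on_epoch_end(self, epoch, logs=None):
        if epoch == self.n_epochs:
            # reach the nested layer through the custom block
            self.model.layers[0].dense1.trainable = False
            # trainable changes are picked up at compile time, so
            # recompile for the freeze to affect the remaining epochs
            self.model.compile(optimizer='rmsprop',
                               loss='binary_crossentropy',
                               metrics=['accuracy'])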