Tensorflow: Weights of non-trainable model layers are updated
I have a trained model that was created using Keras. On this model I want to apply transfer learning by freezing all layers except the last convolutional layer. However, when I fit the model after freezing the layers, I notice that some of the frozen layers end up with different weights. How can I avoid this?
I tried freezing the entire model with model.trainable = False, but that did not work either.
I am using Python 3.5.0, TensorFlow 1.0.1 and Keras 2.0.3.
Example script:
import os
import timeit
import datetime
import numpy as np
from keras.layers.core import Activation, Reshape, Permute
from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras import models
from keras import backend as K
K.set_image_dim_ordering('th')
def conv_model(input_shape, data_shape, kern_size, filt_size, pad_size,
               maxpool_size, n_classes, compile_model=True):
    """
    Create a small conv neural network
    input_shape: input shape of the images
    data_shape: 1d shape of the data
    kern_size: Kernel size used in all convolutional2d layers
    filt_size: Filter size of the first and last convolutional2d layer
    pad_size: size of padding
    maxpool_size: Pool size of all maxpooling2d and upsampling2d layers
    n_classes: number of output classes
    compile_model: True if the model should be compiled
    output: Keras deep learning model
    """
    #Keep track of compilation time
    start_time = timeit.default_timer()
    model = models.Sequential()
    #Pad the input so the 'valid' convolutions preserve the spatial dimensions
    model.add(ZeroPadding2D(padding=(pad_size, pad_size), input_shape=input_shape))
    #Encoding layers
    model.add(Convolution2D(filt_size, kern_size, kern_size, border_mode='valid'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(maxpool_size, maxpool_size)))
    #Decoding layers
    model.add(UpSampling2D(size=(maxpool_size, maxpool_size)))
    model.add(ZeroPadding2D(padding=(pad_size, pad_size)))
    model.add(Convolution2D(filt_size, kern_size, kern_size, border_mode='valid'))
    model.add(BatchNormalization())
    model.add(Convolution2D(n_classes, 1, 1, border_mode='valid'))
    model.add(Reshape((n_classes, data_shape), input_shape=(n_classes,)+input_shape[1:]))
    model.add(Permute((2, 1)))
    model.add(Activation('softmax'))
    if compile_model:
        model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=["accuracy"])
        print('Model compiled in {0} seconds'.format(datetime.timedelta(seconds=round(
            timeit.default_timer() - start_time))))
    return model
if __name__ == '__main__':
    #Create some random training data
    train_data = np.random.randint(0, 10, 3*512*512*20, dtype='uint8').reshape(-1, 3, 512, 512)
    train_labels = np.random.randint(0, 1, 7*512*512*20, dtype='uint8').reshape(-1, 512*512, 7)
    #Get dims of the data
    data_dims = train_data.shape[2:]
    data_shape = np.prod(data_dims)
    #Create initial model
    initial_model = conv_model((train_data.shape[1], train_data.shape[2], train_data.shape[3]),
                               data_shape, 3, 4, 1, 2, train_labels.shape[-1])
    #Train initial model on first part of the training data
    initial_model.fit(train_data[0:10], train_labels[0:10], verbose=2)
    #Store initial weights
    initial_weights = initial_model.get_weights()
    #Create transfer learning model (not compiled yet)
    transf_model = conv_model((train_data.shape[1], train_data.shape[2], train_data.shape[3]),
                              data_shape, 3, 4, 1, 2, train_labels.shape[-1], False)
    #Set transfer model weights
    transf_model.set_weights(initial_weights)
    #Set all layers trainable to False (except final conv layer)
    for layer in transf_model.layers:
        layer.trainable = False
    transf_model.layers[9].trainable = True
    print(transf_model.layers[9])
    #Compile model
    transf_model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=1e-4),
                         metrics=["accuracy"])
    #Train model on second part of the data
    transf_model.fit(train_data[10:20], train_labels[10:20], verbose=2)
    #Store transfer model weights
    transf_weights = transf_model.get_weights()
    #Check which weight arrays have changed (note: the index of a weight
    #array does not match the index of the layer it belongs to)
    for i in range(len(initial_weights)):
        update_w = np.sum(initial_weights[i] != transf_weights[i])
        if update_w != 0:
            print(str(update_w)+' updated values in weight array '+str(i))
Once you compile your model, you lose your previous weights because they are resampled. You need to transfer them first, set the layers to non-trainable, and only then compile:
#Transfer the weights first
transf_model.set_weights(initial_weights)
#Set all layers trainable to False (except final conv layer)
for layer in transf_model.layers:
    layer.trainable = False
transf_model.layers[9].trainable = True
#Compile the model only after the trainable flags are set
transf_model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=1e-4),
                     metrics=["accuracy"])
Otherwise the weights would change, since they get resampled.
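To verify that the freezing worked, you can compare the frozen layers' weights before and after a training run. A minimal check, reusing the variables from the question's script (on Keras 2.0.3 the BatchNormalization layers can still fail it, which is exactly what the next answer addresses):
#Compare the weights of all frozen layers around a call to fit()
frozen_before = [layer.get_weights() for layer in transf_model.layers
                 if not layer.trainable]
transf_model.fit(train_data[10:20], train_labels[10:20], verbose=2)
frozen_after = [layer.get_weights() for layer in transf_model.layers
                if not layer.trainable]
for before, after in zip(frozen_before, frozen_after):
    for w_old, w_new in zip(before, after):
        assert np.array_equal(w_old, w_new), 'a frozen weight changed'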
EDIT:
The model should be compiled after these changes, because during compilation Keras collects all trainable/non-trainable weights into a list that is not changed afterwards.
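A minimal sketch of this behaviour, building on the transf_model from above (the snapshotting is a Keras implementation detail, so treat this as an illustration rather than a guarantee):
#model.trainable_weights is recomputed on every access, but fit() uses
#the list that compile() collected, so flipping a flag alone does nothing
n_before = len(transf_model.trainable_weights)
transf_model.layers[1].trainable = True    #unfreeze the first conv layer
assert len(transf_model.trainable_weights) == n_before + 2   #kernel + bias
#fit() would still only update the weights collected at compile time;
#recompile to make the new flag take effect:
transf_model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=1e-4),
                     metrics=["accuracy"])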
You should upgrade Keras to Keras v2.1.3.
This issue was only just resolved; the ability to really freeze a BatchNormalization layer is available as of this recent release:
trainable attribute in BatchNormalization now disables the updates of the batch statistics (i.e. if trainable == False the layer will now run 100% in inference mode).
The cause of the bug:
In previous versions, the variance and mean parameters of a BatchNormalization layer could not be made non-trainable, so the layer kept updating them even though you set layer.trainable = False.
Now, it works!
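A minimal sketch (assuming Keras >= 2.1.3 and the transf_model / train_data from the question's script) that checks the BatchNormalization statistics really stay fixed:
#With trainable = False, BN layers should no longer update their moving
#mean/variance during fit() on Keras >= 2.1.3
bn_layers = [layer for layer in transf_model.layers
             if isinstance(layer, BatchNormalization)]
for bn in bn_layers:
    bn.trainable = False
transf_model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=1e-4),
                     metrics=["accuracy"])
stats_before = [w.copy() for bn in bn_layers for w in bn.get_weights()]
transf_model.fit(train_data[10:20], train_labels[10:20], verbose=2)
stats_after = [w for bn in bn_layers for w in bn.get_weights()]
for w_old, w_new in zip(stats_before, stats_after):
    assert np.array_equal(w_old, w_new), 'BN statistics were updated'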