如何在转置层之间绑定权重?
How to tie weights between transposed layers?
我尝试用下面的代码在 TensorFlow 2.0 的 Keras 中绑定权重,但它报了下面的错误。有谁知道应该如何编写权重绑定的全连接(Dense)层?
tf.random.set_seed(0)

with tf.device('/cpu:0'):
    # Symbolic input tensor with 784 features per sample.
    inputs = Input(shape=(784,))

    # A layer instance is callable on a tensor and returns a tensor.
    layer_1 = Dense(64, activation='relu')
    layer_1_output = layer_1(inputs)
    layer_2 = Dense(64, activation='relu')
    layer_2_output = layer_2(layer_1_output)

    # Transposed copy of layer_1's first weight (its kernel) as a NumPy array.
    weights = tf.transpose(layer_1.weights[0]).numpy()
    print(weights.shape)

    transpose_layer = Dense(784, activation='relu')
    transpose_layer_output = transpose_layer(layer_2_output)
    # NOTE: this is the call that raises the ValueError shown in the
    # traceback: a bare array is passed where the layer expects a list of
    # two weights (kernel and bias). Left as-is to reproduce the error.
    transpose_layer.set_weights(weights)

    # NOTE(review): the layer object `transpose_layer` is passed here rather
    # than `transpose_layer_output`; this looks unintended but is never
    # reached because of the error above. Left unchanged.
    predictions = Dense(10, activation='softmax')(transpose_layer)

    # Build and compile a model from the input to the softmax predictions.
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    model.summary()
错误
Traceback (most recent call last):
File "practice_2.py", line 62, in <module>
transpose_layer.set_weights(weights)
File "/Users/cheesiang_leow/.virtualenvs/tensorflow-2.0/lib/python3.6/site-
packages/tensorflow/python/keras/engine/base_layer.py", line 934, in set_weights
str(weights)[:50] + '...')
ValueError: You called `set_weights(weights)` on layer "dense_2" with a weight
list of length 64, but the layer was expecting 2 weights. Provided weights:
[[-0.03499636 0.0214913 0.04076344 ... -0.06531...
让我们先看看模型架构和模型参数(不绑定权重)
蓝色箭头代表偏差。因此,具有 n 个输入的神经元将具有 n+1 个权重。
现在您想把 `transpose_layer` 的权重与 `layer_1` 的权重绑定在一起。您把 `layer_1` 的权重转置成形状 (64, 784) 的矩阵,并试图把它设置给 `transpose_layer`。
但这里有几个问题:
`layer_1.weights[0]` 给出的是权重矩阵,`layer_1.weights[1]` 给出的是该 Dense 层的偏置,所以您取 `weights[0]` 这一步没有问题。但是 `set_weights` 需要的是一个权重列表。对于 Dense 层,它需要一个包含两个 NumPy 数组的列表:第一个是形状为 (64, 784) 的权重矩阵,第二个是长度为 784 的偏置向量。您传入的是单个 (64, 784) 的数组,它被当成了长度为 64 的列表,于是出现了上面的错误。那么这 784 个偏置值从哪里来呢?
解法:
- 一个不错的选择是设置 `use_bias=False`,直接禁用偏置。
- 保持偏置值不变:先通过 `transpose_layer.get_weights()[1]` 读出该层现有的偏置,再把它和转置后的权重一起传回 `set_weights`。
- 把偏置设置为一些很小的随机值(非常非常糟糕的主意)。
使用方案一的代码:
import tensorflow as tf
from keras.layers import Dense, Input
from keras.models import Model
with tf.device('/cpu:0'):
    inputs = Input(shape=(784,))

    # Two stacked 64-unit ReLU layers.
    layer_1 = Dense(64, activation='relu')
    layer_1_output = layer_1(inputs)
    layer_2 = Dense(64, activation='relu')
    layer_2_output = layer_2(layer_1_output)

    # Bias-free output layer, so its weight list holds only the kernel.
    transpose_layer = Dense(784, activation='relu', use_bias=False)
    transpose_layer_output = transpose_layer(layer_2_output)

    # Initialise the output kernel with the transpose of layer_1's kernel.
    # NOTE(review): set_weights copies the values once; the summary still
    # counts this layer's 50176 parameters separately, so the two kernels
    # are presumably free to diverge during training -- confirm if a true
    # tie is required.
    encoder_kernel = layer_1.get_weights()[0]
    transpose_layer.set_weights([encoder_kernel.T])

    model = Model(inputs=inputs, outputs=transpose_layer_output)
    model.compile('adam', loss='categorical_crossentropy')
    model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_36 (InputLayer) (None, 784) 0
_________________________________________________________________
dense_155 (Dense) (None, 64) 50240
_________________________________________________________________
dense_156 (Dense) (None, 64) 4160
_________________________________________________________________
dense_157 (Dense) (None, 784) 50176
=================================================================
Total params: 104,576
Trainable params: 104,576
Non-trainable params: 0
注意:可以看到,在 `transpose_layer` 中设置 `use_bias=False` 之后,该层只有 784*64 = 50176 个权重,而不是带偏置时的 50176 + 784 = 50960 个权重(如图所示)。
我花了很多时间才弄清楚,但我认为这是通过子类化 Keras Dense 层来实现 Tied Weights 的方式。
class TiedLayer(Dense):
    """Stacked autoencoder layer whose decoder reuses the encoder kernels.

    The encoder applies one kernel per entry of ``layer_sizes``; the decoder
    walks the same kernels in reverse order, transposed, so every kernel is a
    single variable shared by both directions. Each direction has its own
    biases when ``use_bias`` is true. The output has the same shape as the
    input.

    Args:
        layer_sizes: Output width of each encoder stage, in order.
        l2_normalize: If true, L2-normalise the latent code along the last
            axis before decoding.
        dropout: Dropout level passed to the backend ``dropout`` call before
            every stage; ``0`` disables dropout.
        *args, **kwargs: Forwarded to ``Dense``. ``activation="leaky_relu"``
            is handled here and mapped to a ``LeakyReLU()`` layer.
    """

    def __init__(self, layer_sizes, l2_normalize=False, dropout=0.0, *args, **kwargs):
        # "leaky_relu" is resolved by this class, so it is removed from the
        # kwargs that Dense sees. `.get` avoids a KeyError when no
        # activation was supplied at all.
        use_leaky_relu = kwargs.get('activation') == "leaky_relu"
        if use_leaky_relu:
            kwargs.pop('activation')

        # Initialise the base layer first: Dense.__init__ assigns
        # self.activation itself, so anything set before this call is lost.
        # 'units' is passed positionally so extra positional args do not
        # collide with it; its value is not used by this layer.
        super().__init__(1, *args, **kwargs)

        if use_leaky_relu:
            self.activation = LeakyReLU()
        self._uses_leaky_relu = use_leaky_relu

        self.layer_sizes = layer_sizes
        self.l2_normalize = l2_normalize
        self.dropout = dropout
        self.kernels = []
        self.biases = []
        self.biases2 = []
        self.uses_learning_phase = True

    def compute_output_shape(self, input_shape):
        """Return ``input_shape``: the decoder maps back to the input width."""
        return input_shape

    def build(self, input_shape):
        """Create the shared kernels plus the encoder and decoder biases."""
        assert len(input_shape) >= 2
        original_dim = int(input_shape[-1])
        self.input_spec = InputSpec(min_ndim=2, axes={-1: original_dim})

        # Start from empty lists so that a repeated build() does not leave
        # stale entries behind.
        self.kernels = []
        self.biases = []
        self.biases2 = []

        # Encoder stages: kernel i maps the previous width to layer_sizes[i].
        input_dim = original_dim
        for i, size in enumerate(self.layer_sizes):
            self.kernels.append(
                self.add_weight(
                    shape=(input_dim, size),
                    initializer=self.kernel_initializer,
                    name='ae_kernel_{}'.format(i),
                    regularizer=self.kernel_regularizer,
                    constraint=self.kernel_constraint))
            if self.use_bias:
                self.biases.append(
                    self.add_weight(
                        shape=(size,),
                        initializer=self.bias_initializer,
                        name='ae_bias_{}'.format(i),
                        regularizer=self.bias_regularizer,
                        constraint=self.bias_constraint))
            input_dim = size

        if self.use_bias:
            # Decoder biases: widths of the encoder stages in reverse order,
            # skipping the last one (the latent width), ...
            for n, size in enumerate(reversed(self.layer_sizes[:-1])):
                self.biases2.append(
                    self.add_weight(
                        shape=(size,),
                        initializer=self.bias_initializer,
                        name='ae_bias2_{}'.format(n),
                        regularizer=self.bias_regularizer,
                        constraint=self.bias_constraint))
            # ... followed by one bias at the original input width. The
            # weight name is kept exactly as before.
            self.biases2.append(
                self.add_weight(
                    shape=(original_dim,),
                    initializer=self.bias_initializer,
                    name='ae_bias2_{}'.format(len(self.layer_sizes)),
                    regularizer=self.bias_regularizer,
                    constraint=self.bias_constraint))
        self.built = True

    def call(self, inputs):
        """Encode ``inputs`` and decode the result back to the input width."""
        return self.decode(self.encode(inputs))

    def _apply_dropout(self, inputs):
        """Return the dropped-out tensor in the training phase, else ``inputs``."""
        dropped = K.backend.dropout(inputs, self.dropout)
        return K.backend.in_train_phase(dropped, inputs)

    def encode(self, inputs):
        """Run the encoder stages and return the latent code."""
        latent = inputs
        for i, kernel in enumerate(self.kernels):
            if self.dropout > 0:
                latent = self._apply_dropout(latent)
            latent = K.backend.dot(latent, kernel)
            if self.use_bias:
                latent = K.backend.bias_add(latent, self.biases[i])
            if self.activation is not None:
                latent = self.activation(latent)
        if self.l2_normalize:
            # Use the normalised tensor itself; dividing the latent code by
            # its normalised version would put the norm in every element.
            latent = K.backend.l2_normalize(latent, axis=-1)
        return latent

    def decode(self, latent):
        """Run the decoder: the encoder kernels in reverse order, transposed."""
        recon = latent
        for i, kernel in enumerate(reversed(self.kernels)):
            if self.dropout > 0:
                recon = self._apply_dropout(recon)
            recon = K.backend.dot(recon, K.backend.transpose(kernel))
            if self.use_bias:
                recon = K.backend.bias_add(recon, self.biases2[i])
            if self.activation is not None:
                recon = self.activation(recon)
        return recon

    def get_config(self):
        """Return the Dense config (without 'units') plus this layer's options."""
        base_config = super().get_config()
        base_config.pop('units', None)
        if self._uses_leaky_relu:
            # Store the identifier this class understands so that
            # from_config() goes through the same "leaky_relu" branch.
            base_config['activation'] = "leaky_relu"
        config = {
            'layer_sizes': list(self.layer_sizes),
            'l2_normalize': self.l2_normalize,
            'dropout': self.dropout,
        }
        return {**base_config, **config}

    @classmethod
    def from_config(cls, config):
        """Rebuild a layer from a dict produced by ``get_config``."""
        return cls(**config)
希望能帮到别人。
我尝试用下面的代码在 TensorFlow 2.0 的 Keras 中绑定权重,但它报了下面的错误。有谁知道应该如何编写权重绑定的全连接(Dense)层?
tf.random.set_seed(0)

with tf.device('/cpu:0'):
    # Symbolic input tensor with 784 features per sample.
    inputs = Input(shape=(784,))

    # A layer instance is callable on a tensor and returns a tensor.
    layer_1 = Dense(64, activation='relu')
    layer_1_output = layer_1(inputs)
    layer_2 = Dense(64, activation='relu')
    layer_2_output = layer_2(layer_1_output)

    # Transposed copy of layer_1's first weight (its kernel) as a NumPy array.
    weights = tf.transpose(layer_1.weights[0]).numpy()
    print(weights.shape)

    transpose_layer = Dense(784, activation='relu')
    transpose_layer_output = transpose_layer(layer_2_output)
    # NOTE: this is the call that raises the ValueError shown in the
    # traceback: a bare array is passed where the layer expects a list of
    # two weights (kernel and bias). Left as-is to reproduce the error.
    transpose_layer.set_weights(weights)

    # NOTE(review): the layer object `transpose_layer` is passed here rather
    # than `transpose_layer_output`; this looks unintended but is never
    # reached because of the error above. Left unchanged.
    predictions = Dense(10, activation='softmax')(transpose_layer)

    # Build and compile a model from the input to the softmax predictions.
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    model.summary()
错误
Traceback (most recent call last):
File "practice_2.py", line 62, in <module>
transpose_layer.set_weights(weights)
File "/Users/cheesiang_leow/.virtualenvs/tensorflow-2.0/lib/python3.6/site-
packages/tensorflow/python/keras/engine/base_layer.py", line 934, in set_weights
str(weights)[:50] + '...')
ValueError: You called `set_weights(weights)` on layer "dense_2" with a weight
list of length 64, but the layer was expecting 2 weights. Provided weights:
[[-0.03499636 0.0214913 0.04076344 ... -0.06531...
让我们先看看模型架构和模型参数(不绑定权重)
蓝色箭头代表偏差。因此,具有 n 个输入的神经元将具有 n+1 个权重。
现在您想把 `transpose_layer` 的权重与 `layer_1` 的权重绑定在一起。您把 `layer_1` 的权重转置成形状 (64, 784) 的矩阵,并试图把它设置给 `transpose_layer`。
但这里有几个问题:
`layer_1.weights[0]` 给出的是权重矩阵,`layer_1.weights[1]` 给出的是该 Dense 层的偏置,所以您取 `weights[0]` 这一步没有问题。但是 `set_weights` 需要的是一个权重列表。对于 Dense 层,它需要一个包含两个 NumPy 数组的列表:第一个是形状为 (64, 784) 的权重矩阵,第二个是长度为 784 的偏置向量。您传入的是单个 (64, 784) 的数组,它被当成了长度为 64 的列表,于是出现了上面的错误。那么这 784 个偏置值从哪里来呢?
解法:
- 一个不错的选择是设置 `use_bias=False`,直接禁用偏置。
- 保持偏置值不变:先通过 `transpose_layer.get_weights()[1]` 读出该层现有的偏置,再把它和转置后的权重一起传回 `set_weights`。
- 把偏置设置为一些很小的随机值(非常非常糟糕的主意)。
使用方案一的代码:
import tensorflow as tf
from keras.layers import Dense, Input
from keras.models import Model
with tf.device('/cpu:0'):
    inputs = Input(shape=(784,))

    # Two stacked 64-unit ReLU layers.
    layer_1 = Dense(64, activation='relu')
    layer_1_output = layer_1(inputs)
    layer_2 = Dense(64, activation='relu')
    layer_2_output = layer_2(layer_1_output)

    # Bias-free output layer, so its weight list holds only the kernel.
    transpose_layer = Dense(784, activation='relu', use_bias=False)
    transpose_layer_output = transpose_layer(layer_2_output)

    # Initialise the output kernel with the transpose of layer_1's kernel.
    # NOTE(review): set_weights copies the values once; the summary still
    # counts this layer's 50176 parameters separately, so the two kernels
    # are presumably free to diverge during training -- confirm if a true
    # tie is required.
    encoder_kernel = layer_1.get_weights()[0]
    transpose_layer.set_weights([encoder_kernel.T])

    model = Model(inputs=inputs, outputs=transpose_layer_output)
    model.compile('adam', loss='categorical_crossentropy')
    model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_36 (InputLayer) (None, 784) 0
_________________________________________________________________
dense_155 (Dense) (None, 64) 50240
_________________________________________________________________
dense_156 (Dense) (None, 64) 4160
_________________________________________________________________
dense_157 (Dense) (None, 784) 50176
=================================================================
Total params: 104,576
Trainable params: 104,576
Non-trainable params: 0
注意:可以看到,在 `transpose_layer` 中设置 `use_bias=False` 之后,该层只有 784*64 = 50176 个权重,而不是带偏置时的 50176 + 784 = 50960 个权重(如图所示)。
我花了很多时间才弄清楚,但我认为这是通过子类化 Keras Dense 层来实现 Tied Weights 的方式。
class TiedLayer(Dense):
    """Stacked autoencoder layer whose decoder reuses the encoder kernels.

    The encoder applies one kernel per entry of ``layer_sizes``; the decoder
    walks the same kernels in reverse order, transposed, so every kernel is a
    single variable shared by both directions. Each direction has its own
    biases when ``use_bias`` is true. The output has the same shape as the
    input.

    Args:
        layer_sizes: Output width of each encoder stage, in order.
        l2_normalize: If true, L2-normalise the latent code along the last
            axis before decoding.
        dropout: Dropout level passed to the backend ``dropout`` call before
            every stage; ``0`` disables dropout.
        *args, **kwargs: Forwarded to ``Dense``. ``activation="leaky_relu"``
            is handled here and mapped to a ``LeakyReLU()`` layer.
    """

    def __init__(self, layer_sizes, l2_normalize=False, dropout=0.0, *args, **kwargs):
        # "leaky_relu" is resolved by this class, so it is removed from the
        # kwargs that Dense sees. `.get` avoids a KeyError when no
        # activation was supplied at all.
        use_leaky_relu = kwargs.get('activation') == "leaky_relu"
        if use_leaky_relu:
            kwargs.pop('activation')

        # Initialise the base layer first: Dense.__init__ assigns
        # self.activation itself, so anything set before this call is lost.
        # 'units' is passed positionally so extra positional args do not
        # collide with it; its value is not used by this layer.
        super().__init__(1, *args, **kwargs)

        if use_leaky_relu:
            self.activation = LeakyReLU()
        self._uses_leaky_relu = use_leaky_relu

        self.layer_sizes = layer_sizes
        self.l2_normalize = l2_normalize
        self.dropout = dropout
        self.kernels = []
        self.biases = []
        self.biases2 = []
        self.uses_learning_phase = True

    def compute_output_shape(self, input_shape):
        """Return ``input_shape``: the decoder maps back to the input width."""
        return input_shape

    def build(self, input_shape):
        """Create the shared kernels plus the encoder and decoder biases."""
        assert len(input_shape) >= 2
        original_dim = int(input_shape[-1])
        self.input_spec = InputSpec(min_ndim=2, axes={-1: original_dim})

        # Start from empty lists so that a repeated build() does not leave
        # stale entries behind.
        self.kernels = []
        self.biases = []
        self.biases2 = []

        # Encoder stages: kernel i maps the previous width to layer_sizes[i].
        input_dim = original_dim
        for i, size in enumerate(self.layer_sizes):
            self.kernels.append(
                self.add_weight(
                    shape=(input_dim, size),
                    initializer=self.kernel_initializer,
                    name='ae_kernel_{}'.format(i),
                    regularizer=self.kernel_regularizer,
                    constraint=self.kernel_constraint))
            if self.use_bias:
                self.biases.append(
                    self.add_weight(
                        shape=(size,),
                        initializer=self.bias_initializer,
                        name='ae_bias_{}'.format(i),
                        regularizer=self.bias_regularizer,
                        constraint=self.bias_constraint))
            input_dim = size

        if self.use_bias:
            # Decoder biases: widths of the encoder stages in reverse order,
            # skipping the last one (the latent width), ...
            for n, size in enumerate(reversed(self.layer_sizes[:-1])):
                self.biases2.append(
                    self.add_weight(
                        shape=(size,),
                        initializer=self.bias_initializer,
                        name='ae_bias2_{}'.format(n),
                        regularizer=self.bias_regularizer,
                        constraint=self.bias_constraint))
            # ... followed by one bias at the original input width. The
            # weight name is kept exactly as before.
            self.biases2.append(
                self.add_weight(
                    shape=(original_dim,),
                    initializer=self.bias_initializer,
                    name='ae_bias2_{}'.format(len(self.layer_sizes)),
                    regularizer=self.bias_regularizer,
                    constraint=self.bias_constraint))
        self.built = True

    def call(self, inputs):
        """Encode ``inputs`` and decode the result back to the input width."""
        return self.decode(self.encode(inputs))

    def _apply_dropout(self, inputs):
        """Return the dropped-out tensor in the training phase, else ``inputs``."""
        dropped = K.backend.dropout(inputs, self.dropout)
        return K.backend.in_train_phase(dropped, inputs)

    def encode(self, inputs):
        """Run the encoder stages and return the latent code."""
        latent = inputs
        for i, kernel in enumerate(self.kernels):
            if self.dropout > 0:
                latent = self._apply_dropout(latent)
            latent = K.backend.dot(latent, kernel)
            if self.use_bias:
                latent = K.backend.bias_add(latent, self.biases[i])
            if self.activation is not None:
                latent = self.activation(latent)
        if self.l2_normalize:
            # Use the normalised tensor itself; dividing the latent code by
            # its normalised version would put the norm in every element.
            latent = K.backend.l2_normalize(latent, axis=-1)
        return latent

    def decode(self, latent):
        """Run the decoder: the encoder kernels in reverse order, transposed."""
        recon = latent
        for i, kernel in enumerate(reversed(self.kernels)):
            if self.dropout > 0:
                recon = self._apply_dropout(recon)
            recon = K.backend.dot(recon, K.backend.transpose(kernel))
            if self.use_bias:
                recon = K.backend.bias_add(recon, self.biases2[i])
            if self.activation is not None:
                recon = self.activation(recon)
        return recon

    def get_config(self):
        """Return the Dense config (without 'units') plus this layer's options."""
        base_config = super().get_config()
        base_config.pop('units', None)
        if self._uses_leaky_relu:
            # Store the identifier this class understands so that
            # from_config() goes through the same "leaky_relu" branch.
            base_config['activation'] = "leaky_relu"
        config = {
            'layer_sizes': list(self.layer_sizes),
            'l2_normalize': self.l2_normalize,
            'dropout': self.dropout,
        }
        return {**base_config, **config}

    @classmethod
    def from_config(cls, config):
        """Rebuild a layer from a dict produced by ``get_config``."""
        return cls(**config)
希望能帮到别人。