Create tf.keras.add Layer when Subclassing tf.keras.Model
Objective: Build a neural network with skip connections, where the number of blocks is a parameter, by subclassing tf.keras.Model.

Problem: When subclassing tf.keras.Model, we define the network's layers in __init__() and specify the forward pass in call(). When defining a tf.keras.layers.add layer in __init__(), two arguments must be specified (the two tensors to be added). However, those tensors exist only within the scope of call(). Where and how should the arguments to tf.keras.layers.add() be supplied?

The code is below; see the lines ending in # PROBLEM. I tried creating instance variables to use as arguments to add() (e.g. internalTensor, the internal "flow" tensor that is conventionally written as x), but without success.
class Network_very_simple(tf.keras.Model):
    def __init__(self, num_blocks):
        super(Network_very_simple, self).__init__()
        self.units_per_layer = 100
        self.num_blocks = num_blocks
        self.internalTensor = None
        self.block_output_tensors = [None for _ in range(self.num_blocks)]
        self.block_shortcut_tensors = [None for _ in range(self.num_blocks)]
        # Individual layers
        self.flatten_layer = tf.keras.layers.Flatten()
        self.final_dense_layer = tf.keras.layers.Dense(1, activation='linear')
        # Per-block layers
        self.block_dense_layers = []
        self.block_activations_0 = []
        self.block_activations_1 = []
        self.block_add_layers = []
        for block_num in range(num_blocks):
            self.block_dense_layers.append(tf.keras.layers.Dense(self.units_per_layer, activation=None))
            self.block_activations_0.append(tf.keras.layers.Activation('selu'))
            self.block_activations_1.append(tf.keras.layers.Activation('selu'))
            self.block_add_layers.append(tf.keras.layers.add([self.block_shortcut_tensors[block_num], self.internalTensor]))  # PROBLEM

    def call(self, inputs):
        input_tensor = self.flatten_layer(inputs)
        for block_num in range(self.num_blocks):
            if block_num == 0:
                block_input_tensor = input_tensor
            else:
                block_input_tensor = self.block_output_tensors[block_num - 1]
            self.internalTensor = self.block_dense_layers[block_num](block_input_tensor)
            self.internalTensor = self.block_activations_0[block_num](self.internalTensor)
            if block_num > 0:  # Skip connection
                self.block_shortcut_tensors[block_num] = self.block_output_tensors[block_num - 1]
                self.internalTensor = self.block_add_layers[block_num]()  # PROBLEM
            self.internalTensor = self.block_activations_1[block_num](self.internalTensor)
            self.block_output_tensors[block_num] = tf.identity(self.internalTensor)
        self.internalTensor = self.final_dense_layer(self.block_output_tensors[-1])
        return self.internalTensor
Two solutions:

- Use tf.keras.layers.Add. Note the capital A. You can define this layer in __init__ like any other layer, e.g. add_layer = tf.keras.layers.Add(), and then call it in call() on a list of two inputs, e.g. added = add_layer([x1, x2]). A sketch of this approach follows the list.
- There is really no need to use a layer for the addition at all. Simply do added = x1 + x2 in call(). Only Sequential models require "everything" to be a layer.
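
For concreteness, here is a minimal sketch of the corrected model using the first solution (assuming TensorFlow 2.x; everything except the original class and attribute names is illustrative). Each Add layer is constructed in __init__ with no tensor arguments; the two tensors are supplied only when the layer is called inside call():

    import tensorflow as tf

    class Network_very_simple(tf.keras.Model):
        def __init__(self, num_blocks):
            super().__init__()
            self.units_per_layer = 100
            self.num_blocks = num_blocks
            self.flatten_layer = tf.keras.layers.Flatten()
            self.final_dense_layer = tf.keras.layers.Dense(1, activation='linear')
            self.block_dense_layers = []
            self.block_activations_0 = []
            self.block_activations_1 = []
            self.block_add_layers = []
            for _ in range(num_blocks):
                self.block_dense_layers.append(
                    tf.keras.layers.Dense(self.units_per_layer, activation=None))
                self.block_activations_0.append(tf.keras.layers.Activation('selu'))
                self.block_activations_1.append(tf.keras.layers.Activation('selu'))
                # Capital-A Add: constructed without tensors; inputs are passed in call()
                self.block_add_layers.append(tf.keras.layers.Add())

        def call(self, inputs):
            x = self.flatten_layer(inputs)
            for block_num in range(self.num_blocks):
                shortcut = x  # block input, saved for the skip connection
                x = self.block_dense_layers[block_num](x)
                x = self.block_activations_0[block_num](x)
                if block_num > 0:  # skip connection (shapes match from block 1 onward)
                    x = self.block_add_layers[block_num]([shortcut, x])
                    # Second solution, equivalently: x = shortcut + x
                x = self.block_activations_1[block_num](x)
            return self.final_dense_layer(x)

This also removes the internalTensor, block_shortcut_tensors, and block_output_tensors instance variables: tensors are plain local values inside call(), so holding them on self is unnecessary.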
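
A quick smoke test of the sketch above (the batch size and feature count here are arbitrary, chosen only for illustration):

    model = Network_very_simple(num_blocks=3)
    outputs = model(tf.random.normal([8, 16]))  # batch of 8, 16 input features
    print(outputs.shape)  # (8, 1)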