无法创建组（名称已存在）

Question

import tensorflow as tf
from ..models.encoder import encoder_build
from ..models.decoder import decoder_build

def compute_attention_maps(inputs, name, upsample=False):
    """Turn a feature tensor into a flattened, softmax-normalised attention map.

    The squared activations are summed over the last axis, optionally
    upsampled by a factor of 2 with bilinear interpolation, flattened to two
    dimensions and normalised with a softmax over the flattened axis.

    Args:
        inputs: Feature tensor. The code indexes three dimensions after
            dropping the summed axis, so a 4-D tensor is expected
            (presumably batch, height, width, channels -- confirm with the
            encoder).
        name: Prefix, converted with ``str``, for the name of every op and
            layer created here.
        upsample: When true, the summed map goes through
            ``UpSampling2D(size=(2, 2), interpolation='bilinear')`` before
            it is flattened.

    Returns:
        A 2-D tensor ``(dim0, dim1 * dim2)`` whose rows come from a softmax
        over the last axis.
    """
    prefix = str(name)

    energy = tf.reduce_sum(
        tf.square(inputs), axis=-1, keepdims=True, name=prefix + "reducSum"
    )
    if upsample:
        bilinear = tf.keras.layers.UpSampling2D(
            size=(2, 2), interpolation='bilinear', name=prefix + "bilinear"
        )
        energy = bilinear(energy)
    energy = tf.squeeze(energy, axis=-1, name=prefix + "squeeze")

    # The shape is read dynamically because the spatial size may be unknown
    # when the graph is built.
    leading = tf.shape(energy)[0]
    flattened = tf.shape(energy)[1] * tf.shape(energy)[2]
    energy = tf.reshape(energy, (leading, flattened), name=prefix + "reshape")

    return tf.nn.softmax(energy, axis=-1, name=prefix + "spatialSoftmax")

def compute_mse(x, y, name):
    """Return a scalar squared-error loss between two tensors.

    The element-wise squared difference is averaged over axis 0 and the
    remaining elements are then summed.

    Args:
        x: First tensor.
        y: Second tensor, combined with ``x`` by
            ``tf.math.squared_difference``.
        name: Prefix, converted with ``str``, for the names of the three ops
            created here.

    Returns:
        A scalar tensor.
    """
    prefix = str(name)
    squared = tf.math.squared_difference(x, y, name=prefix + "squError")
    averaged = tf.reduce_mean(squared, axis=0, name=prefix + "mean")
    return tf.reduce_sum(averaged, name=prefix + "sum")

def compute_distillation(attention_inputs):
    """Sum the losses between the attention maps of consecutive inputs.

    Args:
        attention_inputs: Iterable of exactly four feature tensors, ordered
            from the first to the last stage.

    Returns:
        A scalar tensor named ``distill_loss``: the sum of the three
        ``compute_mse`` terms for the pairs (1, 2), (2, 3) and (3, 4).
    """
    feat1, feat2, feat3, feat4 = attention_inputs

    # The maps of stages 2 and 3 are built twice: once as is, and once
    # upsampled for comparison with the preceding stage.
    map1 = compute_attention_maps(feat1, "attmap1_")
    map2_up = compute_attention_maps(feat2, "attmap2UP_", upsample=True)
    map2 = compute_attention_maps(feat2, "attmap2_")
    map3_up = compute_attention_maps(feat3, "attmap3UP_", upsample=True)
    map3 = compute_attention_maps(feat3, "attmap3_")
    map4 = compute_attention_maps(feat4, "attmap4_")

    terms = [
        compute_mse(map1, map2_up, "distil1_"),
        compute_mse(map2, map3_up, "distil2_"),
        compute_mse(map3, map4, "distil3_"),
    ]
    return tf.math.add_n(terms, name="distill_loss")

if __name__ == '__main__':
    # Assemble the network: image -> encoder -> decoder.
    image = tf.keras.layers.Input(shape=(None, None, 3), name='image')
    encoder_outputs = encoder_build(image)  # defined in encoder.py
    # The second encoder output holds the tensors used for the extra loss.
    attention_feats = encoder_outputs[1]
    prediction = decoder_build(encoder_outputs)  # defined in decoder.py
    model = tf.keras.models.Model(inputs=image, outputs=prediction)

    # Attach the distillation term on top of the compiled loss.
    model.add_loss(compute_distillation(attention_feats))
    model.summary()

    adam = tf.keras.optimizers.Adam(learning_rate=0.00001, clipnorm=0.001)
    model.compile(
        optimizer=adam,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # NOTE: train_generator, validation_generator, epochs and callbacks are
    # not defined in this snippet; they must be provided by the caller.
    model.fit(
        x=train_generator,
        epochs=epochs,
        verbose=1,
        callbacks=callbacks,
        validation_data=validation_generator,
        shuffle=True,
    )

我创建了一个用于车道检测的 Keras 分割模型（https://arxiv.org/pdf/1908.00821.pdf）。在不添加自定义损失时，我可以正常编译模型、开始训练，并在每个 epoch 结束后保存模型，不会出现任何错误。但是，如果我通过 model.add_loss(compute_distillation(attention_inputs)) 把自定义损失添加到模型中，模型可以训练完 1 个 epoch，但随后无法保存，并显示以下错误。如何解决这个错误？

374/375 [============================>.] - ETA: 0s - loss: 4.4717 - acc: 0.9781Epoch 1/50
 78/78[============================>.] - ETA: 37:38 - val_loss: 4.5855 - val_acc: 0.9758
Epoch 00001: saving model to /workspace/work/enet_sad_naiveresize/snapshot/enetNRSAD_Tusimple_L_4.4718_VL_4.5855.h5
Traceback (most recent call last):
  File "/workspace/work/enet_sad_naiveresize/bin/train.py", line 82, in <module>
    shuffle=True)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py", line 727, in fit
    use_multiprocessing=use_multiprocessing)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_generator.py", line 603, in fit
    steps_name='steps_per_epoch')
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_generator.py", line 332, in model_iteration
    callbacks.on_epoch_end(epoch, epoch_logs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 299, in on_epoch_end
    callback.on_epoch_end(epoch, logs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 968, in on_epoch_end
    self._save_model(epoch=epoch, logs=logs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/callbacks.py", line 1015, in _save_model
    self.model.save(filepath, overwrite=True)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/network.py", line 1171, in save
    signatures)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/save.py", line 109, in save_model
    model, filepath, overwrite, include_optimizer)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 103, in save_model_to_hdf5
    save_weights_to_hdf5_group(model_weights_group, model_layers)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 619, in save_weights_to_hdf5_group
    g = f.create_group(layer.name)
  File "/usr/local/lib/python3.6/dist-packages/h5py/_hl/group.py", line 68, in create_group
    gid = h5g.create(self.id, name, lcpl=lcpl, gcpl=gcpl)
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "h5py/h5g.pyx", line 161, in h5py.h5g.create
ValueError: Unable to create group (name already exists)

Answer 1

Keras 的 GitHub 仓库中有一些与此相关的 issue：

https://github.com/keras-team/keras/issues/6005
https://github.com/keras-team/keras/issues/12195

这个问题不是由自定义损失函数引起的，而是由模型的定义方式引起的。您可以尝试上述链接中提供的解决方案，例如将模型权重保存为 TensorFlow（tf）格式而不是 h5 格式，或者避免在模型的多个位置重复使用同一个激活层实例。如果这不能解决您的问题，请更新问题并附上模型的定义。

Answer 2

问题在于，您在 compute_distillation 函数中通过调用 compute_attention_maps 和 compute_mse 等其他函数来堆叠层（并且给它们起了不合适的名称）。即使您没有给这些层命名，也会得到类似的错误；而在命名之后错误仍然存在，是因为 h5 格式要求层名称符合特定的格式，如此处 https://www.gitmemory.com/issue/keras-team/keras/12195/523749332 所述。一个好的解决方案是在 compute_distillation 函数中使用 Keras 的 Lambda 层来创建 attMap1、attMap2 等，或者定义您自己的自定义 AttentionMaps 层，如下所示。

class AttentionMaps(tf.keras.layers.Layer):
    """Keras layer producing a flattened, softmax-normalised attention map.

    The squared activations of the input are summed over the last axis,
    optionally upsampled by a factor of 2 with bilinear interpolation,
    flattened to two dimensions and normalised with a softmax over the
    flattened axis.

    Args:
        upsample: When true, the summed map is upsampled before flattening.
            The choice is fixed when the layer is constructed.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments such as
            ``name``, ``dtype`` or ``trainable``.
    """

    def __init__(self, upsample=False, **kwargs):
        # Forwarding **kwargs lets Keras rebuild the layer from its config,
        # which always contains the base-layer keys (name, trainable, ...).
        super(AttentionMaps, self).__init__(**kwargs)
        self.upsample = upsample
        # Sub-layers are created once here, not on every call().
        self._upsampler = (
            tf.keras.layers.UpSampling2D(size=(2, 2), interpolation='bilinear')
            if upsample
            else None
        )

    def call(self, inputs):
        """Compute the attention map for ``inputs``.

        Args:
            inputs: Feature tensor; a 4-D tensor is expected because three
                dimensions are indexed after the summed axis is dropped.

        Returns:
            A 2-D tensor ``(dim0, dim1 * dim2)`` normalised by a softmax
            over the last axis.
        """
        att_map = tf.reduce_sum(tf.square(inputs), axis=-1, keepdims=True)
        if self._upsampler is not None:
            att_map = self._upsampler(att_map)
        att_map = tf.squeeze(att_map, axis=-1)
        # Dynamic shape: the spatial size may be unknown at build time.
        shape = tf.shape(att_map)
        att_map = tf.reshape(att_map, (shape[0], shape[1] * shape[2]))
        return tf.nn.softmax(att_map, axis=-1)

    def get_config(self):
        """Return the layer config, including ``upsample``.

        Keras requires this override for layers whose ``__init__`` takes
        extra arguments; without it, saving the model fails.
        """
        config = super(AttentionMaps, self).get_config()
        config.update({"upsample": self.upsample})
        return config

然后可以按照以下示例将此自定义层添加到您的模型中。AttentionMaps 层不再需要显式指定名称，所以我删除了它们。

def compute_distillation(attention_inputs):
    """Sum the losses between the attention maps of consecutive inputs.

    Each attention map is produced by its own ``AttentionMaps`` layer
    instance.

    Args:
        attention_inputs: Iterable of exactly four feature tensors, ordered
            from the first to the last stage.

    Returns:
        A scalar tensor named ``distill_loss``: the sum of the three
        ``compute_mse`` terms for the pairs (1, 2), (2, 3) and (3, 4).
    """
    inp1, inp2, inp3, inp4 = attention_inputs

    attMap1 = AttentionMaps()(inp1)
    attMap2_upsample = AttentionMaps(upsample=True)(inp2)
    attMap2 = AttentionMaps()(inp2)
    attMap3_upsample = AttentionMaps(upsample=True)(inp3)
    attMap3 = AttentionMaps()(inp3)
    # The last map is compared with the non-upsampled map of inp3, so it must
    # not be upsampled; otherwise the flattened sizes would not match.
    attMap4 = AttentionMaps()(inp4)

    # compute_mse requires a name prefix as its third argument.
    distillation1 = compute_mse(attMap1, attMap2_upsample, "distil1_")
    distillation2 = compute_mse(attMap2, attMap3_upsample, "distil2_")
    distillation3 = compute_mse(attMap3, attMap4, "distil3_")

    return tf.math.add_n(
        [distillation1, distillation2, distillation3], name="distill_loss"
    )

无法创建组（名称已存在）

Unable to create group (name already exists)

image-segmentation

h5py

tensorflow

attention-model

tf.keras