dimensions must equal error 但它们是相等的

Question

我在“discriminator_loss”函数中添加了打印以查看发生了什么。起初它会告诉我两者的形状都是 16。后来它告诉我“real_loss”的形状只有 15，而另一个保持 16。到目前为止，我只尝试降低批量大小并将它们增加 1等。我提供了代码中最相关的部分。如果需要，我可以提供其余代码。我不知道为什么会这样，它破坏了代码。

with strategy.scope():
  BATCH_SIZE = 16
  GLOBAL_BATCH_SIZE = 32#batchsize*# of gpus
  im_size = 256
  latent_size = 512
with strategy.scope():
  cross_entropy = tf.keras.losses.BinaryCrossentropy(
    from_logits=True,\
    reduction = tf.keras.losses.Reduction.NONE)

  #this is used to evaluate discriminators ability to discriminate
  def discriminator_loss(real_output, fake_output):
      real_loss = cross_entropy(tf.ones_like(real_output), real_output)#compares prediction to actual value of 1
      fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)#compares rediction to actual value of 0
      print(real_loss)
      print(fake_loss)
      total_loss = real_loss + fake_loss
      total_loss = total_loss/GLOBAL_BATCH_SIZE
      return total_loss


  #how well was generator able to trick discriminator
  def generator_loss(fake_output):
      gen_loss = cross_entropy(tf.ones_like(fake_output), fake_output)#compares predictions to the expected value 1 of a real image
      gen_loss = gen_loss / GLOBAL_BATCH_SIZE
      return gen_loss
with strategy.scope():
  EPOCHS = 80
  noise_dim = 512
  num_examples_to_generate = 32



# We will reuse this seed overtime (so it's easier)
# to visualize progress in the animated GIF)
with strategy.scope():
  def noise(n):
    return tf.random.normal([n, latent_size])

  def noiseImage(n):
    return tf.random.uniform([n, im_size, im_size, 1])
  #seed = tf.random.normal([num_examples_to_generate, noise_dim])



#seed used to generate image>the discriminator than classifies real images from training set and a set of generated images>loss is calculated and gradients are used to update the model
# Notice the use of `tf.function`
# This annotation causes the function to be "compiled".
with strategy.scope():
  #@tf.function
  def train_step(images):
      with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator((noise(BATCH_SIZE), noiseImage(BATCH_SIZE), np.ones([BATCH_SIZE,1])), training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        g_loss = generator_loss(fake_output)#runs generator loss
        d_loss = discriminator_loss(real_output, fake_output)#runs disc loss
            
      G_grads = gen_tape.gradient(g_loss, generator.trainable_variables)
      D_grads = disc_tape.gradient(d_loss, discriminator.trainable_variables)

      generator_optimizer.apply_gradients(zip(G_grads, generator.trainable_variables))
      discriminator_optimizer.apply_gradients(zip(D_grads, discriminator.trainable_variables))

      #run g_optim twice to make sure d_loss doesn't go to zero
      with tf.GradientTape() as gen_tape:
        generated_imgs = generator((noise(BATCH_SIZE), noiseImage(BATCH_SIZE), np.ones([BATCH_SIZE,1])), training=True)
        fake_output = discriminator(generated_imgs, training=True)
        g_loss = generator_loss(fake_output)

      G_grads = gen_tape.gradient(g_loss, generator.trainable_variables)
      generator_optimizer.apply_gradients(zip(G_grads, generator.trainable_variables))

      return g_loss, d_loss


  @tf.function
  def distributed_train_step(dist_dataset):
      per_replica_g_losses, per_replica_d_losses = strategy.run(train_step, args=(dist_dataset,))
      total_g_loss = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_g_losses,axis=0)
      total_d_loss = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_d_losses,axis=0)
      return total_g_loss, total_d_loss


with strategy.scope():
  def train(dist_dataset, epochs):
    for epoch in range(epochs):
      start = time.time()
      for image_batch in dist_dataset:
        total_g_loss, total_d_loss = distributed_train_step(image_batch)#runs train_step function


with strategy.scope():
  train(dist_dataset, EPOCHS)#in some cases can take up to 20000 epochs to train well

错误和回溯

Traceback (most recent call last):
  File "C:\image generator\pixiv\#image generator.py", line 507, in <module>
    train(dist_dataset, EPOCHS)#in some cases can take up to 20000 epochs to train well
  File "C:\image generator\pixiv\#image generator.py", line 441, in train
    total_g_loss, total_d_loss = distributed_train_step(image_batch)#runs train_step function
  File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 580, in __call__
    result = self._call(*args, **kwds)
  File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 611, in _call
    return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
  File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\function.py", line 2419, in __call__
    graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
  File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\function.py", line 2777, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)
  File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\function.py", line 2667, in _create_graph_function
    capture_by_value=self._capture_by_value),
  File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\func_graph.py", line 981, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)
  File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 441, in wrapped_fn
    return weak_wrapped_fn().__wrapped__(*args, **kwds)
  File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\func_graph.py", line 968, in wrapper
    raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:

    C:\image generator\pixiv\#image generator.py:419 distributed_train_step  *
        per_replica_g_losses, per_replica_d_losses = strategy.run(train_step, args=(dist_dataset,))
    C:\image generator\pixiv\#image generator.py:393 train_step  *
        d_loss = discriminator_loss(real_output, fake_output)#runs disc loss
    C:\image generator\pixiv\#image generator.py:328 discriminator_loss  *
        total_loss = real_loss + fake_loss
    C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\ops\math_ops.py:984 binary_op_wrapper
        return func(x, y, name=name)
    C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\ops\math_ops.py:1276 _add_dispatch
        return gen_math_ops.add_v2(x, y, name=name)
    C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\ops\gen_math_ops.py:483 add_v2
        "AddV2", x=x, y=y, name=name)
    C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:744 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\func_graph.py:595 _create_op_internal
        compute_device)
    C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\ops.py:3327 _create_op_internal
        op_def=op_def)
    C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\ops.py:1817 __init__
        control_input_ops, op_def)
    C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\ops.py:1657 _create_c_op
        raise ValueError(str(e))

    ValueError: Dimensions must be equal, but are 0 and 2 for '{{node replica_1/add}} = AddV2[T=DT_FLOAT](replica_1/binary_crossentropy_1/weighted_loss/Mul, replica_1/binary_crossentropy_2/weighted_loss/Mul)' with input shapes: [0], [2].

Answer 1

所以根据问题在于批大小不相等，因为最终批小于指定的批大小。我相信这是由于这一行：

generated_images = generator((noise(BATCH_SIZE), noiseImage(BATCH_SIZE), np.ones([BATCH_SIZE,1])), training=True)

其中使用了常数大小 BATCH_SIZE，而不是批次的实际输入形状，因此 generated_images 的形状不同于 images。

所以提到的一种解决方案是简单地在 batch() 中使用 drop_remainder=True。但是，最好让生成器输出与输入形状相同的图像，因此不要将 BATCH_SIZE 作为参数传递给噪声生成函数，而应使用输入批次的实际大小。所以也许使用 tf.shape(images)[0] 会有所帮助。或者，您可以使用 BATCH_SIZE 生成一批固定的图像，然后简单地丢弃任何额外的图像，例如

num_images = tf.shape(images)[0]
generated_images = generated_images[:num_images]

dimensions must equal error 但它们是相等的

dimensions must equal error but they are equal

tensorflow

stylegan