Tensorflow/keras 中的维度和 sparse_categorical_crossentropy

Question

我不明白如何把 tensorflow 数据集作为模型的输入。我的 X 形状为 (n_sample, max_sentence_size)，y 形状为 (n_sample)，但我无法让两者的维度匹配，也不确定 tensorflow 在内部做了什么。

下面是一个使用全零矩阵的可复现示例；我的实际数据并不是全零的，而是文本的整数编码表示。

# Minimal reproduction using all-zero placeholder data: X_train has 16 rows of
# 6760 values, y_train has 16 entries.
# NOTE(review): `np` and `tf` are used here before any import is visible in this
# snippet (the only import appears further down) — presumably both were imported
# in an earlier notebook cell; confirm before running this standalone.
X_train = np.zeros((16, 6760))
y_train = np.zeros((16))

# Each dataset element pairs one row of X_train with one entry of y_train.
train = tf.data.Dataset.from_tensor_slices((X_train, y_train))

# Prepare for tensorflow
BUFFER_SIZE = 10000
BATCH_SIZE = 64
VOCAB_SIZE = 5354

# The .batch() call is commented out here, so the dataset passed to fit() is
# unbatched. The traceback below reports labels of shape (1,) against logits of
# shape (6760, 5354).
train = train.shuffle(BUFFER_SIZE)#.batch(BATCH_SIZE)



# Select index of interest in text
import tensorflow as tf

# Embedding -> bidirectional GRU -> dense ReLU -> softmax over VOCAB_SIZE classes.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=64, mask_zero=False),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(VOCAB_SIZE, activation='softmax'),
])

# Sparse categorical cross-entropy is selected by name; the final layer emits
# VOCAB_SIZE softmax outputs.
model.compile(loss="sparse_categorical_crossentropy",
              # loss=tf.keras.losses.MeanAbsoluteError(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['sparse_categorical_accuracy'])


# This call is the one that raises the ValueError shown in the traceback below.
history = model.fit(train, epochs=3,
                   )

    ValueError                                Traceback (most recent call last)
    <ipython-input-74-3a160a5713dd> in <module>
    ----> 1 history = model.fit(train, epochs=3,
          2                     # validation_data=test,
          3                     # validation_steps=30
          4                    )
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
        817         max_queue_size=max_queue_size,
        818         workers=workers,
    --> 819         use_multiprocessing=use_multiprocessing)
        820 
        821   def evaluate(self,
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
        340                 mode=ModeKeys.TRAIN,
        341                 training_context=training_context,
    --> 342                 total_epochs=epochs)
        343             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
        344 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
        126         step=step, mode=mode, size=current_batch_size) as batch_logs:
        127       try:
    --> 128         batch_outs = execution_function(iterator)
        129       except (StopIteration, errors.OutOfRangeError):
        130         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
         96     # `numpy` translates Tensors to values in Eager mode.
         97     return nest.map_structure(_non_none_constant_value,
    ---> 98                               distributed_function(input_fn))
         99 
        100   return execution_function
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
        566         xla_context.Exit()
        567     else:
    --> 568       result = self._call(*args, **kwds)
        569 
        570     if tracing_count == self._get_tracing_count():
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
        613       # This is the first call of __call__, so we have to initialize.
        614       initializers = []
    --> 615       self._initialize(args, kwds, add_initializers_to=initializers)
        616     finally:
        617       # At this point we know that the initialization is complete (or less
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
        495     self._concrete_stateful_fn = (
        496         self._stateful_fn._get_concrete_function_internal_garbage_collected( 
# pylint: disable=protected-access
    --> 497             *args, **kwds))
        498 
        499     def invalid_creator_scope(*unused_args, **unused_kwds):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args,
**kwargs)
       2387       args, kwargs = None, None
       2388     with self._lock:
    -> 2389       graph_function, _, _ = self._maybe_define_function(args, kwargs)
       2390     return graph_function
       2391 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
       2701 
       2702       self._function_cache.missed.add(call_context_key)
    -> 2703       graph_function = self._create_graph_function(args, kwargs)
       2704       self._function_cache.primary[cache_key] = graph_function
       2705       return graph_function, args, kwargs
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
       2591             arg_names=arg_names,
       2592             override_flat_arg_shapes=override_flat_arg_shapes,
    -> 2593             capture_by_value=self._capture_by_value),
       2594         self._function_attributes,
       2595         # Tell the ConcreteFunction to clean up its graph once it goes out of
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
        976                                           converted_func)
        977 
    --> 978       func_outputs = python_func(*func_args, **func_kwargs)
        979 
        980       # invariant: `func_outputs` contains only Tensors, CompositeTensors,
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
        437         # __wrapped__ allows AutoGraph to swap in a converted function. We give
        438         # the function a weak reference to itself to avoid a reference cycle.
    --> 439         return weak_wrapped_fn().__wrapped__(*args, **kwds)
        440     weak_wrapped_fn = weakref.ref(wrapped_fn)
        441 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in distributed_function(input_iterator)
         83     args = _prepare_feed_values(model, input_iterator, mode, strategy)
         84     outputs = strategy.experimental_run_v2(
    ---> 85         per_replica_function, args=args)
         86     # Out of PerReplica outputs reduce or pick values to return.
         87     all_outputs = dist_utils.unwrap_output_dict(
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/distribute/distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
        761       fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
        762                                 convert_by_default=False)
    --> 763       return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
        764 
        765   def reduce(self, reduce_op, value, axis):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
       1817       kwargs = {}
       1818     with self._container_strategy().scope():
    -> 1819       return self._call_for_each_replica(fn, args, kwargs)
       1820 
       1821   def _call_for_each_replica(self, fn, args, kwargs):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/distribute/distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
       2162         self._container_strategy(),
       2163         replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
    -> 2164       return fn(*args, **kwargs)
       2165 
       2166   def _reduce_to(self, reduce_op, value, destinations):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
        290   def wrapper(*args, **kwargs):
        291     with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
    --> 292       return func(*args, **kwargs)
        293 
        294   if inspect.isfunction(func) or inspect.ismethod(func):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics, standalone)
        431       y,
        432       sample_weights=sample_weights,
    --> 433       output_loss_metrics=model._output_loss_metrics)
        434 
        435   if reset_metrics:
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
        310           sample_weights=sample_weights,
        311           training=True,
    --> 312           output_loss_metrics=output_loss_metrics))
        313   if not isinstance(outs, list):
        314     outs = [outs]
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
        251               output_loss_metrics=output_loss_metrics,
        252               sample_weights=sample_weights,
    --> 253               training=training))
        254       if total_loss is None:
        255         raise ValueError('The model cannot be run '
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
        165 
        166         if hasattr(loss_fn, 'reduction'):
    --> 167           per_sample_losses = loss_fn.call(targets[i], outs[i])
        168           weighted_losses = losses_utils.compute_weighted_loss(
        169               per_sample_losses,
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/losses.py in call(self, y_true, y_pred)
        219       y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
        220           y_pred, y_true)
    --> 221     return self.fn(y_true, y_pred, **self._fn_kwargs)
        222 
        223   def get_config(self):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/losses.py in sparse_categorical_crossentropy(y_true, y_pred, from_logits, axis)
        976 def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1):
        977   return K.sparse_categorical_crossentropy(
    --> 978       y_true, y_pred, from_logits=from_logits, axis=axis)
        979 
        980 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/backend.py in sparse_categorical_crossentropy(target, output, from_logits, axis)
       4571     with get_graph().as_default():
       4572       res = nn.sparse_softmax_cross_entropy_with_logits_v2(
    -> 4573           labels=target, logits=output)
       4574   else:
       4575     res = nn.sparse_softmax_cross_entropy_with_logits_v2(
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/ops/nn_ops.py in sparse_softmax_cross_entropy_with_logits_v2(labels, logits, name)
       3535   """
       3536   return sparse_softmax_cross_entropy_with_logits(
    -> 3537       labels=labels, logits=logits, name=name)
       3538 
       3539 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/ops/nn_ops.py in sparse_softmax_cross_entropy_with_logits(_sentinel, labels, logits, name)
       3451                        "should equal the shape of logits except for the last "
       3452                        "dimension (received %s)." % (labels_static_shape,
    -> 3453                                                      logits.get_shape()))
       3454     # Check if no reshapes are required.
       3455     if logits.get_shape().ndims == 2:
    
    ValueError: Shape mismatch: The shape of labels (received (1,)) should equal the shape of logits except for the last dimension (received (6760, 5354)).

Answer 1

这适用于 Tensorflow 2.0。

# Corrected version of the question's reproduction: the dataset is batched
# before being passed to fit(), and the labels carry an explicit trailing axis.
import numpy as np
import tensorflow as tf  # required: `tf` is used throughout this snippet

# Prepare for tensorflow
BUFFER_SIZE = 10000
BATCH_SIZE = 64  # kept from the question; the batch size below is hard-coded to 8
VOCAB_SIZE = 5354


# All-zero placeholder data: 16 rows of 6760 values, with one label per row.
X_train = np.zeros((16,6760))
y_train = np.zeros((16,1))  # This is changed
train = tf.data.Dataset.from_tensor_slices((X_train, y_train))

# Batching groups the 16 samples into batches of 8 before training.
train = train.shuffle(BUFFER_SIZE).batch(8) # This is changed

# Select index of interest in text

# Embedding -> bidirectional GRU -> dense ReLU -> softmax over VOCAB_SIZE classes.
# NOTE(review): `input_length` is accepted by the TF 2.0 Keras API this answer
# targets; newer Keras releases appear to deprecate it — confirm before upgrading.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=64,input_length= 6760, mask_zero=False),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(VOCAB_SIZE, activation='softmax'),
])

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit an extra "None" line).
model.summary()

model.compile(loss="sparse_categorical_crossentropy",
              # loss=tf.keras.losses.MeanAbsoluteError(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['sparse_categorical_accuracy'])

history = model.fit(train, epochs=3)

Answer 2

给遇到同样问题的人：我一开始没有看懂 rajesh 所做的修改——问题的根源在于数据缺少 batch 维度。

我替换了：

# Before: .batch() is commented out, so the dataset is left unbatched.
train = train.shuffle(BUFFER_SIZE)  #.batch(BATCH_SIZE)

替换为（取消了 "batch" 的注释）：

# After: .batch() is applied, so elements are grouped into batches of up to BATCH_SIZE.
train = train.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

成功了。

Tensorflow/keras 中的维度和 sparse_categorical_crossentropy

Dimension in Tensorflow / keras and sparse_categorical_crossentropy

nlp

keras

tensorflow

cross-entropy