Dimensions in TensorFlow/Keras and sparse_categorical_crossentropy
I cannot work out how to use a TensorFlow dataset as input to my model. I have an X of shape (n_sample, max_sentence_size) and a y of shape (n_sample), but I cannot get the dimensions to match, and I am not sure what TensorFlow does internally.
Below is a reproducible example with empty matrices; my real data is not empty, it is an integer representation of text.
import numpy as np
import tensorflow as tf

X_train = np.zeros((16, 6760))
y_train = np.zeros((16))
train = tf.data.Dataset.from_tensor_slices((X_train, y_train))

# Prepare for tensorflow
BUFFER_SIZE = 10000
BATCH_SIZE = 64
VOCAB_SIZE = 5354

train = train.shuffle(BUFFER_SIZE)  # .batch(BATCH_SIZE)

# Select index of interest in text
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=64, mask_zero=False),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(VOCAB_SIZE, activation='softmax'),
])

model.compile(loss="sparse_categorical_crossentropy",
              # loss=tf.keras.losses.MeanAbsoluteError(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['sparse_categorical_accuracy'])

history = model.fit(train, epochs=3)
ValueError Traceback (most recent call last)
<ipython-input-74-3a160a5713dd> in <module>
----> 1 history = model.fit(train, epochs=3,
2 # validation_data=test,
3 # validation_steps=30
4 )
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
340 mode=ModeKeys.TRAIN,
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
344
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
96 # `numpy` translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
613 # This is the first call of __call__, so we have to initialize.
614 initializers = []
--> 615 self._initialize(args, kwds, add_initializers_to=initializers)
616 finally:
617 # At this point we know that the initialization is complete (or less
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
495 self._concrete_stateful_fn = (
496 self._stateful_fn._get_concrete_function_internal_garbage_collected(
# pylint: disable=protected-access
--> 497 *args, **kwds))
498
499 def invalid_creator_scope(*unused_args, **unused_kwds):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args,
**kwargs)
2387 args, kwargs = None, None
2388 with self._lock:
-> 2389 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2390 return graph_function
2391
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2701
2702 self._function_cache.missed.add(call_context_key)
-> 2703 graph_function = self._create_graph_function(args, kwargs)
2704 self._function_cache.primary[cache_key] = graph_function
2705 return graph_function, args, kwargs
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2591 arg_names=arg_names,
2592 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2593 capture_by_value=self._capture_by_value),
2594 self._function_attributes,
2595 # Tell the ConcreteFunction to clean up its graph once it goes out of
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
976 converted_func)
977
--> 978 func_outputs = python_func(*func_args, **func_kwargs)
979
980 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
437 # __wrapped__ allows AutoGraph to swap in a converted function. We give
438 # the function a weak reference to itself to avoid a reference cycle.
--> 439 return weak_wrapped_fn().__wrapped__(*args, **kwds)
440 weak_wrapped_fn = weakref.ref(wrapped_fn)
441
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in distributed_function(input_iterator)
83 args = _prepare_feed_values(model, input_iterator, mode, strategy)
84 outputs = strategy.experimental_run_v2(
---> 85 per_replica_function, args=args)
86 # Out of PerReplica outputs reduce or pick values to return.
87 all_outputs = dist_utils.unwrap_output_dict(
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/distribute/distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
761 fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
762 convert_by_default=False)
--> 763 return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
764
765 def reduce(self, reduce_op, value, axis):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
1817 kwargs = {}
1818 with self._container_strategy().scope():
-> 1819 return self._call_for_each_replica(fn, args, kwargs)
1820
1821 def _call_for_each_replica(self, fn, args, kwargs):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/distribute/distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
2162 self._container_strategy(),
2163 replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2164 return fn(*args, **kwargs)
2165
2166 def _reduce_to(self, reduce_op, value, destinations):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
290 def wrapper(*args, **kwargs):
291 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292 return func(*args, **kwargs)
293
294 if inspect.isfunction(func) or inspect.ismethod(func):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics, standalone)
431 y,
432 sample_weights=sample_weights,
--> 433 output_loss_metrics=model._output_loss_metrics)
434
435 if reset_metrics:
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
310 sample_weights=sample_weights,
311 training=True,
--> 312 output_loss_metrics=output_loss_metrics))
313 if not isinstance(outs, list):
314 outs = [outs]
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
251 output_loss_metrics=output_loss_metrics,
252 sample_weights=sample_weights,
--> 253 training=training))
254 if total_loss is None:
255 raise ValueError('The model cannot be run '
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
165
166 if hasattr(loss_fn, 'reduction'):
--> 167 per_sample_losses = loss_fn.call(targets[i], outs[i])
168 weighted_losses = losses_utils.compute_weighted_loss(
169 per_sample_losses,
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/losses.py in call(self, y_true, y_pred)
219 y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
220 y_pred, y_true)
--> 221 return self.fn(y_true, y_pred, **self._fn_kwargs)
222
223 def get_config(self):
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/losses.py in sparse_categorical_crossentropy(y_true, y_pred, from_logits, axis)
976 def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1):
977 return K.sparse_categorical_crossentropy(
--> 978 y_true, y_pred, from_logits=from_logits, axis=axis)
979
980
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/backend.py in sparse_categorical_crossentropy(target, output, from_logits, axis)
4571 with get_graph().as_default():
4572 res = nn.sparse_softmax_cross_entropy_with_logits_v2(
-> 4573 labels=target, logits=output)
4574 else:
4575 res = nn.sparse_softmax_cross_entropy_with_logits_v2(
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/ops/nn_ops.py in sparse_softmax_cross_entropy_with_logits_v2(labels, logits, name)
3535 """
3536 return sparse_softmax_cross_entropy_with_logits(
-> 3537 labels=labels, logits=logits, name=name)
3538
3539
/opt/conda/lib/python3.6/site-packages/tensorflow_core/python/ops/nn_ops.py in sparse_softmax_cross_entropy_with_logits(_sentinel, labels, logits, name)
3451 "should equal the shape of logits except for the last "
3452 "dimension (received %s)." % (labels_static_shape,
-> 3453 logits.get_shape()))
3454 # Check if no reshapes are required.
3455 if logits.get_shape().ndims == 2:
ValueError: Shape mismatch: The shape of labels (received (1,)) should equal the shape of logits except for the last dimension (received (6760, 5354)).
This works with TensorFlow 2.0.
import numpy as np
import tensorflow as tf

# Prepare for tensorflow
BUFFER_SIZE = 10000
BATCH_SIZE = 64
VOCAB_SIZE = 5354

X_train = np.zeros((16, 6760))
y_train = np.zeros((16, 1))  # This is changed
train = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train = train.shuffle(BUFFER_SIZE).batch(8)  # This is changed

# Select index of interest in text
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=64, input_length=6760, mask_zero=False),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(VOCAB_SIZE, activation='softmax'),
])
print(model.summary())

model.compile(loss="sparse_categorical_crossentropy",
              # loss=tf.keras.losses.MeanAbsoluteError(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['sparse_categorical_accuracy'])

history = model.fit(train, epochs=3)
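To see why this version satisfies the loss, you can inspect the shapes the batched dataset yields. This is a small sketch of mine (not part of the original answer), assuming the `train` pipeline defined just above:

for x_batch, y_batch in train.take(1):
    print(x_batch.shape)  # (8, 6760): the batch axis Keras needs; the model maps it to logits of shape (8, 5354)
    print(y_batch.shape)  # (8, 1): one integer label per sample, squeezed to (8,) by sparse_categorical_crossentropy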
For anyone hitting the same problem: I did not immediately understand rajesh's change. The issue was the missing batch dimension.
I replaced:
train = train.shuffle(BUFFER_SIZE) #.batch(BATCH_SIZE)
with (the .batch call uncommented):
train = train.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
and it worked.
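To make the difference concrete, here is a small illustration of mine (not from the original answers), using the same toy arrays as above, of what the model receives in each case:

import numpy as np
import tensorflow as tf

X_train = np.zeros((16, 6760))
y_train = np.zeros((16))
ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))

print(ds.element_spec)            # x: shape (6760,), y: scalar -- no batch axis,
                                  # so the 6760 tokens get treated as 6760 samples
print(ds.batch(64).element_spec)  # x: shape (None, 6760), y: shape (None,) --
                                  # one label per sequence, as the loss expects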