ValueError: logits and labels must have the same shape ((1, 21) vs (21, 1))
ValueError: logits and labels must have the same shape ((1, 21) vs (21, 1))
我正在尝试使用 huggingface TFBertModel
重现 this 示例来执行分类任务。
我的模型与示例几乎相同,但我正在执行多标签分类。出于这个原因,我使用 sklearn 的 MultiLabelBinarizer
.
对我的标签进行了二值化
然后,我调整了我的模型以进行相应的预测。
def loadBertModel(max_length,n_classes):
bert_model = TFBertModel.from_pretrained('bert-base-multilingual-uncased')
input_ids = keras.Input(shape=(max_length,), dtype=np.int32)
attention_mask = keras.Input(shape=(max_length,), dtype=np.int32)
token_type_ids = keras.Input(shape=(max_length,), dtype=np.int32)
_, output = bert_model([input_ids, attention_mask,token_type_ids])
output = keras.layers.Dense(n_classes, activation="sigmoid", name="dense_out_dom")(output)
model = keras.Model(
inputs=[input_ids, attention_mask,token_type_ids],
outputs=output,
name='bert_classifier',
)
model.compile(
optimizer=Adam(lr=2e-5),
loss=keras.losses.BinaryCrossentropy(from_logits=True),
)
model.summary()
return model
此外,我正在使用 tensorflow 的 Dataset
来生成模型的输入:
def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
return {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
"attention_mask": attention_masks,
}, label
def tokenize_sequences(tokenizer, max_length, corpus, labels):
input_ids = []
token_type_ids = []
attention_masks = []
for i in tqdm(range(len(corpus))):
encoded = tokenizer.encode_plus(
corpus[i],
max_length=max_length,
add_special_tokens=True,
padding='max_length',
truncation=True,
return_token_type_ids=True,
return_attention_mask=True, # add attention mask to not focus on pad tokens)
return_tensors="tf"
)
input_ids.append(encoded["input_ids"])
attention_masks.append(encoded["attention_mask"])
token_type_ids.append(encoded["token_type_ids"])
input_ids = tf.convert_to_tensor(input_ids)
attention_masks = tf.convert_to_tensor(attention_masks)
token_type_ids = tf.convert_to_tensor(token_type_ids)
labels = labels.toarray()
return tf.data.Dataset.from_tensor_slices((input_ids, attention_masks, token_type_ids, labels)).map(map_example_to_dict)
最后,当我尝试拟合我的模型时,我发现 logits 和标签的形状不一致:
ValueError: logits and labels must have the same shape ((1, 21) vs (21, 1))
我真的不知道 Dataset
转换是否干扰了我输入的形状,或者我是否遗漏了一些其他细节。有什么想法吗?
完整堆栈跟踪:
ValueError Traceback(最后一次调用)
<ipython-input-42-19f4c0665eeb> in <module>()
4 epochs=N_EPOCHS,
5 verbose=1,
----> 6 batch_size=1,
7 )
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
821 # This is the first call of __call__, so we have to initialize.
822 initializers = []
--> 823 self._initialize(args, kwds, add_initializers_to=initializers)
824 finally:
825 # At this point we know that the initialization is complete (or less
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
695 self._concrete_stateful_fn = (
696 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 697 *args, **kwds))
698
699 def invalid_creator_scope(*unused_args, **unused_kwds):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2853 args, kwargs = None, None
2854 with self._lock:
-> 2855 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2856 return graph_function
2857
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3211
3212 self._function_cache.missed.add(call_context_key)
-> 3213 graph_function = self._create_graph_function(args, kwargs)
3214 self._function_cache.primary[cache_key] = graph_function
3215 return graph_function, args, kwargs
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3073 arg_names=arg_names,
3074 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3075 capture_by_value=self._capture_by_value),
3076 self._function_attributes,
3077 function_spec=self.function_spec,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
984 _, original_func = tf_decorator.unwrap(python_func)
985
--> 986 func_outputs = python_func(*func_args, **func_kwargs)
987
988 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
598 # __wrapped__ allows AutoGraph to swap in a converted function. We give
599 # the function a weak reference to itself to avoid a reference cycle.
--> 600 return weak_wrapped_fn().__wrapped__(*args, **kwds)
601 weak_wrapped_fn = weakref.ref(wrapped_fn)
602
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:806 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:789 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:749 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:149 __call__
losses = ag_call(y_true, y_pred)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:253 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:1605 binary_crossentropy
K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:4814 binary_crossentropy
return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:174 sigmoid_cross_entropy_with_logits
(logits.get_shape(), labels.get_shape()))
ValueError: logits and labels must have the same shape ((1, 21) vs (21, 1))
对于单个示例,您的标签似乎具有 (21,1)
的形状,表示 21 个数据点。相反,您有 1 个数据点和 21 个可能的标签。因此,它应该是 (1,21)
。您必须相应地重塑数据。
我正在尝试使用 huggingface TFBertModel
重现 this 示例来执行分类任务。
我的模型与示例几乎相同,但我正在执行多标签分类。出于这个原因,我使用 sklearn 的 MultiLabelBinarizer
.
然后,我调整了我的模型以进行相应的预测。
def loadBertModel(max_length,n_classes):
bert_model = TFBertModel.from_pretrained('bert-base-multilingual-uncased')
input_ids = keras.Input(shape=(max_length,), dtype=np.int32)
attention_mask = keras.Input(shape=(max_length,), dtype=np.int32)
token_type_ids = keras.Input(shape=(max_length,), dtype=np.int32)
_, output = bert_model([input_ids, attention_mask,token_type_ids])
output = keras.layers.Dense(n_classes, activation="sigmoid", name="dense_out_dom")(output)
model = keras.Model(
inputs=[input_ids, attention_mask,token_type_ids],
outputs=output,
name='bert_classifier',
)
model.compile(
optimizer=Adam(lr=2e-5),
loss=keras.losses.BinaryCrossentropy(from_logits=True),
)
model.summary()
return model
此外,我正在使用 tensorflow 的 Dataset
来生成模型的输入:
def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
return {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
"attention_mask": attention_masks,
}, label
def tokenize_sequences(tokenizer, max_length, corpus, labels):
input_ids = []
token_type_ids = []
attention_masks = []
for i in tqdm(range(len(corpus))):
encoded = tokenizer.encode_plus(
corpus[i],
max_length=max_length,
add_special_tokens=True,
padding='max_length',
truncation=True,
return_token_type_ids=True,
return_attention_mask=True, # add attention mask to not focus on pad tokens)
return_tensors="tf"
)
input_ids.append(encoded["input_ids"])
attention_masks.append(encoded["attention_mask"])
token_type_ids.append(encoded["token_type_ids"])
input_ids = tf.convert_to_tensor(input_ids)
attention_masks = tf.convert_to_tensor(attention_masks)
token_type_ids = tf.convert_to_tensor(token_type_ids)
labels = labels.toarray()
return tf.data.Dataset.from_tensor_slices((input_ids, attention_masks, token_type_ids, labels)).map(map_example_to_dict)
最后,当我尝试拟合我的模型时,我发现 logits 和标签的形状不一致:
ValueError: logits and labels must have the same shape ((1, 21) vs (21, 1))
我真的不知道 Dataset
转换是否干扰了我输入的形状,或者我是否遗漏了一些其他细节。有什么想法吗?
完整堆栈跟踪:
ValueError Traceback(最后一次调用)
<ipython-input-42-19f4c0665eeb> in <module>()
4 epochs=N_EPOCHS,
5 verbose=1,
----> 6 batch_size=1,
7 )
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
821 # This is the first call of __call__, so we have to initialize.
822 initializers = []
--> 823 self._initialize(args, kwds, add_initializers_to=initializers)
824 finally:
825 # At this point we know that the initialization is complete (or less
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
695 self._concrete_stateful_fn = (
696 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 697 *args, **kwds))
698
699 def invalid_creator_scope(*unused_args, **unused_kwds):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2853 args, kwargs = None, None
2854 with self._lock:
-> 2855 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2856 return graph_function
2857
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3211
3212 self._function_cache.missed.add(call_context_key)
-> 3213 graph_function = self._create_graph_function(args, kwargs)
3214 self._function_cache.primary[cache_key] = graph_function
3215 return graph_function, args, kwargs
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3073 arg_names=arg_names,
3074 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3075 capture_by_value=self._capture_by_value),
3076 self._function_attributes,
3077 function_spec=self.function_spec,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
984 _, original_func = tf_decorator.unwrap(python_func)
985
--> 986 func_outputs = python_func(*func_args, **func_kwargs)
987
988 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
598 # __wrapped__ allows AutoGraph to swap in a converted function. We give
599 # the function a weak reference to itself to avoid a reference cycle.
--> 600 return weak_wrapped_fn().__wrapped__(*args, **kwds)
601 weak_wrapped_fn = weakref.ref(wrapped_fn)
602
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:806 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:789 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:749 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:149 __call__
losses = ag_call(y_true, y_pred)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:253 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:1605 binary_crossentropy
K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:4814 binary_crossentropy
return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:174 sigmoid_cross_entropy_with_logits
(logits.get_shape(), labels.get_shape()))
ValueError: logits and labels must have the same shape ((1, 21) vs (21, 1))
对于单个示例,您的标签似乎具有 (21,1)
的形状,表示 21 个数据点。相反,您有 1 个数据点和 21 个可能的标签。因此,它应该是 (1,21)
。您必须相应地重塑数据。