keras model.fit ValueError: The outer 2 dimensions of indices.shape=[1,11,1] must match the outer 2 dimensions of updates.shape=[2]
keras model.fit ValueError: The outer 2 dimensions of indices.shape=[1,11,1] must match the outer 2 dimensions of updates.shape=[2]
我正在使用自定义损失函数和自定义评估指标训练 keras 模型。不加评估指标时可以正常训练,但是当我像下面这样加上指标进行训练时,它给出了以下错误:
model.compile(optimizer= keras.optimizers.Adam(learning_rate = 1e-3), loss = inner_product, metrics=dice_index_metric)
model.fit([X_train], [y_train], epochs=50, batch_size = 1, validation_split=0.2,
callbacks = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5), verbose=2)
错误:
ValueError Traceback (most recent call last)
<ipython-input-38-270dbe25d468> in <module>
----> 1 hist = model.fit([X_train], [y_train1], epochs=50, batch_size = 1, validation_split=0.2,
2 callbacks = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5), verbose=2)
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
821 # This is the first call of __call__, so we have to initialize.
822 initializers = []
--> 823 self._initialize(args, kwds, add_initializers_to=initializers)
824 finally:
825 # At this point we know that the initialization is complete (or less
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
694 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
695 self._concrete_stateful_fn = (
--> 696 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
697 *args, **kwds))
698
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2853 args, kwargs = None, None
2854 with self._lock:
-> 2855 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2856 return graph_function
2857
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
3211
3212 self._function_cache.missed.add(call_context_key)
-> 3213 graph_function = self._create_graph_function(args, kwargs)
3214 self._function_cache.primary[cache_key] = graph_function
3215 return graph_function, args, kwargs
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3063 arg_names = base_arg_names + missing_arg_names
3064 graph_function = ConcreteFunction(
-> 3065 func_graph_module.func_graph_from_py_func(
3066 self._name,
3067 self._python_function,
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
984 _, original_func = tf_decorator.unwrap(python_func)
985
--> 986 func_outputs = python_func(*func_args, **func_kwargs)
987
988 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
598 # __wrapped__ allows AutoGraph to swap in a converted function. We give
599 # the function a weak reference to itself to avoid a reference cycle.
--> 600 return weak_wrapped_fn().__wrapped__(*args, **kwds)
601 weak_wrapped_fn = weakref.ref(wrapped_fn)
602
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
ValueError: in user code:
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py:806 train_function *
return step_function(self, iterator)
<ipython-input-36-2c54f0983574>:5 dice_index_metric *
y_pred1 = tf.scatter_nd(ind, updates, tf.shape(y_pred))
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\ops\gen_array_ops.py:8855 scatter_nd **
_, _, _op, _outputs = _op_def_library._apply_op_helper(
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\op_def_library.py:742 _apply_op_helper
op = g._create_op_internal(op_type_name, inputs, dtypes=None,
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\func_graph.py:591 _create_op_internal
return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:3477 _create_op_internal
ret = Operation(
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:1974 __init__
self._c_op = _create_c_op(self._graph, node_def, inputs,
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:1815 _create_c_op
raise ValueError(str(e))
ValueError: The outer 2 dimensions of indices.shape=[1,11,1] must match the outer 2 dimensions of updates.shape=[2]: Shapes must be equal rank, but are 2 and 1 for '{{node ScatterNd}} = ScatterNd[T=DT_INT32, Tindices=DT_INT32](strided_slice_2, Const_3, Shape_1)' with input shapes: [1,11,1], [2], [2].
自定义指标如下:
# Metric from the question: binarize y_pred (intended: 1 at its two largest
# entries, 0 elsewhere) and return
# sum(min(y_true, y_pred1)) / sum(max(y_true, y_pred1)), i.e. intersection / union.
def dice_index_metric(y_true, y_pred):
# NOTE(review): `[-2:]` slices the FIRST axis, not the last one. With a batched
# y_pred of shape [N, 11] it keeps whole rows instead of the top-2 indices per
# row (the traceback reports indices.shape=[1,11,1]).
ind = tf.argsort(y_pred,axis=-1,direction='ASCENDING',stable=False,name=None)[-2:]
# Append a trailing axis so every index becomes a length-1 coordinate for scatter_nd.
ind = ind[..., tf.newaxis]
updates = tf.constant([1, 1])
# NOTE(review): `updates` has shape [2], which does not match the outer
# dimensions of `ind`; this is the call that raises the ValueError.
y_pred1 = tf.scatter_nd(ind, updates, tf.shape(y_pred))
# Element-wise minimum, then summed with no axis argument ("intersection").
innerproduct = tf.minimum(y_true, y_pred1)
innerproduct = tf.reduce_sum(innerproduct)
# Element-wise maximum, then summed with no axis argument ("union").
union= tf.maximum(y_true, y_pred1)
union = tf.reduce_sum(union)
return innerproduct/union
自定义指标将预测向量转换为一个二值向量:预测值最大的 2 个元素置为 1,其他元素置为 0;然后将其与真值进行比较,并计算它们的(交集元素数)/(并集元素数)。例如,假设模型的预测是:
pred = [0.01, 0.3, 0.4, 0.01, 0.01, 0.2, 0.02, 0.05],
最大的 2 个值是 0.3 和 0.4,索引为 1、2。那么推荐向量应该是:
推荐 = [0, 1, 1, 0, 0, 0, 0, 0],
如果真值如下,则两者的交集只有索引 2,而并集为索引 [1,2,3],因此我应该返回 1/3。
真实 = [0, 0, 1, 1, 0, 0, 0, 0]
模型:
inputs = keras.Input(shape =(None,23))
features = layers.LSTM(100)(inputs)
next = layers.Dense(11, activation=activations.sigmoid)(features)
next = layers.Softmax()(next)
model = keras.Model(inputs=[inputs] , outputs=next, name="LSTMmodel2")
首先,看您代码中的这一行:
ind = tf.argsort(y_pred,axis=-1,direction='ASCENDING',stable=False,name=None)[-2:]
你忘了 y_pred 是按批次(batch)处理的。这意味着 y_pred 的形状不是 [11,] 而是 [N,11];根据您的错误消息,这里的批量大小为 1。
因此,上面那一行的切片 [-2:] 作用在批处理轴(即轴 0)上,而不是最后一个轴。
这就是错误信息中显示如下形状的原因:
indices.shape=[1,11,1]
第二,scatter_nd 不是这样工作的。与 gather_nd 不同,它不支持 batch_dims。只有 'indices' 最后一个轴上的值决定 'updates' 中的元素写入到哪个位置。
例如,
tf.scatter_nd([[1,2],[1,3]],[1,1],shape=(2,10))
#<tf.Tensor: shape=(2, 10), dtype=int32,
#numpy=array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
# [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]])>
所以在你的情况下,
N = tf.shape(y_pred)[0]
# Shape: [N,2]
ind = tf.argsort(y_pred,axis=-1,direction='ASCENDING',stable=False,name=None)[:,-2:]
# Shape: [N,2,1]
ind = ind[...,tf.newaxis]
# Dummy range to add index for batch axis
# Shape : [N,1,1]
r = tf.range(N)[:,tf.newaxis,tf.newaxis]
# Shape : [N,2,1]
r = tf.repeat(r,2,axis=1)
# Shape : [N,2,2]
ind = tf.concat([r,ind],axis=-1)
# Shape : [N,2]
updates = tf.ones((N,2))
y_pred1 = tf.scatter_nd(ind, updates, tf.shape(y_pred))
这才是 tf.scatter_nd 的正确使用方式。
然而,这样写看起来很繁琐。我更推荐使用下面这种方式:
# Sort each row ascending and take the value at position -2 (the 2nd-largest),
# kept as a trailing length-1 axis so it broadcasts against y_pred.
# Assumes y_pred is [N, 11], as the answer describes - TODO confirm.
second_max = tf.sort(y_pred,axis=-1,direction='ASCENDING')[:,-2,tf.newaxis]
# 1 where an entry is >= its row's 2nd-largest value, 0 elsewhere.
# NOTE(review): entries tied with the 2nd-largest value are all marked, so a row
# can get more than two 1s; the result is int32 - confirm that it matches
# y_true's dtype before feeding it to tf.minimum / tf.maximum.
y_pred1 = tf.cast(y_pred>=second_max,tf.int32)
我正在使用自定义损失函数和自定义评估指标训练 keras 模型。不加评估指标时可以正常训练,但是当我像下面这样加上指标进行训练时,它给出了以下错误:
model.compile(optimizer= keras.optimizers.Adam(learning_rate = 1e-3), loss = inner_product, metrics=dice_index_metric)
model.fit([X_train], [y_train], epochs=50, batch_size = 1, validation_split=0.2,
callbacks = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5), verbose=2)
错误:
ValueError Traceback (most recent call last)
<ipython-input-38-270dbe25d468> in <module>
----> 1 hist = model.fit([X_train], [y_train1], epochs=50, batch_size = 1, validation_split=0.2,
2 callbacks = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5), verbose=2)
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
821 # This is the first call of __call__, so we have to initialize.
822 initializers = []
--> 823 self._initialize(args, kwds, add_initializers_to=initializers)
824 finally:
825 # At this point we know that the initialization is complete (or less
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
694 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
695 self._concrete_stateful_fn = (
--> 696 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
697 *args, **kwds))
698
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2853 args, kwargs = None, None
2854 with self._lock:
-> 2855 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2856 return graph_function
2857
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
3211
3212 self._function_cache.missed.add(call_context_key)
-> 3213 graph_function = self._create_graph_function(args, kwargs)
3214 self._function_cache.primary[cache_key] = graph_function
3215 return graph_function, args, kwargs
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3063 arg_names = base_arg_names + missing_arg_names
3064 graph_function = ConcreteFunction(
-> 3065 func_graph_module.func_graph_from_py_func(
3066 self._name,
3067 self._python_function,
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
984 _, original_func = tf_decorator.unwrap(python_func)
985
--> 986 func_outputs = python_func(*func_args, **func_kwargs)
987
988 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
598 # __wrapped__ allows AutoGraph to swap in a converted function. We give
599 # the function a weak reference to itself to avoid a reference cycle.
--> 600 return weak_wrapped_fn().__wrapped__(*args, **kwds)
601 weak_wrapped_fn = weakref.ref(wrapped_fn)
602
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
971 except Exception as e: # pylint:disable=broad-except
972 if hasattr(e, "ag_error_metadata"):
--> 973 raise e.ag_error_metadata.to_exception(e)
974 else:
975 raise
ValueError: in user code:
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py:806 train_function *
return step_function(self, iterator)
<ipython-input-36-2c54f0983574>:5 dice_index_metric *
y_pred1 = tf.scatter_nd(ind, updates, tf.shape(y_pred))
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\ops\gen_array_ops.py:8855 scatter_nd **
_, _, _op, _outputs = _op_def_library._apply_op_helper(
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\op_def_library.py:742 _apply_op_helper
op = g._create_op_internal(op_type_name, inputs, dtypes=None,
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\func_graph.py:591 _create_op_internal
return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:3477 _create_op_internal
ret = Operation(
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:1974 __init__
self._c_op = _create_c_op(self._graph, node_def, inputs,
C:\Users\haluk\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\framework\ops.py:1815 _create_c_op
raise ValueError(str(e))
ValueError: The outer 2 dimensions of indices.shape=[1,11,1] must match the outer 2 dimensions of updates.shape=[2]: Shapes must be equal rank, but are 2 and 1 for '{{node ScatterNd}} = ScatterNd[T=DT_INT32, Tindices=DT_INT32](strided_slice_2, Const_3, Shape_1)' with input shapes: [1,11,1], [2], [2].
自定义指标如下:
# Metric from the question: binarize y_pred (intended: 1 at its two largest
# entries, 0 elsewhere) and return
# sum(min(y_true, y_pred1)) / sum(max(y_true, y_pred1)), i.e. intersection / union.
def dice_index_metric(y_true, y_pred):
# NOTE(review): `[-2:]` slices the FIRST axis, not the last one. With a batched
# y_pred of shape [N, 11] it keeps whole rows instead of the top-2 indices per
# row (the traceback reports indices.shape=[1,11,1]).
ind = tf.argsort(y_pred,axis=-1,direction='ASCENDING',stable=False,name=None)[-2:]
# Append a trailing axis so every index becomes a length-1 coordinate for scatter_nd.
ind = ind[..., tf.newaxis]
updates = tf.constant([1, 1])
# NOTE(review): `updates` has shape [2], which does not match the outer
# dimensions of `ind`; this is the call that raises the ValueError.
y_pred1 = tf.scatter_nd(ind, updates, tf.shape(y_pred))
# Element-wise minimum, then summed with no axis argument ("intersection").
innerproduct = tf.minimum(y_true, y_pred1)
innerproduct = tf.reduce_sum(innerproduct)
# Element-wise maximum, then summed with no axis argument ("union").
union= tf.maximum(y_true, y_pred1)
union = tf.reduce_sum(union)
return innerproduct/union
自定义指标将预测向量转换为一个二值向量:预测值最大的 2 个元素置为 1,其他元素置为 0;然后将其与真值进行比较,并计算它们的(交集元素数)/(并集元素数)。例如,假设模型的预测是:
pred = [0.01, 0.3, 0.4, 0.01, 0.01, 0.2, 0.02, 0.05],
最大的 2 个值是 0.3 和 0.4,索引为 1、2。那么推荐向量应该是:
推荐 = [0, 1, 1, 0, 0, 0, 0, 0],
如果真值如下,则两者的交集只有索引 2,而并集为索引 [1,2,3],因此我应该返回 1/3。
真实 = [0, 0, 1, 1, 0, 0, 0, 0]
模型:
inputs = keras.Input(shape =(None,23))
features = layers.LSTM(100)(inputs)
next = layers.Dense(11, activation=activations.sigmoid)(features)
next = layers.Softmax()(next)
model = keras.Model(inputs=[inputs] , outputs=next, name="LSTMmodel2")
首先,看您代码中的这一行:
ind = tf.argsort(y_pred,axis=-1,direction='ASCENDING',stable=False,name=None)[-2:]
你忘了 y_pred 是按批次(batch)处理的。这意味着 y_pred 的形状不是 [11,] 而是 [N,11];根据您的错误消息,这里的批量大小为 1。因此,上面那一行的切片 [-2:] 作用在批处理轴(即轴 0)上,而不是最后一个轴。
这就是错误信息中显示 indices.shape=[1,11,1] 的原因。
第二,scatter_nd 不是这样工作的。与 gather_nd 不同,它不支持 batch_dims。只有 'indices' 最后一个轴上的值决定 'updates' 中的元素写入到哪个位置。
例如,
tf.scatter_nd([[1,2],[1,3]],[1,1],shape=(2,10))
#<tf.Tensor: shape=(2, 10), dtype=int32,
#numpy=array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
# [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]])>
所以在你的情况下,
N = tf.shape(y_pred)[0]
# Shape: [N,2]
ind = tf.argsort(y_pred,axis=-1,direction='ASCENDING',stable=False,name=None)[:,-2:]
# Shape: [N,2,1]
ind = ind[...,tf.newaxis]
# Dummy range to add index for batch axis
# Shape : [N,1,1]
r = tf.range(N)[:,tf.newaxis,tf.newaxis]
# Shape : [N,2,1]
r = tf.repeat(r,2,axis=1)
# Shape : [N,2,2]
ind = tf.concat([r,ind],axis=-1)
# Shape : [N,2]
updates = tf.ones((N,2))
y_pred1 = tf.scatter_nd(ind, updates, tf.shape(y_pred))
这才是 tf.scatter_nd 的正确使用方式。
然而,这样写看起来很繁琐。我更推荐使用下面这种方式:
# Sort each row ascending and take the value at position -2 (the 2nd-largest),
# kept as a trailing length-1 axis so it broadcasts against y_pred.
# Assumes y_pred is [N, 11], as the answer describes - TODO confirm.
second_max = tf.sort(y_pred,axis=-1,direction='ASCENDING')[:,-2,tf.newaxis]
# 1 where an entry is >= its row's 2nd-largest value, 0 elsewhere.
# NOTE(review): entries tied with the 2nd-largest value are all marked, so a row
# can get more than two 1s; the result is int32 - confirm that it matches
# y_true's dtype before feeding it to tf.minimum / tf.maximum.
y_pred1 = tf.cast(y_pred>=second_max,tf.int32)