训练 seq2seq 模型时出现 InvalidArgumentError
InvalidArgumentError when training a seq2seq model
我正在尝试对单词使用序列到序列模型,但在训练期间我不断收到无效参数错误。我不知道我在这里做错了什么。请帮帮我。
这是重现我遇到的错误的示例代码。
我使用的环境是:tensorflow 2.0.0、cudatoolkit 10.0.130、cudnn 7.6.4。
import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, LSTM, Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
# Toy parallel corpus: English source sentences and their French targets.
# Every target is wrapped in <sos>/<eos> markers.
x = ['this is really good',
     'i am feeling better',
     'yesterday was a bad day',
     'today is better']
y = ['<sos> Ceci est vraiment bon <eos>',
     '<sos> je me sens mieux <eos>',
     '<sos> hier était une mauvaise journée <eos>',
     "<sos> aujourd'hui c`est mieux <eos>"]

# Word -> integer id lookups. Ids start at 1 so that 0 stays free as the
# padding value left behind by the zero-initialised arrays below. The id
# range is derived from the vocabulary itself, so adding sentences can never
# silently drop words.
x_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in x])), start=1)
}
y_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in y])), start=1)
}

# Fixed (padded) sequence lengths, in tokens.
MAX_LEN_X = 5
MAX_LEN_Y = 7

# Encoder input: one row of word ids per source sentence, right-padded with 0.
encoder_input = np.zeros((len(x), MAX_LEN_X), dtype='float32')
for row, sentence in enumerate(x):
    for col, word in enumerate(sentence.split()):
        encoder_input[row, col] = x_dict[word]

# Decoder input holds the target word ids; decoder output is the same
# sequence shifted one step to the left and one-hot encoded, so the target at
# step t is the input token of step t + 1 (<sos> is therefore never a target).
decoder_input = np.zeros((len(y), MAX_LEN_Y), dtype='float32')
decoder_output = np.zeros((len(y), MAX_LEN_Y, len(y_dict) + 1), dtype='float32')
for row, sentence in enumerate(y):
    for col, word in enumerate(sentence.split()):
        decoder_input[row, col] = y_dict[word]
        if col > 0:
            decoder_output[row, col - 1, y_dict[word]] = 1.
# Width of the embeddings and size of the LSTM state used by both the
# encoder and the decoder.
latent_dim = 30
# NOTE(review): the word ids in x_dict start at 1, so the largest id equals
# len(x_dict). An Embedding created with this size only accepts ids in
# [0, len(x_dict)), which matches the "indices[3,0] = 14 is not in [0, 14)"
# failure reported in the traceback. Left as-is because this snippet is the
# reproduction of that error.
INPUT_VOCAB = len(x_dict)
# +1 leaves room for id 0 in addition to the word ids 1..len(y_dict).
TARGET_VOCAB = len(y_dict) + 1
# Encoder: embeds the source word ids and runs them through an LSTM. Only the
# final hidden state and cell state are passed on to the decoder.
encoder_inputs = Input(shape=(None,))
encoder_emb = Embedding(INPUT_VOCAB, latent_dim, mask_zero=True)(encoder_inputs)
# return_state=True makes the layer return its final states next to its output.
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)
encoder_states = [state_h, state_c]
# Decoder: embeds the target word ids and runs an LSTM that is initialised
# with the encoder states and emits an output for every time step.
decoder_inputs = Input(shape=(None,))
dec_emb_layer = Embedding(TARGET_VOCAB, latent_dim, mask_zero=True)
decoder_emb = dec_emb_layer(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
# The decoder's own final states are not needed here and are discarded.
decoder_outputs, _, _ = decoder_lstm(decoder_emb, initial_state=encoder_states)
# Softmax over TARGET_VOCAB entries, applied to each decoder output step.
decoder_dense = Dense(TARGET_VOCAB, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# Two inputs (source ids, target ids) mapped to the per-step softmax output.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# categorical_crossentropy pairs with the one-hot encoded decoder_output array.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()
这是我得到的错误的完整回溯:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-2-f704c3b2a0b8> in <module>
2 epochs=10,
3 verbose=1,
----> 4 shuffle=True)
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
322 mode=ModeKeys.TRAIN,
323 training_context=training_context,
--> 324 total_epochs=epochs)
325 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
326
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
121 step=step, mode=mode, size=current_batch_size) as batch_logs:
122 try:
--> 123 batch_outs = execution_function(iterator)
124 except (StopIteration, errors.OutOfRangeError):
125 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
84 # `numpy` translates Tensors to values in Eager mode.
85 return nest.map_structure(_non_none_constant_value,
---> 86 distributed_function(input_fn))
87
88 return execution_function
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
518 # Lifting succeeded, so variables are initialized and we can run the
519 # stateless function.
--> 520 return self._stateless_fn(*args, **kwds)
521 else:
522 canon_args, canon_kwds = \
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
1821 """Calls a graph function specialized to the inputs."""
1822 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1823 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
1824
1825 @property
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
1139 if isinstance(t, (ops.Tensor,
1140 resource_variable_ops.BaseResourceVariable))),
-> 1141 self.captured_inputs)
1142
1143 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1222 if executing_eagerly:
1223 flat_outputs = forward_function.call(
-> 1224 ctx, args, cancellation_manager=cancellation_manager)
1225 else:
1226 gradient_name = self._delayed_rewrite_functions.register()
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
509 inputs=args,
510 attrs=("executor_type", executor_type, "config_proto", config),
--> 511 ctx=ctx)
512 else:
513 outputs = execute.execute_with_cancellation(
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
~/miniconda3/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: indices[3,0] = 14 is not in [0, 14)
[[node model/embedding/embedding_lookup (defined at /home/user83/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]]
[[loss/dense_loss/weighted_loss/broadcast_weights/assert_broadcastable/AssertGuard/else/_13/Assert/data_2/_92]]
(1) Invalid argument: indices[3,0] = 14 is not in [0, 14)
[[node model/embedding/embedding_lookup (defined at /home/user83/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_distributed_function_16237]
Function call stack:
distributed_function -> distributed_function
您的代码中有几处问题。
词汇表大小(编码器和解码器)
如果您执行 print(x_dict),会看到词汇表的编号从 1 开始,一直增加到某个值(假设为 n),而 0 被保留用于填充。
但您把 INPUT_VOCAB 设置为 len(x_dict)(即 n),因此 Embedding 层只接受 [0, n) 范围内的索引,缺少用来表示词汇表中最后一个单词(编号 n)的那一行。
于是,每当模型遇到最后一个单词时,就会在 embedding_lookup 处抛出 InvalidArgumentError。所以您需要设置 INPUT_VOCAB = len(x_dict) + 1。
在你的输出形状中有两个 None
这是我个人一直尽量避免的做法。把 batch 维度保留为 None 没有问题,但输出形状中出现多个 None 是有风险的。
例如,TensorFlow/Keras 有时会在层内部对张量执行 reshape;如果有多个维度是 None,就无法恢复张量的原始形状(甚至可能根本不允许执行 reshape)。
无论如何,这都不是最佳实践。所以我在 Input 的 shape 中指定了序列长度。
所以在更改之后你的代码看起来像这样。
import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, LSTM, Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
# Toy parallel corpus: English source sentences and their French targets.
# Every target is wrapped in <sos>/<eos> markers.
x = ['this is really good',
     'i am feeling better',
     'yesterday was a bad day',
     'today is better']
y = ['<sos> Ceci est vraiment bon <eos>',
     '<sos> je me sens mieux <eos>',
     '<sos> hier était une mauvaise journée <eos>',
     "<sos> aujourd'hui c`est mieux <eos>"]

# Word -> integer id lookups. Ids start at 1 so that 0 stays free as the
# padding value left behind by the zero-initialised arrays below. The id
# range is derived from the vocabulary itself, so adding sentences can never
# silently drop words.
x_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in x])), start=1)
}
y_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in y])), start=1)
}

# Fixed (padded) sequence lengths, in tokens.
MAX_LEN_X = 5
MAX_LEN_Y = 7

# Encoder input: one row of word ids per source sentence, right-padded with 0.
encoder_input = np.zeros((len(x), MAX_LEN_X), dtype='float32')
for row, sentence in enumerate(x):
    for col, word in enumerate(sentence.split()):
        encoder_input[row, col] = x_dict[word]

# Decoder input holds the target word ids; decoder output is the same
# sequence shifted one step to the left and one-hot encoded, so the target at
# step t is the input token of step t + 1 (<sos> is therefore never a target).
decoder_input = np.zeros((len(y), MAX_LEN_Y), dtype='float32')
decoder_output = np.zeros((len(y), MAX_LEN_Y, len(y_dict) + 1), dtype='float32')
for row, sentence in enumerate(y):
    for col, word in enumerate(sentence.split()):
        decoder_input[row, col] = y_dict[word]
        if col > 0:
            decoder_output[row, col - 1, y_dict[word]] = 1.
# Width of the embeddings and size of the LSTM state used by both the
# encoder and the decoder.
latent_dim = 30
# +1 because the word ids run from 1 to len(dict) and id 0 is also fed to the
# Embedding layers (the padded positions of the input arrays are 0).
INPUT_VOCAB = len(x_dict) + 1
TARGET_VOCAB = len(y_dict) + 1
print(MAX_LEN_X, MAX_LEN_Y)
# Encoder: embeds the source word ids and runs them through an LSTM. Only the
# final hidden state and cell state are passed on to the decoder. The sequence
# length is fixed to MAX_LEN_X instead of being left as None.
encoder_inputs = Input(shape=(MAX_LEN_X,))
encoder_emb = Embedding(INPUT_VOCAB, latent_dim, mask_zero=True)(encoder_inputs)
# return_state=True makes the layer return its final states next to its output.
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)
encoder_states = [state_h, state_c]
# Decoder: embeds the target word ids and runs an LSTM that is initialised
# with the encoder states and emits an output for every time step. The
# sequence length is fixed to MAX_LEN_Y.
decoder_inputs = Input(shape=(MAX_LEN_Y,))
dec_emb_layer = Embedding(TARGET_VOCAB, latent_dim, mask_zero=True)
decoder_emb = dec_emb_layer(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
# The decoder's own final states are not needed here and are discarded.
decoder_outputs, _, _ = decoder_lstm(decoder_emb, initial_state=encoder_states)
# Softmax over TARGET_VOCAB entries, applied to each decoder output step.
decoder_dense = Dense(TARGET_VOCAB, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# Two inputs (source ids, target ids) mapped to the per-step softmax output.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# categorical_crossentropy pairs with the one-hot encoded decoder_output array.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()
我正在尝试对单词使用序列到序列模型,但在训练期间我不断收到无效参数错误。我不知道我在这里做错了什么。请帮帮我。
这是重现我遇到的错误的示例代码。
我使用的环境是:tensorflow 2.0.0、cudatoolkit 10.0.130、cudnn 7.6.4。
import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, LSTM, Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
# Toy parallel corpus: English source sentences and their French targets.
# Every target is wrapped in <sos>/<eos> markers.
x = ['this is really good',
     'i am feeling better',
     'yesterday was a bad day',
     'today is better']
y = ['<sos> Ceci est vraiment bon <eos>',
     '<sos> je me sens mieux <eos>',
     '<sos> hier était une mauvaise journée <eos>',
     "<sos> aujourd'hui c`est mieux <eos>"]

# Word -> integer id lookups. Ids start at 1 so that 0 stays free as the
# padding value left behind by the zero-initialised arrays below. The id
# range is derived from the vocabulary itself, so adding sentences can never
# silently drop words.
x_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in x])), start=1)
}
y_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in y])), start=1)
}

# Fixed (padded) sequence lengths, in tokens.
MAX_LEN_X = 5
MAX_LEN_Y = 7

# Encoder input: one row of word ids per source sentence, right-padded with 0.
encoder_input = np.zeros((len(x), MAX_LEN_X), dtype='float32')
for row, sentence in enumerate(x):
    for col, word in enumerate(sentence.split()):
        encoder_input[row, col] = x_dict[word]

# Decoder input holds the target word ids; decoder output is the same
# sequence shifted one step to the left and one-hot encoded, so the target at
# step t is the input token of step t + 1 (<sos> is therefore never a target).
decoder_input = np.zeros((len(y), MAX_LEN_Y), dtype='float32')
decoder_output = np.zeros((len(y), MAX_LEN_Y, len(y_dict) + 1), dtype='float32')
for row, sentence in enumerate(y):
    for col, word in enumerate(sentence.split()):
        decoder_input[row, col] = y_dict[word]
        if col > 0:
            decoder_output[row, col - 1, y_dict[word]] = 1.
# Width of the embeddings and size of the LSTM state used by both the
# encoder and the decoder.
latent_dim = 30
# NOTE(review): the word ids in x_dict start at 1, so the largest id equals
# len(x_dict). An Embedding created with this size only accepts ids in
# [0, len(x_dict)), which matches the "indices[3,0] = 14 is not in [0, 14)"
# failure reported in the traceback. Left as-is because this snippet is the
# reproduction of that error.
INPUT_VOCAB = len(x_dict)
# +1 leaves room for id 0 in addition to the word ids 1..len(y_dict).
TARGET_VOCAB = len(y_dict) + 1
# Encoder: embeds the source word ids and runs them through an LSTM. Only the
# final hidden state and cell state are passed on to the decoder.
encoder_inputs = Input(shape=(None,))
encoder_emb = Embedding(INPUT_VOCAB, latent_dim, mask_zero=True)(encoder_inputs)
# return_state=True makes the layer return its final states next to its output.
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)
encoder_states = [state_h, state_c]
# Decoder: embeds the target word ids and runs an LSTM that is initialised
# with the encoder states and emits an output for every time step.
decoder_inputs = Input(shape=(None,))
dec_emb_layer = Embedding(TARGET_VOCAB, latent_dim, mask_zero=True)
decoder_emb = dec_emb_layer(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
# The decoder's own final states are not needed here and are discarded.
decoder_outputs, _, _ = decoder_lstm(decoder_emb, initial_state=encoder_states)
# Softmax over TARGET_VOCAB entries, applied to each decoder output step.
decoder_dense = Dense(TARGET_VOCAB, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# Two inputs (source ids, target ids) mapped to the per-step softmax output.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# categorical_crossentropy pairs with the one-hot encoded decoder_output array.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()
这是我得到的错误的完整回溯:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-2-f704c3b2a0b8> in <module>
2 epochs=10,
3 verbose=1,
----> 4 shuffle=True)
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
322 mode=ModeKeys.TRAIN,
323 training_context=training_context,
--> 324 total_epochs=epochs)
325 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
326
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
121 step=step, mode=mode, size=current_batch_size) as batch_logs:
122 try:
--> 123 batch_outs = execution_function(iterator)
124 except (StopIteration, errors.OutOfRangeError):
125 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
84 # `numpy` translates Tensors to values in Eager mode.
85 return nest.map_structure(_non_none_constant_value,
---> 86 distributed_function(input_fn))
87
88 return execution_function
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
518 # Lifting succeeded, so variables are initialized and we can run the
519 # stateless function.
--> 520 return self._stateless_fn(*args, **kwds)
521 else:
522 canon_args, canon_kwds = \
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
1821 """Calls a graph function specialized to the inputs."""
1822 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1823 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
1824
1825 @property
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
1139 if isinstance(t, (ops.Tensor,
1140 resource_variable_ops.BaseResourceVariable))),
-> 1141 self.captured_inputs)
1142
1143 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1222 if executing_eagerly:
1223 flat_outputs = forward_function.call(
-> 1224 ctx, args, cancellation_manager=cancellation_manager)
1225 else:
1226 gradient_name = self._delayed_rewrite_functions.register()
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
509 inputs=args,
510 attrs=("executor_type", executor_type, "config_proto", config),
--> 511 ctx=ctx)
512 else:
513 outputs = execute.execute_with_cancellation(
~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
~/miniconda3/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: indices[3,0] = 14 is not in [0, 14)
[[node model/embedding/embedding_lookup (defined at /home/user83/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]]
[[loss/dense_loss/weighted_loss/broadcast_weights/assert_broadcastable/AssertGuard/else/_13/Assert/data_2/_92]]
(1) Invalid argument: indices[3,0] = 14 is not in [0, 14)
[[node model/embedding/embedding_lookup (defined at /home/user83/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_distributed_function_16237]
Function call stack:
distributed_function -> distributed_function
您的代码中有几处问题。
词汇表大小(编码器和解码器)
如果您执行 print(x_dict),会看到词汇表的编号从 1 开始,一直增加到某个值(假设为 n),而 0 被保留用于填充。
但您把 INPUT_VOCAB 设置为 len(x_dict)(即 n),因此 Embedding 层只接受 [0, n) 范围内的索引,缺少用来表示词汇表中最后一个单词(编号 n)的那一行。
于是,每当模型遇到最后一个单词时,就会在 embedding_lookup 处抛出 InvalidArgumentError。所以您需要设置 INPUT_VOCAB = len(x_dict) + 1。
在你的输出形状中有两个 None
这是我个人一直尽量避免的做法。把 batch 维度保留为 None 没有问题,但输出形状中出现多个 None 是有风险的。
例如,TensorFlow/Keras 有时会在层内部对张量执行 reshape;如果有多个维度是 None,就无法恢复张量的原始形状(甚至可能根本不允许执行 reshape)。
无论如何,这都不是最佳实践。所以我在 Input 的 shape 中指定了序列长度。
所以在更改之后你的代码看起来像这样。
import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, LSTM, Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
# Toy parallel corpus: English source sentences and their French targets.
# Every target is wrapped in <sos>/<eos> markers.
x = ['this is really good',
     'i am feeling better',
     'yesterday was a bad day',
     'today is better']
y = ['<sos> Ceci est vraiment bon <eos>',
     '<sos> je me sens mieux <eos>',
     '<sos> hier était une mauvaise journée <eos>',
     "<sos> aujourd'hui c`est mieux <eos>"]

# Word -> integer id lookups. Ids start at 1 so that 0 stays free as the
# padding value left behind by the zero-initialised arrays below. The id
# range is derived from the vocabulary itself, so adding sentences can never
# silently drop words.
x_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in x])), start=1)
}
y_dict = {
    word: idx
    for idx, word in enumerate(
        np.unique(np.hstack([sentence.split() for sentence in y])), start=1)
}

# Fixed (padded) sequence lengths, in tokens.
MAX_LEN_X = 5
MAX_LEN_Y = 7

# Encoder input: one row of word ids per source sentence, right-padded with 0.
encoder_input = np.zeros((len(x), MAX_LEN_X), dtype='float32')
for row, sentence in enumerate(x):
    for col, word in enumerate(sentence.split()):
        encoder_input[row, col] = x_dict[word]

# Decoder input holds the target word ids; decoder output is the same
# sequence shifted one step to the left and one-hot encoded, so the target at
# step t is the input token of step t + 1 (<sos> is therefore never a target).
decoder_input = np.zeros((len(y), MAX_LEN_Y), dtype='float32')
decoder_output = np.zeros((len(y), MAX_LEN_Y, len(y_dict) + 1), dtype='float32')
for row, sentence in enumerate(y):
    for col, word in enumerate(sentence.split()):
        decoder_input[row, col] = y_dict[word]
        if col > 0:
            decoder_output[row, col - 1, y_dict[word]] = 1.
# Width of the embeddings and size of the LSTM state used by both the
# encoder and the decoder.
latent_dim = 30
# +1 because the word ids run from 1 to len(dict) and id 0 is also fed to the
# Embedding layers (the padded positions of the input arrays are 0).
INPUT_VOCAB = len(x_dict) + 1
TARGET_VOCAB = len(y_dict) + 1
print(MAX_LEN_X, MAX_LEN_Y)
# Encoder: embeds the source word ids and runs them through an LSTM. Only the
# final hidden state and cell state are passed on to the decoder. The sequence
# length is fixed to MAX_LEN_X instead of being left as None.
encoder_inputs = Input(shape=(MAX_LEN_X,))
encoder_emb = Embedding(INPUT_VOCAB, latent_dim, mask_zero=True)(encoder_inputs)
# return_state=True makes the layer return its final states next to its output.
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)
encoder_states = [state_h, state_c]
# Decoder: embeds the target word ids and runs an LSTM that is initialised
# with the encoder states and emits an output for every time step. The
# sequence length is fixed to MAX_LEN_Y.
decoder_inputs = Input(shape=(MAX_LEN_Y,))
dec_emb_layer = Embedding(TARGET_VOCAB, latent_dim, mask_zero=True)
decoder_emb = dec_emb_layer(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
# The decoder's own final states are not needed here and are discarded.
decoder_outputs, _, _ = decoder_lstm(decoder_emb, initial_state=encoder_states)
# Softmax over TARGET_VOCAB entries, applied to each decoder output step.
decoder_dense = Dense(TARGET_VOCAB, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# Two inputs (source ids, target ids) mapped to the per-step softmax output.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# categorical_crossentropy pairs with the one-hot encoded decoder_output array.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()