训练 seq2seq 模型时出现 InvalidArgumentError

Question

我正在尝试对单词使用序列到序列模型，但在训练期间我不断收到无效参数错误。我不知道我在这里做错了什么。请帮帮我。

这是重现我遇到的错误的示例代码。

我正在使用 tensorflow 2.0.0、cudatoolkit 10.0.130 和 cudnn 7.6.4。

import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, LSTM, Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model

# Toy parallel corpus: English source sentences and their French targets.
# Every target sentence is wrapped in <sos> / <eos> markers.
x = [
    'this is really good',
    'i am feeling better',
    'yesterday was a bad day',
    'today is better',
]

y = [
    '<sos> Ceci est vraiment bon <eos>',
    '<sos> je me sens mieux <eos>',
    '<sos> hier était une mauvaise journée <eos>',
    "<sos> aujourd'hui c`est mieux <eos>",
]

# Word -> integer id lookups over the sorted unique tokens. Ids start at 1;
# 0 is never assigned to a word and is what the zero-initialised arrays below
# hold in unused (padding) positions.
x_dict = {
    word: idx
    for word, idx in zip(
        np.unique(np.hstack([sentence.split() for sentence in x])),
        range(1, 16),
    )
}
y_dict = {
    word: idx
    for word, idx in zip(
        np.unique(np.hstack([sentence.split() for sentence in y])),
        range(1, 18),
    )
}

# Padded sequence lengths (in tokens) for the source and target sides.
MAX_LEN_X = 5
MAX_LEN_Y = 7

# Source token ids, one row per sentence, right-padded with 0.
encoder_input = np.zeros((4, MAX_LEN_X), dtype='float32')
for row, sentence in enumerate(x):
    ids = [x_dict[word] for word in sentence.split()]
    encoder_input[row, :len(ids)] = ids

# decoder_input holds the target token ids; decoder_output is the one-hot
# encoding of the same sequence shifted left by one step, so the target at
# step t is the input token at step t + 1.
decoder_input = np.zeros((4, MAX_LEN_Y), dtype='float32')
decoder_output = np.zeros((4, MAX_LEN_Y, len(y_dict)+1), dtype='float32')
for row, sentence in enumerate(y):
    ids = [y_dict[word] for word in sentence.split()]
    decoder_input[row, :len(ids)] = ids
    for step, next_id in enumerate(ids[1:]):
        decoder_output[row, step, next_id] = 1.

latent_dim = 30
# NOTE(review): token ids run from 1 to len(x_dict), so a size of len(x_dict)
# leaves no slot for the highest id (TARGET_VOCAB below adds 1).
INPUT_VOCAB = len(x_dict)
TARGET_VOCAB = len(y_dict) + 1

# Encoder: embed the source token ids and run them through an LSTM. Only the
# final hidden state and cell state are kept; they seed the decoder below.
# shape=(None,) leaves the sequence length unspecified.
encoder_inputs = Input(shape=(None,))
# mask_zero=True tells downstream layers to skip timesteps whose id is 0,
# which is the value the zero-initialised input arrays hold as padding.
encoder_emb =  Embedding(INPUT_VOCAB, latent_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)
encoder_states = [state_h, state_c]

# Decoder: embed the target token ids, run an LSTM initialised with the
# encoder's final states, and project every timestep onto the target
# vocabulary with a softmax.
decoder_inputs  = Input(shape=(None,))
dec_emb_layer = Embedding(TARGET_VOCAB, latent_dim, mask_zero=True)
decoder_emb = dec_emb_layer(decoder_inputs)
# return_sequences=True yields one output per timestep; the decoder's own
# final states are discarded here.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_emb, initial_state=encoder_states)
decoder_dense = Dense(TARGET_VOCAB, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: (source ids, target ids) -> per-step distribution over the
# target vocabulary. categorical_crossentropy matches the one-hot
# decoder_output array built above.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()

这是我得到的错误的完整回溯：

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-2-f704c3b2a0b8> in <module>
      2            epochs=10,
      3            verbose=1,
----> 4            shuffle=True)

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    726         max_queue_size=max_queue_size,
    727         workers=workers,
--> 728         use_multiprocessing=use_multiprocessing)
    729 
    730   def evaluate(self,

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    322                 mode=ModeKeys.TRAIN,
    323                 training_context=training_context,
--> 324                 total_epochs=epochs)
    325             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    326 

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    121         step=step, mode=mode, size=current_batch_size) as batch_logs:
    122       try:
--> 123         batch_outs = execution_function(iterator)
    124       except (StopIteration, errors.OutOfRangeError):
    125         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
     84     # `numpy` translates Tensors to values in Eager mode.
     85     return nest.map_structure(_non_none_constant_value,
---> 86                               distributed_function(input_fn))
     87 
     88   return execution_function

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
    455 
    456     tracing_count = self._get_tracing_count()
--> 457     result = self._call(*args, **kwds)
    458     if tracing_count == self._get_tracing_count():
    459       self._call_counter.called_without_tracing()

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
    518         # Lifting succeeded, so variables are initialized and we can run the
    519         # stateless function.
--> 520         return self._stateless_fn(*args, **kwds)
    521     else:
    522       canon_args, canon_kwds = \

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
   1821     """Calls a graph function specialized to the inputs."""
   1822     graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1823     return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
   1824 
   1825   @property

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
   1139          if isinstance(t, (ops.Tensor,
   1140                            resource_variable_ops.BaseResourceVariable))),
-> 1141         self.captured_inputs)
   1142 
   1143   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1222     if executing_eagerly:
   1223       flat_outputs = forward_function.call(
-> 1224           ctx, args, cancellation_manager=cancellation_manager)
   1225     else:
   1226       gradient_name = self._delayed_rewrite_functions.register()

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
    509               inputs=args,
    510               attrs=("executor_type", executor_type, "config_proto", config),
--> 511               ctx=ctx)
    512         else:
    513           outputs = execute.execute_with_cancellation(

~/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     65     else:
     66       message = e.message
---> 67     six.raise_from(core._status_to_exception(e.code, message), None)
     68   except TypeError as e:
     69     keras_symbolic_tensors = [

~/miniconda3/lib/python3.7/site-packages/six.py in raise_from(value, from_value)

InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument:  indices[3,0] = 14 is not in [0, 14)
     [[node model/embedding/embedding_lookup (defined at /home/user83/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]]
     [[loss/dense_loss/weighted_loss/broadcast_weights/assert_broadcastable/AssertGuard/else/_13/Assert/data_2/_92]]
  (1) Invalid argument:  indices[3,0] = 14 is not in [0, 14)
     [[node model/embedding/embedding_lookup (defined at /home/user83/miniconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_distributed_function_16237]

Function call stack:
distributed_function -> distributed_function

Answer 1

您的代码中有几处问题。

词汇表大小（编码器和解码器）

如果您执行 print(x_dict)，您会看到词汇表的索引从 1 开始，一直增加到某个值（假设为 n），而索引 0 则留给了填充位（mask_zero=True）。现在您将 INPUT_VOCAB 设置为 len(x_dict)，这会使 Embedding 层只接受 [0, n) 范围内的索引，缺少表示词汇表中最后一个单词（索引 n）的那一行。因此，每当模型遇到最后一个词时，就会在 embedding_lookup 处抛出 InvalidArgumentError。所以您需要设置 INPUT_VOCAB = len(x_dict) + 1。

在你的输出形状中有两个 `None`

这是我个人一直尽量避免的做法。将批次维度（batch dimension）保留为 None 没有问题，但输出形状中出现多个 None 是有风险的。例如，TensorFlow/Keras 有时会在层内部对张量进行 reshape；如果存在多个 None，就无法恢复张量的原始形状（甚至可能根本不允许执行 reshape）。无论如何，这都不是最佳做法，所以我在 Input 的形状中指定了序列长度。

所以在更改之后你的代码看起来像这样。

import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, LSTM, Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model

# Toy parallel corpus: English source sentences and their French targets.
# Every target sentence is wrapped in <sos> / <eos> markers.
x = ['this is really good',
     'i am feeling better',
     'yesterday was a bad day',
     'today is better']

y = ['<sos> Ceci est vraiment bon <eos>',
     '<sos> je me sens mieux <eos>',
     '<sos> hier était une mauvaise journée <eos>',
     "<sos> aujourd'hui c`est mieux <eos>"]

# Word -> integer id lookups over the sorted unique tokens. Ids start at 1 so
# that 0 stays free for padding. enumerate(..., start=1) numbers every token,
# whereas zipping against a hard-coded range would silently drop words once
# the corpus outgrew the range.
x_dict = {word: idx for idx, word in enumerate(
    np.unique(np.hstack([sentence.split() for sentence in x])), start=1)}
y_dict = {word: idx for idx, word in enumerate(
    np.unique(np.hstack([sentence.split() for sentence in y])), start=1)}

latent_dim = 30
# +1 because ids run from 1 to len(dict) and id 0 is the padding value, so the
# embedding tables need len(dict) + 1 rows.
INPUT_VOCAB = len(x_dict) + 1
TARGET_VOCAB = len(y_dict) + 1

# Padded sequence lengths: the longest sentence (in tokens) on each side.
MAX_LEN_X = max(len(sentence.split()) for sentence in x)
MAX_LEN_Y = max(len(sentence.split()) for sentence in y)

# Source token ids, one row per sentence, right-padded with 0.
encoder_input = np.zeros((len(x), MAX_LEN_X), dtype='float32')

for row, sentence in enumerate(x):
    for step, word in enumerate(sentence.split()):
        encoder_input[row, step] = x_dict[word]

# decoder_input holds the target token ids; decoder_output is the one-hot
# encoding of the same sequence shifted left by one step, so the target at
# step t is the input token at step t + 1.
decoder_input = np.zeros((len(y), MAX_LEN_Y), dtype='float32')
decoder_output = np.zeros((len(y), MAX_LEN_Y, TARGET_VOCAB), dtype='float32')

for row, sentence in enumerate(y):
    for step, word in enumerate(sentence.split()):
        decoder_input[row, step] = y_dict[word]
        if step > 0:
            decoder_output[row, step - 1, y_dict[word]] = 1.

# Show the fixed sequence lengths that the Input layers below are built with.
print(MAX_LEN_X, MAX_LEN_Y)
# Encoder: embed the source token ids and run them through an LSTM. Only the
# final hidden state and cell state are kept; they seed the decoder below.
# The sequence length is fixed to MAX_LEN_X, so only the batch dimension of
# the input shape is left as None.
encoder_inputs = Input(shape=(MAX_LEN_X,))
# mask_zero=True tells downstream layers to skip timesteps whose id is 0,
# which is the value the zero-initialised input arrays hold as padding.
encoder_emb =  Embedding(INPUT_VOCAB, latent_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb)
encoder_states = [state_h, state_c]

# Decoder: embed the target token ids, run an LSTM initialised with the
# encoder's final states, and project every timestep onto the target
# vocabulary with a softmax. The sequence length is fixed to MAX_LEN_Y.
decoder_inputs  = Input(shape=(MAX_LEN_Y,))
dec_emb_layer = Embedding(TARGET_VOCAB, latent_dim, mask_zero=True)
decoder_emb = dec_emb_layer(decoder_inputs)
# return_sequences=True yields one output per timestep; the decoder's own
# final states are discarded here.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_emb, initial_state=encoder_states)
decoder_dense = Dense(TARGET_VOCAB, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: (source ids, target ids) -> per-step distribution over the
# target vocabulary. categorical_crossentropy matches the one-hot
# decoder_output array built above.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()

训练 seq2seq 模型时出现 InvalidArgumentError

InvalidArgumentError when training a seq2seq model

seq2seq

词汇表大小（编码器和解码器）

在你的输出形状中有两个 None

在你的输出形状中有两个 `None`