Tensorflow Keras - 堆叠 LSTM 层时出错

Tensorflow Keras - Error while stacking LSTM layers

我有以下图层序列。在混合中添加额外的 LSTM 会产生以下我无法真正理解的错误。

我在 Linux Ubuntu x64 上使用:
python 3.7.3(GCC 7.4.0 编译)
tensorflow-gpu='2.0.0'

# Univariate training data: (samples, time steps, features) — TODO confirm with caller.
print(x_train_uni.shape) # (299980, 20, 1)
# BUG (the subject of this question): every LSTM below uses the default
# return_sequences=False, so each one emits only its last time step —
# a 2-D tensor (batch, units). The next LSTM requires a 3-D sequence
# (batch, time, features), hence the "expected ndim=3, found ndim=2" error.
simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.LSTM(16),  
    tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(1, activation='tanh')
])

# Mean-absolute-error regression objective with the Adam optimizer.
simple_lstm_model.compile(optimizer='adam', loss='mae')

产生:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-32-ba40f416ca84> in <module>
      6     tf.keras.layers.LSTM(16),
      7     tf.keras.layers.LSTM(8),
----> 8     tf.keras.layers.Dense(1, activation='tanh')
      9 ])
     10 

~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
    455     self._self_setattr_tracking = False  # pylint: disable=protected-access
    456     try:
--> 457       result = method(self, *args, **kwargs)
    458     finally:
    459       self._self_setattr_tracking = previous_value  # pylint: disable=protected-access

~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in __init__(self, layers, name)
    112       tf_utils.assert_no_legacy_layers(layers)
    113       for layer in layers:
--> 114         self.add(layer)
    115 
    116   @property

~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
    455     self._self_setattr_tracking = False  # pylint: disable=protected-access
    456     try:
--> 457       result = method(self, *args, **kwargs)
    458     finally:
    459       self._self_setattr_tracking = previous_value  # pylint: disable=protected-access

~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in add(self, layer)
    194       # If the model is being built continuously on top of an input layer:
    195       # refresh its output.
--> 196       output_tensor = layer(self.outputs[0])
    197       if len(nest.flatten(output_tensor)) != 1:
    198         raise TypeError('All layers in a Sequential model '

~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
    621 
    622     if initial_state is None and constants is None:
--> 623       return super(RNN, self).__call__(inputs, **kwargs)
    624 
    625     # If any of `initial_state` or `constants` are specified and are Keras

~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
    810         # are casted, not before.
    811         input_spec.assert_input_compatibility(self.input_spec, inputs,
--> 812                                               self.name)
    813         graph = backend.get_graph()
    814         with graph.as_default(), backend.name_scope(self._name_scope()):

~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
    175                          'expected ndim=' + str(spec.ndim) + ', found ndim=' +
    176                          str(ndim) + '. Full shape received: ' +
--> 177                          str(x.shape.as_list()))
    178     if spec.max_ndim is not None:
    179       ndim = x.shape.ndims

ValueError: Input 0 of layer lstm_19 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 128]

但是,如果我像这样更改模型,它确实可以工作。

# Working variant: with the intermediate LSTM layers commented out there is
# only ONE LSTM, so its default 2-D output (batch, units) feeds straight into
# Dense, which accepts it — no shape mismatch occurs.
simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
#     tf.keras.layers.LSTM(64),
#     tf.keras.layers.LSTM(32),
#     tf.keras.layers.Dropout(0.25),
#     tf.keras.layers.LSTM(16),  
#     tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(1, activation='tanh')
])

simple_lstm_model.compile(optimizer='adam', loss='mae')

我错过了什么?为什么两个或多个 LSTM 层不能一层层堆叠?

LSTM 层要求输入是一个序列(3 维张量:批量 × 时间步 × 特征)。但是,Keras 中 LSTM 的默认设置(return_sequences=False)只返回最后一个时间步的输出,即一个 2 维张量。

因此,在所提架构中,第二个 LSTM 收到的是一个 2 维张量(形状为 [None, 128]),而不是它所需的 3 维序列——这正是错误信息中 "expected ndim=3, found ndim=2" 的含义。

解决方案是使用 return_sequences=True 标志(参见 LSTM arguments in docs):

import tensorflow as tf

# Stand-in data with the same layout as the question: (samples, time, features).
x_train_uni = tf.zeros((100, 20, 1))

# Corrected architecture: every LSTM that feeds another LSTM sets
# return_sequences=True, so it emits the full 3-D sequence
# (batch, time, units) instead of only its last time step.
simple_lstm_model = tf.keras.models.Sequential()
simple_lstm_model.add(
    tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:], return_sequences=True)
)
simple_lstm_model.add(tf.keras.layers.LSTM(64, return_sequences=True))
simple_lstm_model.add(tf.keras.layers.LSTM(32, return_sequences=True))
simple_lstm_model.add(tf.keras.layers.Dropout(0.25))
simple_lstm_model.add(tf.keras.layers.LSTM(16, return_sequences=True))
# Final LSTM keeps the default return_sequences=False: it collapses the
# sequence to (batch, 8), which is exactly what Dense expects.
simple_lstm_model.add(tf.keras.layers.LSTM(8))
simple_lstm_model.add(tf.keras.layers.Dense(1, activation='tanh'))

simple_lstm_model.compile(optimizer='adam', loss='mae')