ValueError: Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 1024)

I am following the tutorial on transfer learning with YAMNet for environmental sound classification. Here is the link: https://www.tensorflow.org/tutorials/audio/transfer_learning_audio. In the tutorial, they define a Sequential model with one hidden layer and two outputs to recognize cats and dogs from sounds.

How can I use/add other layers such as LSTM or BiLSTM?

Now, I want to build a model that uses an LSTM to classify ten different sounds from an audio source.

esc50_csv = './datasets/ESC-50-master/meta/esc50.csv'
base_data_path = './datasets/ESC-50-master/audio/'
pd_data = pd.read_csv(esc50_csv)
my_classes = ['airplane', 'breathing']
map_class_to_id = {'airplane':0, 'breathing':1}
filtered_pd = pd_data[pd_data.category.isin(my_classes)]
class_id = filtered_pd['category'].apply(lambda name: 
        map_class_to_id[name])
filtered_pd = filtered_pd.assign(target=class_id)
full_path = filtered_pd['filename'].apply(lambda row: 
        os.path.join(base_data_path, row))
filtered_pd = filtered_pd.assign(filename=full_path)
filenames = filtered_pd['filename']
targets = filtered_pd['target']
folds = filtered_pd['fold']
main_ds = tf.data.Dataset.from_tensor_slices((filenames, targets, 
        folds))
main_ds.element_spec
def load_wav_for_map(filename, label, fold):
        return load_wav_16k_mono(filename), label, fold
main_ds = main_ds.map(load_wav_for_map)
main_ds.element_spec

# applies the embedding extraction model to the wav data
def extract_embedding(wav_data, label, fold):
         scores, embeddings, spectrogram = yamnet_model(wav_data)
         num_embeddings = tf.shape(embeddings)[0]
         return (embeddings, tf.repeat(label, num_embeddings),
                 tf.repeat(fold, num_embeddings))
# extract embedding
main_ds = main_ds.map(extract_embedding).unbatch()
main_ds.element_spec

cached_ds = main_ds.cache()
train_ds = cached_ds.filter(lambda embedding, label, fold: fold<4)
val_ds = cached_ds.filter(lambda embedding, label, fold: fold == 4)
test_ds = cached_ds.filter(lambda embedding, label, fold: fold == 5)

# remove the folds column now that it's not needed anymore
remove_fold_column = lambda embedding, label, fold: (embedding,label)
train_ds = train_ds.map(remove_fold_column)
val_ds = val_ds.map(remove_fold_column)
test_ds = test_ds.map(remove_fold_column)

train_ds = train_ds.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)

val_ds = val_ds.cache().batch(32).prefetch(tf.data.AUTOTUNE)
test_ds = test_ds.cache().batch(32).prefetch(tf.data.AUTOTUNE)

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(32, input_shape=(1024, 16)))
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(len(my_classes)))
model.summary()
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(       
   from_logits=True), optimizer="adam", metrics=['accuracy'])
callback = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                        patience=3,
                                        restore_best_weights=True)
history = model.fit(train_ds, epochs=150, validation_data=val_ds, 
                                       callbacks=callback)

I am getting the following error:

   Epoch 1/150
WARNING:tensorflow:Model was constructed with shape (None, 1024, 16) for input KerasTensor(type_spec=TensorSpec(shape=(None, 1024, 16), dtype=tf.float32, name='lstm_input'), name='lstm_input', description="created by layer 'lstm_input'"), but it was called on an input with incompatible shape (None, 1024).
WARNING:tensorflow:Model was constructed with shape (None, 1024, 16) for input KerasTensor(type_spec=TensorSpec(shape=(None, 1024, 16), dtype=tf.float32, name='lstm_input'), name='lstm_input', description="created by layer 'lstm_input'"), but it was called on an input with incompatible shape (None, 1024).
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-20-d976cb77f840> in <module>()
      7                                             restore_best_weights=True)
      8 
----> 9 history = model.fit(train_ds, epochs=150, callbacks=callback)

1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs)
   1145           except Exception as e:  # pylint:disable=broad-except
   1146             if hasattr(e, "ag_error_metadata"):
-> 1147               raise e.ag_error_metadata.to_exception(e)
   1148             else:
   1149               raise

ValueError: in user code:

    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py", line 214, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" '

    ValueError: Exception encountered when calling layer "sequential_1" (type Sequential).
    
    Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 1024)
    
    Call arguments received:
      • inputs=tf.Tensor(shape=(None, 1024), dtype=float32)
      • training=True
      • mask=None

How can I fix this error?

Line 2, which the stack trace points to, is missing the second part of the tuple (the dimension):

      1 model = tf.keras.Sequential()
----> 2 model.add(LSTM(32, input_shape=(1024, )))
      3 model.add(tf.keras.layers.Dense(512, activation='relu'))

I assume it should have a number there instead of being empty, for example:

model.add(LSTM(32, input_shape=(1024, 16)))
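
Note that the dataset built in the question actually yields batches of shape (None, 1024): after .unbatch(), each example is a single 1024-dim YAMNet embedding, so an input_shape of (1024, 16) will not match it either. Below is a minimal sketch, assuming you keep the question's pipeline and simply give each embedding a trailing feature axis so the LSTM sees (batch, 1024, 1); add_time_axis is a helper name I made up:

# Sketch (assumption): add a feature axis so each element becomes (1024, 1).
# Apply this after remove_fold_column and before .cache().shuffle().batch().
add_time_axis = lambda embedding, label: (tf.expand_dims(embedding, axis=-1), label)

train_ds = train_ds.map(add_time_axis)
val_ds = val_ds.map(add_time_axis)
test_ds = test_ds.map(add_time_axis)

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(32, input_shape=(1024, 1)))  # matches (batch, 1024, 1)
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(len(my_classes)))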

That part is simple, but you need to understand your model's input. I read that your target is the TESTNET model, see the attached link: restnet. When you work with the image or wave properties they provide, you can take their template and modify it.
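
Since the question also asks about BiLSTM: assuming the same (1024, 1)-shaped inputs as in the sketch above, a bidirectional variant only wraps the LSTM in tf.keras.layers.Bidirectional (again a sketch of my own, not part of the referenced template):

# Sketch (assumption): same (1024, 1) inputs as above, with a bidirectional LSTM head.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(32), input_shape=(1024, 1)))
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(len(my_classes)))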

[Sample]:

import os
from os.path import exists

import tensorflow as tf
import matplotlib.pyplot as plt

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
BATCH_SIZE = 1
IMG_SIZE = (32, 32)

PATH = 'F:\\datasets\\downloads\\cats_name\\'
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
PATH = os.path.join('F:\\datasets\\downloads\\cats_name\\train\\Symbols', '*.jpg')
PATH_2 = os.path.join('F:\\datasets\\downloads\\cats_name\\train\\Term', '*.jpg')
files = tf.data.Dataset.list_files(PATH)
files_2 = tf.data.Dataset.list_files(PATH_2)

list_file = []
list_file_actual = []
list_label = []
list_label_actual = [ 'Symbols', 'Symbols', 'Symbols', 'Symbols', 'Symbols', 'Term', 'Term', 'Term', 'Term', 'Term' ]
for file in files.take(5):
    image = tf.keras.utils.load_img( file.numpy(), grayscale=False, interpolation='nearest')
    list_file_actual.append(image)
    image = tf.image.resize(image, [32,32], method='nearest')
    list_file.append(image)
    list_label.append(1)

for file in files_2.take(5):
    image = tf.keras.utils.load_img( file.numpy(), grayscale=False, interpolation='nearest')
    list_file_actual.append(image)
    image = tf.image.resize(image, [32,32], method='nearest')
    list_file.append(image)
    list_label.append(9)

list_file = tf.cast( list_file, dtype=tf.int64 )
list_label = tf.cast( list_label, dtype=tf.int64 )
list_file = tf.constant( list_file, shape=(10, 1, 32, 32, 3) )
list_label = tf.constant( list_label, shape=(10, 1, 1, 1) )

dataset = tf.data.Dataset.from_tensor_slices(( list_file, list_label ))

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=( 32, 32, 3 )),
    
    tf.keras.layers.ConvLSTM1D( 4, 1, padding='valid', activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(192, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])

model.summary()

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
    learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
    name='Nadam'
)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""                               
lossfn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=False,
    reduction=tf.keras.losses.Reduction.AUTO,
    name='sparse_categorical_crossentropy'
)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'])

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit( dataset, batch_size=1, epochs=40 )

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Prediction
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
PATH = os.path.join('F:\\datasets\\downloads\\cats_name\\train\\Symbols', '*.jpg')
PATH_2 = os.path.join('F:\\datasets\\downloads\\cats_name\\train\\Term', '*.jpg')
files = tf.data.Dataset.list_files(PATH)
files_2 = tf.data.Dataset.list_files(PATH_2)

list_file = []
list_file_actual = []
list_label = []
list_label_actual = [ 'Symbols', 'Symbols', 'Symbols', 'Symbols', 'Symbols', 'Term', 'Term', 'Term', 'Term', 'Term' ]

for file in files.take(5):
    image = tf.keras.utils.load_img( file.numpy(), grayscale=False, interpolation='nearest')
    list_file_actual.append(image)
    image = tf.image.resize(image, [32,32], method='nearest')
    list_file.append(image)
    list_label.append(1)
    
for file in files_2.take(5):
    image = tf.keras.utils.load_img( file.numpy(), grayscale=False, interpolation='nearest')
    list_file_actual.append(image)
    image = tf.image.resize(image, [32,32], method='nearest')
    list_file.append(image)
    list_label.append(9)

plt.figure(figsize=(5,2))
plt.title("Cats recognitions")
for i in range(len(list_file)):
    img = tf.keras.preprocessing.image.array_to_img(
        list_file[i],
        data_format=None,
        scale=True
    )
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)
    predictions = model.predict(img_array)
    score = tf.nn.softmax(predictions[0])
    plt.subplot(5, 2, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(list_file_actual[i])
    plt.xlabel(str(round(score[tf.math.argmax(score).numpy()].numpy(), 2)) + ":" +  str(list_label_actual[tf.math.argmax(score)]))
    
plt.show()

input('...')

[Output]: