ValueError: Shapes are incompatible in Tensorflow LSTM using RandomizedSearchCV

ValueError: Shapes are incompatible in Tensorflow LSTM using RandomizedSearchCV

问题

给定若干输入特征(序列),我想训练一个 LSTM RNN,同时对多个特征进行(独热编码的)多类预测。也就是说,我的输入 X 的形状为 (observations, sequence_length, features_in),而输出 y 的形状为 (observations, class_prediction, features_out)。这种特定设置要求最后一层的 softmax 激活函数只沿一个轴应用(据我所知),这可以通过使用 Lambda 层来实现。我想要的输出与此处(here)所描述的类似,区别在于各个“softmax 数组”被组织为另一个维度。实际上,我采用的是同一讨论串中这个回答(this answer)所描述的做法。

现在,训练这个模型似乎按预期工作(至少没有抛出错误)。然而,当使用 RandomizedSearchCV 查找超参数时,会出现以下错误:

ValueError: in user code:

    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:835 run_step  **
        outputs = model.train_step(data)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:788 train_step
        loss = self.compiled_loss(
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/compile_utils.py:201 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:141 __call__
        losses = call_fn(y_true, y_pred)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:245 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:1665 categorical_crossentropy
        return backend.categorical_crossentropy(
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/backend.py:4839 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 3, 9, 2) and (None, 3, 9) are incompatible

MWE

注意:CategoricalCrossentropy 中的 axis 参数是在 Tensorflow 2.6 及以上版本中才添加的。也就是说,您无法在 TF v2.5 或更低版本上运行此 MWE。

import numpy as np
import tensorflow as tf
from numpy.random import standard_normal, choice
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Reshape, Lambda
from tensorflow.keras.activations import softmax
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.utils import to_categorical


def build_model(input_shape, n_neurons, output_shape):
    """Build and compile an LSTM model whose softmax runs along axis 1.

    The network is Input -> LSTM -> Dense -> Reshape -> softmax(axis=1) and
    is compiled with Adam and a categorical cross-entropy evaluated along
    the same axis.

    Args:
        input_shape: Per-sample input shape; index 1 is used as the feature
            dimension of the LSTM.
        n_neurons: Number of units of the LSTM layer.
        output_shape: Per-sample output shape. The Dense layer emits
            ``prod(output_shape)`` values which are reshaped to this shape.

    Returns:
        The compiled Keras ``Model``.
    """
    inp = Input(shape=input_shape)
    x = LSTM(
        n_neurons,
        return_sequences=False,
        # NOTE(review): presumably redundant next to the explicit Input
        # layer above; kept so the layer configuration stays unchanged.
        input_shape=[None, input_shape[1]],
    )(inp)
    # np.prod replaces np.product, a deprecated alias that NumPy 2.0 removed.
    # int() hands the layer a plain Python integer instead of a NumPy scalar.
    x = Dense(int(np.prod(output_shape)))(x)
    x = Reshape(output_shape)(x)
    # Normalise over axis 1 only, so every slice along the last axis gets
    # its own probability distribution.
    out = Lambda(lambda t: softmax(t, axis=1))(x)
    model = Model(inp, out)

    optimizer = tf.keras.optimizers.Adam()
    # The loss must reduce over the same axis the softmax was applied to.
    loss = tf.keras.losses.CategoricalCrossentropy(axis=1)
    model.compile(
        loss=loss,
        optimizer=optimizer,
    )
    return model


# Problem dimensions for the synthetic example.
n_observations = 100
sequence_length = 5
n_features_in = 7

n_classes = 3
target_classes = [0, 1, 2] * 3
n_features_out = len(target_classes)

# Random inputs are drawn first, then one shuffled, one-hot encoded target
# matrix per observation (transposed so the class axis comes first).
X = standard_normal((n_observations, sequence_length, n_features_in))
y = np.stack(
    [
        to_categorical(
            choice(target_classes, n_features_out, replace=False)
        ).T
        for _ in range(n_observations)
    ]
)

X.shape  # (observations, sequence_length, features_in) -> (100, 5, 7)
y.shape  # (observations, target classes, features_out) -> (100, 3, 9)

# Fit model (this works!)
# -----------------------------------------------------------------------------
# Baseline: build the model from the per-sample shapes of X and y (leading
# observation axis dropped) with 10 LSTM units and train it for 4 epochs.
model = build_model(X.shape[1:], 10, y.shape[1:])
_ = model.fit(X, y, epochs=4)

# Randomized Search CV (ERROR!)
# -----------------------------------------------------------------------------
# Same build_model, now driven through the scikit-learn wrapper. The traceback
# reported for this example ends in
# "ValueError: Shapes (None, 3, 9, 2) and (None, 3, 9) are incompatible".
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)
# Both shape entries are single-element lists, so only n_neurons differs
# between sampled settings (candidates are the integers 1..999).
param_distribs = {
    'n_neurons'     : np.arange(1, 1000),
    'input_shape'   : [X.shape[1:]],
    'output_shape'  : [y.shape[1:]],
}

# n_iter=2 samples two parameter settings. epochs=4 is given as an extra fit
# keyword (presumably forwarded to the underlying Keras fit call).
grid_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=2)
grid_search_cv.fit(X, y, epochs=4)

如评论中所述,您可以使用 keras-tuner 轻松解决此问题:

import numpy as np
import tensorflow as tf
from numpy.random import standard_normal, choice
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Reshape, Lambda
from tensorflow.keras.activations import softmax
from tensorflow.keras.utils import to_categorical
import keras_tuner as kt

def build_model(hp):
    """Build and compile the LSTM model for one keras-tuner trial.

    Input and output shapes are fixed to (5, 7) and (3, 9); only the number
    of LSTM units is tuned, through the ``n_neurons`` hyperparameter.

    Args:
        hp: Hyperparameter container supplied by the tuner; ``hp.Int`` is
            used to define the integer range 1..1000 for ``n_neurons``.

    Returns:
        The compiled Keras ``Model``.
    """
    inp = Input(shape=(5, 7))
    x = LSTM(
        hp.Int('n_neurons', min_value=1, max_value=1000, step=1),
        return_sequences=False,
        input_shape=[None, 7],
    )(inp)
    # np.prod replaces np.product, a deprecated alias that NumPy 2.0 removed.
    # int() hands the layer a plain Python integer instead of a NumPy scalar.
    x = Dense(int(np.prod((3, 9))))(x)
    x = Reshape((3, 9))(x)
    # Softmax over axis 1 only; the loss below reduces over the same axis.
    out = Lambda(lambda t: softmax(t, axis=1))(x)
    model = Model(inp, out)
    optimizer = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.CategoricalCrossentropy(axis=1)
    model.compile(loss=loss, optimizer=optimizer)
    return model

# Problem dimensions for the synthetic example.
n_observations = 100
sequence_length = 5
n_features_in = 7
n_classes = 3
target_classes = [0, 1, 2] * 3
n_features_out = len(target_classes)

# Random inputs are drawn first, then one shuffled, one-hot encoded target
# matrix per observation (transposed so the class axis comes first).
X = standard_normal((n_observations, sequence_length, n_features_in))
y = np.stack(
    [
        to_categorical(
            choice(target_classes, n_features_out, replace=False)
        ).T
        for _ in range(n_observations)
    ]
)

# Random search over the hyperparameters declared in build_model, ranked by
# the 'loss' objective across 5 trials.
tuner = kt.RandomSearch(build_model, objective='loss', max_trials=5)
tuner.search(X, y, epochs=5)
best_model = tuner.get_best_models()[0]

您可以使用 HyperParameters.Int 来定义您要测试的范围。