ValueError: Shapes are incompatible in Tensorflow LSTM using RandomizedSearchCV
Problem

Given several input features (sequences), I want to train an LSTM RNN that makes (one-hot encoded) multi-class predictions for several features simultaneously. That is, my input X has shape (observations, sequence length, features_in), while my output y has shape (observations, class_prediction, features_out). As far as I know, this particular setup requires the softmax activation of the last layer to be applied along only one axis, which can be achieved with a Lambda layer. The output I want is similar to the one described here, but with the difference that the individual "softmax arrays" are organized as another dimension. Actually, in my case, I'm using this answer.
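For illustration, here is a minimal sketch (my addition, not from the original post) showing what softmax along one axis means here: with softmax(x, axis=1), each of the features_out columns of a (classes, features_out) output sums to 1 over the class axis:

import numpy as np
import tensorflow as tf
from tensorflow.keras.activations import softmax

# Toy logits with shape (batch, classes, features_out) = (1, 3, 9)
logits = tf.constant(np.random.standard_normal((1, 3, 9)), dtype=tf.float32)
probs = softmax(logits, axis=1)

# Each of the 9 feature columns now sums to 1 over the 3 classes.
print(np.allclose(tf.reduce_sum(probs, axis=1).numpy(), 1.0))  # True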
Now, training this model seems to work as expected (at least, no errors are thrown). However, when searching for hyperparameters with RandomizedSearchCV, the following error occurs:
ValueError: in user code:

    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:853 train_function *
        return step_function(self, iterator)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:842 step_function **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:835 run_step **
        outputs = model.train_step(data)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/training.py:788 train_step
        loss = self.compiled_loss(
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/engine/compile_utils.py:201 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:141 __call__
        losses = call_fn(y_true, y_pred)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:245 call **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/losses.py:1665 categorical_crossentropy
        return backend.categorical_crossentropy(
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/keras/backend.py:4839 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /home/nrieger/anaconda3/envs/tf26/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 3, 9, 2) and (None, 3, 9) are incompatible
MWE
Note: the axis argument of CategoricalCrossentropy was only added in TensorFlow >= 2.6, i.e. you will not be able to run the MWE with TF v2.5 or lower.
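To fail fast on older installations, a small version guard (my addition, not part of the original MWE) can check this up front:

import tensorflow as tf

# CategoricalCrossentropy(axis=...) requires TF >= 2.6
major, minor = (int(v) for v in tf.__version__.split('.')[:2])
assert (major, minor) >= (2, 6), f"TF {tf.__version__} lacks the axis argument of CategoricalCrossentropy"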
import numpy as np
import tensorflow as tf
from numpy.random import standard_normal, choice
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Reshape, Lambda
from tensorflow.keras.activations import softmax
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.utils import to_categorical
def build_model(input_shape, n_neurons, output_shape):
    inp = Input(shape=input_shape)
    x = LSTM(
        n_neurons,
        return_sequences=False,
        input_shape=[None, input_shape[1]],
    )(inp)
    x = Dense(np.product(output_shape))(x)
    x = Reshape(output_shape)(x)
    out = Lambda(lambda x: softmax(x, axis=1))(x)
    model = Model(inp, out)
    optimizer = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.CategoricalCrossentropy(axis=1)
    model.compile(
        loss=loss,
        optimizer=optimizer,
    )
    return model
n_observations = 100
sequence_length = 5
n_features_in = 7
n_classes = 3
target_classes = [0, 1, 2] * 3
n_features_out = len(target_classes)
X = standard_normal((n_observations, sequence_length, n_features_in))
y = [to_categorical(choice(target_classes, n_features_out, replace=False)).T for i in range(n_observations)]
y = np.stack(y)
X.shape # (observations, sequence_length, features_in) -> (100, 5, 7)
y.shape # (observations, target classes, features_out) -> (100, 3, 9)
# Fit model (this works!)
# -----------------------------------------------------------------------------
model = build_model(X.shape[1:], 10, y.shape[1:])
_ = model.fit(X, y, epochs=4)
# Randomized Search CV (ERROR!)
# -----------------------------------------------------------------------------
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)
param_distribs = {
    'n_neurons': np.arange(1, 1000),
    'input_shape': [X.shape[1:]],
    'output_shape': [y.shape[1:]],
}
grid_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=2)
grid_search_cv.fit(X, y, epochs=4)
As mentioned in the comments, you can easily solve this with keras-tuner:
import numpy as np
import tensorflow as tf
from numpy.random import standard_normal, choice
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Reshape, Lambda
from tensorflow.keras.activations import softmax
from tensorflow.keras.utils import to_categorical
import keras_tuner as kt
def build_model(hp):
    inp = Input(shape=(5, 7))
    x = LSTM(
        hp.Int('n_neurons', min_value=1, max_value=1000, step=1),
        return_sequences=False,
        input_shape=[None, 7],
    )(inp)
    x = Dense(np.product((3, 9)))(x)
    x = Reshape((3, 9))(x)
    out = Lambda(lambda x: softmax(x, axis=1))(x)
    model = Model(inp, out)
    optimizer = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.CategoricalCrossentropy(axis=1)
    model.compile(loss=loss, optimizer=optimizer)
    return model
n_observations = 100
sequence_length = 5
n_features_in = 7
n_classes = 3
target_classes = [0, 1, 2] * 3
n_features_out = len(target_classes)
X = standard_normal((n_observations, sequence_length, n_features_in))
y = [to_categorical(choice(target_classes, n_features_out, replace=False)).T for i in range(n_observations)]
y = np.stack(y)
tuner = kt.RandomSearch(
    build_model,
    objective='loss',
    max_trials=5,
)
tuner.search(X, y, epochs=5)
best_model = tuner.get_best_models()[0]
You can use HyperParameters.Int to define the range you want to test.
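As a sketch of how this extends (my own illustration, not part of the original answer): further search dimensions can be declared with the same HyperParameters API, e.g. a log-uniform learning rate via hp.Float, and the winning configuration can be read back after the search:

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Reshape, Lambda
from tensorflow.keras.activations import softmax

def build_model(hp):
    inp = Input(shape=(5, 7))
    x = LSTM(
        hp.Int('n_neurons', min_value=1, max_value=1000, step=1),
        return_sequences=False,
    )(inp)
    x = Dense(np.product((3, 9)))(x)
    x = Reshape((3, 9))(x)
    out = Lambda(lambda x: softmax(x, axis=1))(x)
    model = Model(inp, out)
    # Hypothetical extra search dimension: a log-uniform learning rate.
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log'),
    )
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(axis=1), optimizer=optimizer)
    return model

# After tuner.search(...), read back the winning values:
# best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
# print(best_hp.get('n_neurons'), best_hp.get('learning_rate'))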