无法对接收多个输入的模型执行网格搜索 (Keras)
Unable to perform Grid Search for models receiving more than one input (Keras)
我创建了 final_model
来接收两个输入(长度为 8 的序列)。它们中的每一个都由两个不同的模型处理,model_A
和 model_B
。然后合并两个输出,返回 model_C
的输入,最终返回整个模型的输出。
这是图形概览
这是代码:
# Question code: two Sequential branches, each taking a length-n input vector.
model_A = models.Sequential()
model_A.add(layers.Dense(16, activation='relu',input_shape=(n,)))
model_A.add(layers.Dense(3))
model_B = models.Sequential()
model_B.add(layers.Dense(16, activation='relu',input_shape=(n,)))
model_B.add(layers.Dense(3))
# Merge the two branch outputs, add a hidden layer and a 3-way softmax head.
concatenated = layers.concatenate([model_A.output, model_B.output])
model_C = layers.Dense(16, activation='relu')(concatenated)
out = layers.Dense(3, activation='softmax')(model_C)
# Functional model: two inputs (one per branch), one softmax output.
final_model = models.Model([model_A.input, model_B.input], out)
当我拟合(fit)模型时,一切正常:
# Compile and train the two-input model directly with Keras.
opt = keras.optimizers.Adam(learning_rate=0.001)
# Fixed: the original had `metrics ['accuracy']` (missing `=`), a SyntaxError.
final_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# Keras accepts a list of arrays for a multi-input model, one array per input.
history = final_model.fit([X_train, x_train], y_train, epochs=500, batch_size=1000)
但是,我无法使用网格搜索来优化超参数。的确,用下面的代码
# The attempted grid search (this is the code that fails).
batch_size = [10, 20]
epochs = [10, 50]
param_grid = dict(batch_size=batch_size, epochs=epochs)
# NOTE(review): a raw Keras Model is not a scikit-learn estimator; it must be
# wrapped (e.g. scikeras KerasClassifier) before being passed to GridSearchCV.
grid = GridSearchCV(estimator=final_model, param_grid=param_grid, n_jobs=-1,
cv=3,scoring="accuracy")
# sklearn treats the 2-element input list as X with only 2 samples, hence the
# error "inconsistent numbers of samples: [2, 40000]".
grid_result = grid.fit([X_train,x_train], y_train)
我收到这个错误:
Found input variables with inconsistent numbers of samples: [2, 40000]
注意X_train
和x_train
的形状都是[40000,8]。
有没有办法在多输入的情况下使用网格搜索?
- 这可能会回答您的问题。
另一方面,还有其他特殊用途的超参数搜索库,例如ray。
请注意,@Sean 链接的答案中提到的 tf.keras.wrappers.scikit_learn.KerasClassifier
现在已弃用。目前的做法是使用 scikeras:模型通过 BaseWrapper
的子类进行包装(更多细节见 scikeras 文档)。
这是一个玩具示例,用于演示如何使用您的模型执行此操作:
import numpy as np
from tensorflow.keras import models, layers, Input
from tensorflow import keras
from sklearn.base import BaseEstimator
from sklearn.preprocessing import FunctionTransformer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier, BaseWrapper
def split_input(X):
    """Split a horizontally stacked feature matrix into the model's two inputs.

    X has shape (n_samples, 2*k); returns [left, right], each of shape
    (n_samples, k) — e.g. 2 x (40000, 8) for the question's data.
    """
    midpoint = int(np.shape(X)[1] / 2)
    left, right = X[:, :midpoint], X[:, midpoint:]
    return [left, right]
# Wrapper estimator that splits the stacked X (via split_input, applied
# through a FunctionTransformer) before the data reaches the Keras model.
# Subclassing scikeras.wrappers.KerasClassifier instead of BaseWrapper is
# also possible (see the scikeras documentation for the differences).
class MultiInputEstimator(BaseWrapper):
    """scikeras wrapper whose feature encoder splits X into two model inputs."""

    @property
    def feature_encoder(self):
        # Transformer applied to X on every fit/predict call.
        return FunctionTransformer(func=split_input)

    @staticmethod
    def scorer(y_true, y_pred, **kwargs):
        """Score used by the grid search: accuracy on argmax-decoded one-hot labels."""
        labels_true = np.argmax(y_true, axis=1)
        labels_pred = np.argmax(y_pred, axis=1)
        return accuracy_score(labels_true, labels_pred)
# Should return the fully compiled model. Tunable parameters arrive here.
def get_model(input_shape, n_dense_1):
    """Build and compile the two-input classification model.

    input_shape: shape of EACH of the two inputs, e.g. (8,).
    n_dense_1:   width of the first Dense layer of each branch (grid-searched).
    Returns a compiled Model taking [input_A, input_B] and emitting a
    3-way softmax.
    """
    # Two structurally identical branches, one per input.
    branches = []
    for _ in range(2):
        branch = models.Sequential()
        branch.add(layers.Dense(n_dense_1, activation='relu', input_shape=input_shape))
        branch.add(layers.Dense(3))
        branches.append(branch)
    model_A, model_B = branches

    # Merge the branch outputs, then classify.
    merged = layers.concatenate([model_A.output, model_B.output])
    hidden = layers.Dense(16, activation='relu')(merged)
    out = layers.Dense(3, activation='softmax')(hidden)

    final_model = models.Model([model_A.input, model_B.input], out)
    final_model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return final_model
# Two synthetic input datasets: 40000 samples of 8 features each.
X1 = np.random.random((40000, 8))
X2 = np.random.random((40000, 8))
# Stick the two datasets together. `split_input` will take care of separating them.
X = np.hstack([X1, X2]) # (40000, 16) important: of course X.shape[0] should match y.shape[0].
# One-hot labels; classes hold 20000 / 10000 / 10000 samples respectively.
y = np.zeros((40000, 3))
y[0:20000, 0] = 1
y[20000:30000, 1] = 1
y[30000:, 2] = 1
# get_model is called to return the fully compiled model which is wrapped in our MultiInputEstimator instance.
# `model__*` keyword arguments are routed to get_model by scikeras.
clf = MultiInputEstimator(model=get_model, model__input_shape=(int(X.shape[1]/2),), model__n_dense_1=16)
params = {'model__n_dense_1': [16, 32 ,128]}
# The single stacked X keeps scikit-learn's sample counting consistent;
# feature_encoder splits it back into two inputs on each fit.
grid = GridSearchCV(estimator=clf, param_grid=params, cv=5, verbose=True)
grid_res = grid.fit(X=X, y=y)
这里唯一的新元素是 MultiInputEstimator
包装器和 split_input
函数。这个想法是通过把输入合并成单个数据集来"欺骗"网格搜索。然后,我们借助 scikeras.wrappers.BaseWrapper
在输入数据上运行一个 FunctionTransformer
,它会在把数据集送入模型之前将其重新拆分。
正在访问模型:
由于模型现在“包装”在 clf
中,我们可以在构建后通过 clf.model_
访问它。当我们拟合它时会发生这种情况,例如 clf.fit(X, y)
或初始化它 (clf.initialize(X, y)
)。如果我们传递 Keras Model
实例而不是像 get_model
这样的构建函数,那么模型将直接可用。绘制模型的示例:
# Build the wrapped Keras model without fitting, then plot its architecture.
from tensorflow import keras
clf.initialize(X, y)
keras.utils.plot_model(clf.model_, show_shapes=True)
试试这个解决方案,使用 keras.wrappers.scikit_learn
和 KerasClassifier
,您只需要重塑数据:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
# Alias so the question's `models.Sequential()` / `models.Model` calls work.
models=tf.keras
num_classes = 10
input_shape = (28, 28, 1)
# Load MNIST and scale pixel values to [0, 1].
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
# NOTE(review): expand_dims appends a trailing axis that reshape(-1,28,28)
# immediately removes again -- the two calls together are a shape no-op.
x_train = np.expand_dims(x_train, -1).reshape(-1,28,28,)
x_test = np.expand_dims(x_test, -1).reshape(-1,28,28,)
# The second "input" is just an alias of the first in this toy example.
X_train=x_train
# One-hot encode the labels for categorical_crossentropy.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
def create_model():
    """Build and compile the two-input MNIST model used by KerasClassifier."""
    def make_branch():
        # One Dense branch over a (28, 28, 1) input, flattened at the end.
        branch = models.Sequential()
        branch.add(layers.Dense(16, activation='relu', input_shape=(28, 28, 1)))
        branch.add(layers.Dense(10))
        branch.add(layers.Flatten())
        return branch

    model_A = make_branch()
    model_B = make_branch()

    # Merge the two branch outputs and classify into 10 digits.
    merged = layers.concatenate([model_A.output, model_B.output])
    hidden = layers.Dense(16, activation='relu')(merged)
    out = layers.Dense(10, activation='softmax')(hidden)

    final_model = models.Model([model_A.input, model_B.input], out)
    final_model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=0.001), metrics=["accuracy"])
    return final_model
#opt =
# NOTE(review): keras.wrappers.scikit_learn is deprecated (and removed in
# newer TF/Keras releases); scikeras.wrappers.KerasClassifier replaces it.
from keras.wrappers.scikit_learn import KerasClassifier
model = KerasClassifier(build_fn=create_model)
# A plain fit with an explicit two-element input list works with the wrapper.
history = model.fit([X_train,x_train], y_train,epochs=500,batch_size=1000)
from sklearn.model_selection import GridSearchCV
batch_size = [300, 600]
epochs = [100, 200]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3,scoring="accuracy")
# The two (n,28,28) arrays are stacked and reshaped into one (n,28,28,2) array
# so that sklearn sees a single X with n samples.
# NOTE(review): this hands the model one (28,28,2) tensor per sample rather
# than the two separate (28,28,1) inputs it was built with, and the reshape
# interleaves the stacked axis -- confirm this actually feeds the model the
# intended per-input data before relying on it.
grid_result = grid.fit(np.array([X_train,x_train]).reshape(-1,28,28,2), y_train)
我创建了 final_model
来接收两个输入(长度为 8 的序列)。它们中的每一个都由两个不同的模型处理,model_A
和 model_B
。然后合并两个输出,返回 model_C
的输入,最终返回整个模型的输出。
这是图形概览
这是代码:
# Question code: two Sequential branches, each taking a length-n input vector.
model_A = models.Sequential()
model_A.add(layers.Dense(16, activation='relu',input_shape=(n,)))
model_A.add(layers.Dense(3))
model_B = models.Sequential()
model_B.add(layers.Dense(16, activation='relu',input_shape=(n,)))
model_B.add(layers.Dense(3))
# Merge the two branch outputs, add a hidden layer and a 3-way softmax head.
concatenated = layers.concatenate([model_A.output, model_B.output])
model_C = layers.Dense(16, activation='relu')(concatenated)
out = layers.Dense(3, activation='softmax')(model_C)
# Functional model: two inputs (one per branch), one softmax output.
final_model = models.Model([model_A.input, model_B.input], out)
当我拟合(fit)模型时,一切正常:
# Compile and train the two-input model directly with Keras.
opt = keras.optimizers.Adam(learning_rate=0.001)
# Fixed: the original had `metrics ['accuracy']` (missing `=`), a SyntaxError.
final_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# Keras accepts a list of arrays for a multi-input model, one array per input.
history = final_model.fit([X_train, x_train], y_train, epochs=500, batch_size=1000)
但是,我无法使用网格搜索来优化超参数。的确,用下面的代码
# The attempted grid search (this is the code that fails).
batch_size = [10, 20]
epochs = [10, 50]
param_grid = dict(batch_size=batch_size, epochs=epochs)
# NOTE(review): a raw Keras Model is not a scikit-learn estimator; it must be
# wrapped (e.g. scikeras KerasClassifier) before being passed to GridSearchCV.
grid = GridSearchCV(estimator=final_model, param_grid=param_grid, n_jobs=-1,
cv=3,scoring="accuracy")
# sklearn treats the 2-element input list as X with only 2 samples, hence the
# error "inconsistent numbers of samples: [2, 40000]".
grid_result = grid.fit([X_train,x_train], y_train)
我收到这个错误:
Found input variables with inconsistent numbers of samples: [2, 40000]
注意X_train
和x_train
的形状都是[40000,8]。
有没有办法在多输入的情况下使用网格搜索?
另一方面,还有其他特殊用途的超参数搜索库,例如ray。
请注意,@Sean 链接的答案中提到的 tf.keras.wrappers.scikit_learn.KerasClassifier
现在已弃用。目前的做法是使用 scikeras:模型通过 BaseWrapper
的子类进行包装(更多细节见 scikeras 文档)。
这是一个玩具示例,用于演示如何使用您的模型执行此操作:
import numpy as np
from tensorflow.keras import models, layers, Input
from tensorflow import keras
from sklearn.base import BaseEstimator
from sklearn.preprocessing import FunctionTransformer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier, BaseWrapper
def split_input(X):
    """Split a horizontally stacked feature matrix into the model's two inputs.

    X has shape (n_samples, 2*k); returns [left, right], each of shape
    (n_samples, k) — e.g. 2 x (40000, 8) for the question's data.
    """
    midpoint = int(np.shape(X)[1] / 2)
    left, right = X[:, :midpoint], X[:, midpoint:]
    return [left, right]
# Wrapper estimator that splits the stacked X (via split_input, applied
# through a FunctionTransformer) before the data reaches the Keras model.
# Subclassing scikeras.wrappers.KerasClassifier instead of BaseWrapper is
# also possible (see the scikeras documentation for the differences).
class MultiInputEstimator(BaseWrapper):
    """scikeras wrapper whose feature encoder splits X into two model inputs."""

    @property
    def feature_encoder(self):
        # Transformer applied to X on every fit/predict call.
        return FunctionTransformer(func=split_input)

    @staticmethod
    def scorer(y_true, y_pred, **kwargs):
        """Score used by the grid search: accuracy on argmax-decoded one-hot labels."""
        labels_true = np.argmax(y_true, axis=1)
        labels_pred = np.argmax(y_pred, axis=1)
        return accuracy_score(labels_true, labels_pred)
# Should return the fully compiled model. Tunable parameters arrive here.
def get_model(input_shape, n_dense_1):
    """Build and compile the two-input classification model.

    input_shape: shape of EACH of the two inputs, e.g. (8,).
    n_dense_1:   width of the first Dense layer of each branch (grid-searched).
    Returns a compiled Model taking [input_A, input_B] and emitting a
    3-way softmax.
    """
    # Two structurally identical branches, one per input.
    branches = []
    for _ in range(2):
        branch = models.Sequential()
        branch.add(layers.Dense(n_dense_1, activation='relu', input_shape=input_shape))
        branch.add(layers.Dense(3))
        branches.append(branch)
    model_A, model_B = branches

    # Merge the branch outputs, then classify.
    merged = layers.concatenate([model_A.output, model_B.output])
    hidden = layers.Dense(16, activation='relu')(merged)
    out = layers.Dense(3, activation='softmax')(hidden)

    final_model = models.Model([model_A.input, model_B.input], out)
    final_model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return final_model
# Two synthetic input datasets: 40000 samples of 8 features each.
X1 = np.random.random((40000, 8))
X2 = np.random.random((40000, 8))
# Stick the two datasets together. `split_input` will take care of separating them.
X = np.hstack([X1, X2]) # (40000, 16) important: of course X.shape[0] should match y.shape[0].
# One-hot labels; classes hold 20000 / 10000 / 10000 samples respectively.
y = np.zeros((40000, 3))
y[0:20000, 0] = 1
y[20000:30000, 1] = 1
y[30000:, 2] = 1
# get_model is called to return the fully compiled model which is wrapped in our MultiInputEstimator instance.
# `model__*` keyword arguments are routed to get_model by scikeras.
clf = MultiInputEstimator(model=get_model, model__input_shape=(int(X.shape[1]/2),), model__n_dense_1=16)
params = {'model__n_dense_1': [16, 32 ,128]}
# The single stacked X keeps scikit-learn's sample counting consistent;
# feature_encoder splits it back into two inputs on each fit.
grid = GridSearchCV(estimator=clf, param_grid=params, cv=5, verbose=True)
grid_res = grid.fit(X=X, y=y)
这里唯一的新元素是 MultiInputEstimator
包装器和 split_input
函数。这个想法是通过把输入合并成单个数据集来"欺骗"网格搜索。然后,我们借助 scikeras.wrappers.BaseWrapper
在输入数据上运行一个 FunctionTransformer
,它会在把数据集送入模型之前将其重新拆分。
正在访问模型:
由于模型现在“包装”在 clf
中,我们可以在构建后通过 clf.model_
访问它。当我们拟合它时会发生这种情况,例如 clf.fit(X, y)
或初始化它 (clf.initialize(X, y)
)。如果我们传递 Keras Model
实例而不是像 get_model
这样的构建函数,那么模型将直接可用。绘制模型的示例:
# Build the wrapped Keras model without fitting, then plot its architecture.
from tensorflow import keras
clf.initialize(X, y)
keras.utils.plot_model(clf.model_, show_shapes=True)
试试这个解决方案,使用 keras.wrappers.scikit_learn
和 KerasClassifier
,您只需要重塑数据:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
# Alias so the question's `models.Sequential()` / `models.Model` calls work.
models=tf.keras
num_classes = 10
input_shape = (28, 28, 1)
# Load MNIST and scale pixel values to [0, 1].
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
# NOTE(review): expand_dims appends a trailing axis that reshape(-1,28,28)
# immediately removes again -- the two calls together are a shape no-op.
x_train = np.expand_dims(x_train, -1).reshape(-1,28,28,)
x_test = np.expand_dims(x_test, -1).reshape(-1,28,28,)
# The second "input" is just an alias of the first in this toy example.
X_train=x_train
# One-hot encode the labels for categorical_crossentropy.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
def create_model():
    """Build and compile the two-input MNIST model used by KerasClassifier."""
    def make_branch():
        # One Dense branch over a (28, 28, 1) input, flattened at the end.
        branch = models.Sequential()
        branch.add(layers.Dense(16, activation='relu', input_shape=(28, 28, 1)))
        branch.add(layers.Dense(10))
        branch.add(layers.Flatten())
        return branch

    model_A = make_branch()
    model_B = make_branch()

    # Merge the two branch outputs and classify into 10 digits.
    merged = layers.concatenate([model_A.output, model_B.output])
    hidden = layers.Dense(16, activation='relu')(merged)
    out = layers.Dense(10, activation='softmax')(hidden)

    final_model = models.Model([model_A.input, model_B.input], out)
    final_model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=0.001), metrics=["accuracy"])
    return final_model
#opt =
# NOTE(review): keras.wrappers.scikit_learn is deprecated (and removed in
# newer TF/Keras releases); scikeras.wrappers.KerasClassifier replaces it.
from keras.wrappers.scikit_learn import KerasClassifier
model = KerasClassifier(build_fn=create_model)
# A plain fit with an explicit two-element input list works with the wrapper.
history = model.fit([X_train,x_train], y_train,epochs=500,batch_size=1000)
from sklearn.model_selection import GridSearchCV
batch_size = [300, 600]
epochs = [100, 200]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3,scoring="accuracy")
# The two (n,28,28) arrays are stacked and reshaped into one (n,28,28,2) array
# so that sklearn sees a single X with n samples.
# NOTE(review): this hands the model one (28,28,2) tensor per sample rather
# than the two separate (28,28,1) inputs it was built with, and the reshape
# interleaves the stacked axis -- confirm this actually feeds the model the
# intended per-input data before relying on it.
grid_result = grid.fit(np.array([X_train,x_train]).reshape(-1,28,28,2), y_train)