在 Keras 中连接(Concatenate)多个嵌入层后,拟合模型时出现断言错误(AssertionError)
I got an AssertionError when fitting a model in Keras after concatenating Embedding layers
我是嵌入层(Embedding layer)的新手,对此有一个疑问。我想拟合一个神经网络模型,但在此之前先用嵌入层对数据集中的分类特征进行了编码。这是我的代码:
import numpy as np
def preproc(X_train, categorical_vars, other_cols):
    """Split a DataFrame into the list of arrays a multi-input model is fed.

    Every column named in ``categorical_vars`` is integer-encoded with
    ``pandas.factorize`` and emitted as its own 1-D array. The columns named
    in ``other_cols`` are emitted together as one final 2-D array.

    Args:
        X_train: DataFrame holding the feature columns. Despite the name, the
            script calls this for every split, not only the training one.
        categorical_vars: Names of the columns to integer-encode.
        other_cols: Names of the remaining columns, passed through unchanged.

    Returns:
        A list of ``len(categorical_vars) + 1`` arrays: one array of integer
        codes per categorical column (in the given order), followed by
        ``X_train[other_cols].values``.
    """
    # Imported locally because the surrounding script only imports numpy.
    import pandas as pd

    input_list_train = []
    for c in categorical_vars:
        # The list round-trip hands factorize a plain ndarray; it is kept so
        # the encoding matches what the original script produced.
        raw_values = np.asarray(X_train[c].tolist())
        # NOTE(review): codes follow order of first appearance within this
        # frame only, so the same category may get different codes in
        # different splits -- confirm this is acceptable for the model.
        codes = pd.factorize(raw_values)[0]
        input_list_train.append(np.asarray(codes))

    # The rest of the columns go in as a single block.
    input_list_train.append(X_train[other_cols].values)
    return input_list_train
# Encode every split with the same column lists. Each name is rebound from
# the original frame to the list of arrays that preproc returns.
X_train, X_validation, X_test = [
    preproc(split, categorical_columns, numeric_columns)
    for split in (X_train, X_validation, X_test)
]
from keras.layers import *
from keras.models import *
# The question's original attempt, kept as posted apart from the restored
# indentation: one small Sequential model per categorical column, one more
# for the numeric columns, and a final Sequential model meant to merge them.
models = []
for categorical_var in categorical_columns:
    model = Sequential()
    model.reset_states()
    no_of_unique_cat = train_df[categorical_var].nunique()
    # Embedding width: half the number of distinct values, capped at 50.
    embedding_size = min(np.ceil((no_of_unique_cat) / 2), 50)
    embedding_size = int(embedding_size)
    model.add(Embedding(no_of_unique_cat + 1, embedding_size, input_length=1))
    # Drop the length-1 sequence axis so each branch ends in a flat vector.
    model.add(Reshape(target_shape=(embedding_size,)))
    models.append(model)

# Branch for the remaining columns.
# NOTE(review): the input width assumes train_df contains nothing but the
# categorical and numeric feature columns -- confirm.
model_rest = Sequential()
model_rest.add(Dense(64, input_dim=(train_df.shape[1] - len(categorical_columns))))
model_rest.reset_states()
models.append(model_rest)

# X_train has already been rebound to the list returned by preproc(), so
# this is the number of input arrays plus one.
# NOTE(review): confirm that is the intended hidden-layer width.
layer_nodes = len(X_train) + 1
dropout_hidden_layers = 0.5
output_nodes = 1

full_model = Sequential()
# This is the step behind the AssertionError in the traceback: the branch
# models are handed to Concatenate's constructor instead of being called on
# tensors, so this Sequential stack is never wired to the multiple inputs.
full_model.add(Concatenate(models))
full_model.add(Dense(units=layer_nodes, kernel_initializer="uniform"))
full_model.add(Activation('relu'))
full_model.add(Dropout(dropout_hidden_layers))
full_model.add(Dense(units=layer_nodes, kernel_initializer="uniform"))
full_model.add(Activation('relu'))
full_model.add(Dropout(dropout_hidden_layers))
full_model.add(Dense(units=output_nodes, kernel_initializer="uniform",
                     activation="sigmoid"))
full_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
# fit() receives a list of arrays here, while the traceback shows Keras
# asserting that a Sequential model gets exactly one input.
history = full_model.fit(X_train, y_train, epochs=200, batch_size=20)
我遇到了以下错误:
Traceback (most recent call last):
File "<ipython-input-28-d5c2b04c2cc3>", line 51, in <module>
history = full_model.fit(X_train, y_train, epochs = 200, batch_size = 20)
File "/home/javier/anaconda3/lib/python3.7/site-packages/keras/engine/training.py", line 1154, in fit
batch_size=batch_size)
File "/home/javier/anaconda3/lib/python3.7/site-packages/keras/engine/training.py", line 504, in _standardize_user_data
self._set_inputs(x)
File "/home/javier/anaconda3/lib/python3.7/site-packages/keras/engine/training.py", line 414, in _set_inputs
assert len(inputs) == 1
AssertionError
可能是什么问题?
对于这种多输入模型,建议使用 Keras 的函数式 API(Functional API)。下面是按这种方式调整后的模型:
# Functional-API version of the multi-input network: one Input per
# categorical column feeding its own Embedding, plus one Input for the
# numeric columns, all merged by a single Concatenate layer.
models = []  # output tensor of each branch
inps = []    # matching Input tensors, in the order the arrays are fed

for categorical_var in categorical_columns:
    inp = Input((1,))
    # Size the vocabulary from the number of distinct values rather than the
    # raw column maximum: the inputs are factorized codes, and .max() is not
    # numeric for string categories.
    # NOTE(review): a split with categories unseen in train_df can still
    # produce codes outside this range -- confirm the encoding is shared.
    no_of_unique_cat = train_df[categorical_var].nunique()
    # Half the number of distinct values, capped at 50 and never below 1.
    embedding_size = int(max(1, min(np.ceil(no_of_unique_cat / 2), 50)))
    x = Embedding(no_of_unique_cat + 1, embedding_size)(inp)
    # Collapse the length-1 sequence axis so the branch ends in a flat vector.
    x = Flatten()(x)
    inps.append(inp)
    models.append(x)

# Numeric branch: its width is the number of numeric columns, which is what
# the last array of the preprocessed input list contains.
inp = Input((len(numeric_columns),))
x = Dense(64)(inp)
inps.append(inp)
models.append(x)

dropout_hidden_layers = 0.5
output_nodes = 1

# Merge all branches, then two hidden blocks and a sigmoid output.
x = Concatenate()(models)
x = Dense(128, kernel_initializer="uniform")(x)
x = Activation('relu')(x)
x = Dropout(dropout_hidden_layers)(x)
x = Dense(64, kernel_initializer="uniform")(x)
x = Activation('relu')(x)
x = Dropout(dropout_hidden_layers)(x)
out = Dense(units=output_nodes, kernel_initializer="uniform", activation="sigmoid")(x)

# Model takes the list of inputs, so fit() can be given a list of arrays in
# the same order (categorical columns first, numeric block last).
full_model = Model(inps, out)
full_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
full_model.summary()
我还在此处提供了一个虚拟示例:https://colab.research.google.com/drive/1LuyC_MosbU9wqvU9azjzBpG3c2oUDwMU?usp=sharing
我是嵌入层(Embedding layer)的新手,对此有一个疑问。我想拟合一个神经网络模型,但在此之前先用嵌入层对数据集中的分类特征进行了编码。这是我的代码:
import numpy as np
def preproc(X_train, categorical_vars, other_cols):
    """Split a DataFrame into the list of arrays a multi-input model is fed.

    Every column named in ``categorical_vars`` is integer-encoded with
    ``pandas.factorize`` and emitted as its own 1-D array. The columns named
    in ``other_cols`` are emitted together as one final 2-D array.

    Args:
        X_train: DataFrame holding the feature columns. Despite the name, the
            script calls this for every split, not only the training one.
        categorical_vars: Names of the columns to integer-encode.
        other_cols: Names of the remaining columns, passed through unchanged.

    Returns:
        A list of ``len(categorical_vars) + 1`` arrays: one array of integer
        codes per categorical column (in the given order), followed by
        ``X_train[other_cols].values``.
    """
    # Imported locally because the surrounding script only imports numpy.
    import pandas as pd

    input_list_train = []
    for c in categorical_vars:
        # The list round-trip hands factorize a plain ndarray; it is kept so
        # the encoding matches what the original script produced.
        raw_values = np.asarray(X_train[c].tolist())
        # NOTE(review): codes follow order of first appearance within this
        # frame only, so the same category may get different codes in
        # different splits -- confirm this is acceptable for the model.
        codes = pd.factorize(raw_values)[0]
        input_list_train.append(np.asarray(codes))

    # The rest of the columns go in as a single block.
    input_list_train.append(X_train[other_cols].values)
    return input_list_train
# Encode every split with the same column lists. Each name is rebound from
# the original frame to the list of arrays that preproc returns.
X_train, X_validation, X_test = [
    preproc(split, categorical_columns, numeric_columns)
    for split in (X_train, X_validation, X_test)
]
from keras.layers import *
from keras.models import *
# The question's original attempt, kept as posted apart from the restored
# indentation: one small Sequential model per categorical column, one more
# for the numeric columns, and a final Sequential model meant to merge them.
models = []
for categorical_var in categorical_columns:
    model = Sequential()
    model.reset_states()
    no_of_unique_cat = train_df[categorical_var].nunique()
    # Embedding width: half the number of distinct values, capped at 50.
    embedding_size = min(np.ceil((no_of_unique_cat) / 2), 50)
    embedding_size = int(embedding_size)
    model.add(Embedding(no_of_unique_cat + 1, embedding_size, input_length=1))
    # Drop the length-1 sequence axis so each branch ends in a flat vector.
    model.add(Reshape(target_shape=(embedding_size,)))
    models.append(model)

# Branch for the remaining columns.
# NOTE(review): the input width assumes train_df contains nothing but the
# categorical and numeric feature columns -- confirm.
model_rest = Sequential()
model_rest.add(Dense(64, input_dim=(train_df.shape[1] - len(categorical_columns))))
model_rest.reset_states()
models.append(model_rest)

# X_train has already been rebound to the list returned by preproc(), so
# this is the number of input arrays plus one.
# NOTE(review): confirm that is the intended hidden-layer width.
layer_nodes = len(X_train) + 1
dropout_hidden_layers = 0.5
output_nodes = 1

full_model = Sequential()
# This is the step behind the AssertionError in the traceback: the branch
# models are handed to Concatenate's constructor instead of being called on
# tensors, so this Sequential stack is never wired to the multiple inputs.
full_model.add(Concatenate(models))
full_model.add(Dense(units=layer_nodes, kernel_initializer="uniform"))
full_model.add(Activation('relu'))
full_model.add(Dropout(dropout_hidden_layers))
full_model.add(Dense(units=layer_nodes, kernel_initializer="uniform"))
full_model.add(Activation('relu'))
full_model.add(Dropout(dropout_hidden_layers))
full_model.add(Dense(units=output_nodes, kernel_initializer="uniform",
                     activation="sigmoid"))
full_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
# fit() receives a list of arrays here, while the traceback shows Keras
# asserting that a Sequential model gets exactly one input.
history = full_model.fit(X_train, y_train, epochs=200, batch_size=20)
我遇到了以下错误:
Traceback (most recent call last):
File "<ipython-input-28-d5c2b04c2cc3>", line 51, in <module>
history = full_model.fit(X_train, y_train, epochs = 200, batch_size = 20)
File "/home/javier/anaconda3/lib/python3.7/site-packages/keras/engine/training.py", line 1154, in fit
batch_size=batch_size)
File "/home/javier/anaconda3/lib/python3.7/site-packages/keras/engine/training.py", line 504, in _standardize_user_data
self._set_inputs(x)
File "/home/javier/anaconda3/lib/python3.7/site-packages/keras/engine/training.py", line 414, in _set_inputs
assert len(inputs) == 1
AssertionError
可能是什么问题?
对于这种多输入模型,建议使用 Keras 的函数式 API(Functional API)。下面是按这种方式调整后的模型:
# Functional-API version of the multi-input network: one Input per
# categorical column feeding its own Embedding, plus one Input for the
# numeric columns, all merged by a single Concatenate layer.
models = []  # output tensor of each branch
inps = []    # matching Input tensors, in the order the arrays are fed

for categorical_var in categorical_columns:
    inp = Input((1,))
    # Size the vocabulary from the number of distinct values rather than the
    # raw column maximum: the inputs are factorized codes, and .max() is not
    # numeric for string categories.
    # NOTE(review): a split with categories unseen in train_df can still
    # produce codes outside this range -- confirm the encoding is shared.
    no_of_unique_cat = train_df[categorical_var].nunique()
    # Half the number of distinct values, capped at 50 and never below 1.
    embedding_size = int(max(1, min(np.ceil(no_of_unique_cat / 2), 50)))
    x = Embedding(no_of_unique_cat + 1, embedding_size)(inp)
    # Collapse the length-1 sequence axis so the branch ends in a flat vector.
    x = Flatten()(x)
    inps.append(inp)
    models.append(x)

# Numeric branch: its width is the number of numeric columns, which is what
# the last array of the preprocessed input list contains.
inp = Input((len(numeric_columns),))
x = Dense(64)(inp)
inps.append(inp)
models.append(x)

dropout_hidden_layers = 0.5
output_nodes = 1

# Merge all branches, then two hidden blocks and a sigmoid output.
x = Concatenate()(models)
x = Dense(128, kernel_initializer="uniform")(x)
x = Activation('relu')(x)
x = Dropout(dropout_hidden_layers)(x)
x = Dense(64, kernel_initializer="uniform")(x)
x = Activation('relu')(x)
x = Dropout(dropout_hidden_layers)(x)
out = Dense(units=output_nodes, kernel_initializer="uniform", activation="sigmoid")(x)

# Model takes the list of inputs, so fit() can be given a list of arrays in
# the same order (categorical columns first, numeric block last).
full_model = Model(inps, out)
full_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
full_model.summary()
我还在此处提供了一个虚拟示例:https://colab.research.google.com/drive/1LuyC_MosbU9wqvU9azjzBpG3c2oUDwMU?usp=sharing