keras Attention: Incompatible shapes: [32,2] vs. [1200,2]
I am trying to add an attention layer to my model for text classification, but I get an error when I fit the model after adding the layer.
Here is my code:
model = Sequential()

for i in range(len(kernel_size)):
    model.add(Conv1D(filters=nb_filter, kernel_size=kernel_size[i], padding='valid', activation='relu',
                     input_shape=(data_batch_size, emb_dim)))

model.add(MaxPooling1D(pool_size=pool_size))
model.add(Bidirectional(LSTM(units=lstm_out, return_sequences=True), merge_mode='concat',
                        input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Bidirectional(LSTM(units=lstm_out, go_backwards=True)))
# ------------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------
model.add(Attention(return_sequences=True))
# ------------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------
model.add(Dropout(DropoutP))
model.add(Dense(cat_output, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
Y_tmp = np.zeros([Y_train.shape[0], 2])
Y_tmp[:, 0] = 2 - Y_train
Y_tmp[:, 1] = Y_train - 1
Y_train = Y_tmp
history = model.fit(X_train, Y_train, validation_split=test_size, epochs=nb_epoch, verbose=1,
                    callbacks=[EarlyStopping(monitor='val_accuracy', patience=0, restore_best_weights=True)])
And this is the Attention class:
class Attention(Layer):

    def __init__(self, return_sequences=True):
        self.return_sequences = return_sequences
        super(Attention, self).__init__()

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1), initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1), initializer="zeros")
        super(Attention, self).build(input_shape)

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a

        if self.return_sequences:
            return output

        return K.sum(output, axis=1)
And this is the error: Incompatible shapes: [32,2] vs. [1200,2]
What am I doing wrong?
The problem is here:
self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1), initializer="zeros")
which should be:
self.b = self.add_weight(name="att_bias", shape=(1,), initializer="zeros")
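For clarity, here is the same class with only that change applied. This is a minimal sketch: the tensorflow.keras imports, the **kwargs pass-through, and calling super().__init__() before setting attributes are my additions, not part of the original question.

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

class Attention(Layer):

    def __init__(self, return_sequences=True, **kwargs):
        super(Attention, self).__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        # One weight per input feature, and a single shared bias: its shape no
        # longer depends on axis 1 of the input, which is what the fix changes.
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1), initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(1,), initializer="zeros")
        super(Attention, self).build(input_shape)

    def call(self, x):
        # Score every position, softmax over axis 1, then reweight the inputs.
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        return K.sum(output, axis=1)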
In fact, you are redefining a Dense layer. To see this for yourself, take a look at the custom Linear layer in the guide on layers and models via sub-classing.
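For comparison with the build() above, that Linear layer looks roughly like this (paraphrased from the guide, not from the question):

import tensorflow as tf
from tensorflow.keras.layers import Layer

class Linear(Layer):
    # Essentially a Dense layer without activation: y = x @ w + b.

    def __init__(self, units=32, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # Kernel shaped by the last input dimension; bias shaped only by the
        # output width, never by the sequence/timestep axis.
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer="random_normal", trainable=True)
        self.b = self.add_weight(shape=(self.units,), initializer="zeros", trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b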
The custom attention layer is actually what you want: it uses Dense layers and is more general (a Bahdanau attention layer).
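For reference, a Bahdanau-style attention layer built from Dense layers can be sketched roughly as below. This is a sketch, not code from the question; the units argument and the variable names are illustrative.

import tensorflow as tf
from tensorflow.keras.layers import Dense, Layer

class BahdanauAttention(Layer):
    # Additive attention: score each encoder timestep against a query vector.

    def __init__(self, units, **kwargs):
        super(BahdanauAttention, self).__init__(**kwargs)
        self.W1 = Dense(units)  # projects the encoder outputs
        self.W2 = Dense(units)  # projects the query (e.g. the last hidden state)
        self.V = Dense(1)       # reduces each projected timestep to a scalar score

    def call(self, query, values):
        # query: (batch, hidden), values: (batch, timesteps, hidden)
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(query_with_time_axis)))
        attention_weights = tf.nn.softmax(score, axis=1)
        # Weighted sum over the time axis -> (batch, hidden)
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights

Used after an LSTM that returns sequences, something like context, weights = BahdanauAttention(64)(last_state, lstm_outputs) yields a 2D context vector that can feed straight into the final Dense classifier.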