断言失败:[条件 x == y 逐元素比较时不成立:]
assertion failed: [Condition x == y did not hold element-wise:]
我为句子分类任务构建了一个带有注意力层的 BiLSTM 模型,但运行时报错:由于参数数量不匹配,断言失败。注意力层的代码如下,错误信息附在代码之后。
class attention(Layer):
    """Attention over axis 1 of a sequence tensor.

    A score is computed for every position along axis 1, normalised with a
    softmax over that axis, and used to re-weight the input.

    Assumes a 3D input of shape (batch, timesteps, features) -- TODO confirm
    against the caller. The bias has one entry per timestep, so the timestep
    dimension must be known when the layer is built.

    Args:
        return_sequences: If True, return the re-weighted sequence
            (same shape as the input). If False, sum the re-weighted
            sequence over axis 1 and return one vector per sample.
        **kwargs: Forwarded to the base ``Layer`` constructor
            (for example ``name``).
    """

    def __init__(self, return_sequences=True, **kwargs):
        # Initialise the base layer first, then attach our own state.
        super(attention, self).__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        # One scoring weight per feature (last axis of the input).
        self.W = self.add_weight(
            name="att_weight",
            shape=(input_shape[-1], 1),
            initializer="normal",
        )
        # One bias per position along axis 1.
        self.b = self.add_weight(
            name="att_bias",
            shape=(input_shape[1], 1),
            initializer="zeros",
        )
        super(attention, self).build(input_shape)

    def call(self, x):
        # Unnormalised score for every position.
        e = K.tanh(K.dot(x, self.W) + self.b)
        # Normalise the scores across axis 1 so they sum to one per sample.
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        # Collapse axis 1 into a single weighted-sum vector.
        return K.sum(output, axis=1)

    def get_config(self):
        # Include the constructor argument so a saved model can rebuild
        # the layer with the same behaviour.
        config = super(attention, self).get_config()
        config.update({"return_sequences": self.return_sequences})
        return config
当我训练包含注意力层的模型时,出现断言失败的错误。
Epoch 1/10
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-45-ac310033130c> in <module>()
1 #Early stopping, Adam, dropout = 0.3, 0.5, 0.5
2 #history = model.fit(sequences_matrix, Y_train, batch_size=256, epochs=5, validation_split=0.1, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
----> 3 history = model.fit(sequences_matrix, Y_train, batch_size=32, epochs=10, validation_split=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:] [x (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [32 1] [y (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [32 758]
[[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at <ipython-input-45-ac310033130c>:3) ]] [Op:__inference_train_function_19854]
Function call stack:
train_function
我的模型是:
# Sentence classifier: Embedding -> Conv1D -> BiLSTM -> attention -> Dense(2).
model = Sequential()
# 768-dimensional embeddings, initialised from the `embedding` matrix.
model.add(Embedding(max_words, 768, input_length=max_len, weights=[embedding]))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(SpatialDropout1D(0.1))
model.add(Conv1D(16, kernel_size=11, activation='relu'))
model.add(Bidirectional(LSTM(16, return_sequences=True)))
# NOTE(review): with return_sequences=True the attention layer returns the
# re-weighted sequence (`x * a`) instead of the sum over axis 1, so every
# following layer, including the Dense head, still sees one vector per
# timestep. The reported failure compares a label shape of [32 1] with
# [32 758]; 758 equals 768 - 11 + 1, which would match Conv1D without
# padding -- TODO confirm. The advice given in this thread is to pass
# return_sequences=False here, or to add a Flatten layer before Dropout.
model.add(attention(return_sequences=True))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.2))
# Two-unit softmax output with L1/L2 kernel, L2 bias and L2 activity
# regularisation.
model.add(Dense(2, activation='softmax', use_bias=True, kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4), bias_regularizer=regularizers.l2(1e-4),
activity_regularizer=regularizers.l2(1e-5)))
model.summary()
Y_train 的形状是
# Vocabulary size given to the tokenizer (also used as the Embedding input size).
max_words = 48369
# Target sequence length given to pad_sequences (also the Embedding input_length).
max_len = 768
# Fit the tokenizer on the training texts and convert them to integer sequences.
tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(X_train)
sequences = tok.texts_to_sequences(X_train)
# Bring every sequence to max_len so the batch forms a single matrix.
sequences_matrix = sequence.pad_sequences(sequences, maxlen = max_len)
# Convert the labels to NumPy arrays.
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)
# The author reports this prints (43532, 1).
print(Y_train.shape)
(43532, 1)
您的目标(标签)是二维的,因此需要在最后一个注意力层中设置 return_sequences=False,使该层的输出也是二维的。
在 Dropout 层之前添加一个 Flatten 层,然后重新运行。
# Suggested in the answer: insert this before the Dropout layer
# (presumably so the Dense head receives a 2D tensor -- TODO confirm).
model.add(Flatten())
我为句子分类任务构建了一个带有注意力层的 BiLSTM 模型,但运行时报错:由于参数数量不匹配,断言失败。注意力层的代码如下,错误信息附在代码之后。
class attention(Layer):
    """Attention over axis 1 of a sequence tensor.

    A score is computed for every position along axis 1, normalised with a
    softmax over that axis, and used to re-weight the input.

    Assumes a 3D input of shape (batch, timesteps, features) -- TODO confirm
    against the caller. The bias has one entry per timestep, so the timestep
    dimension must be known when the layer is built.

    Args:
        return_sequences: If True, return the re-weighted sequence
            (same shape as the input). If False, sum the re-weighted
            sequence over axis 1 and return one vector per sample.
        **kwargs: Forwarded to the base ``Layer`` constructor
            (for example ``name``).
    """

    def __init__(self, return_sequences=True, **kwargs):
        # Initialise the base layer first, then attach our own state.
        super(attention, self).__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        # One scoring weight per feature (last axis of the input).
        self.W = self.add_weight(
            name="att_weight",
            shape=(input_shape[-1], 1),
            initializer="normal",
        )
        # One bias per position along axis 1.
        self.b = self.add_weight(
            name="att_bias",
            shape=(input_shape[1], 1),
            initializer="zeros",
        )
        super(attention, self).build(input_shape)

    def call(self, x):
        # Unnormalised score for every position.
        e = K.tanh(K.dot(x, self.W) + self.b)
        # Normalise the scores across axis 1 so they sum to one per sample.
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        # Collapse axis 1 into a single weighted-sum vector.
        return K.sum(output, axis=1)

    def get_config(self):
        # Include the constructor argument so a saved model can rebuild
        # the layer with the same behaviour.
        config = super(attention, self).get_config()
        config.update({"return_sequences": self.return_sequences})
        return config
当我训练包含注意力层的模型时,出现断言失败的错误。
Epoch 1/10
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-45-ac310033130c> in <module>()
1 #Early stopping, Adam, dropout = 0.3, 0.5, 0.5
2 #history = model.fit(sequences_matrix, Y_train, batch_size=256, epochs=5, validation_split=0.1, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
----> 3 history = model.fit(sequences_matrix, Y_train, batch_size=32, epochs=10, validation_split=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:] [x (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [32 1] [y (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [32 758]
[[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at <ipython-input-45-ac310033130c>:3) ]] [Op:__inference_train_function_19854]
Function call stack:
train_function
我的模型是:
# Sentence classifier: Embedding -> Conv1D -> BiLSTM -> attention -> Dense(2).
model = Sequential()
# 768-dimensional embeddings, initialised from the `embedding` matrix.
model.add(Embedding(max_words, 768, input_length=max_len, weights=[embedding]))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(SpatialDropout1D(0.1))
model.add(Conv1D(16, kernel_size=11, activation='relu'))
model.add(Bidirectional(LSTM(16, return_sequences=True)))
# NOTE(review): with return_sequences=True the attention layer returns the
# re-weighted sequence (`x * a`) instead of the sum over axis 1, so every
# following layer, including the Dense head, still sees one vector per
# timestep. The reported failure compares a label shape of [32 1] with
# [32 758]; 758 equals 768 - 11 + 1, which would match Conv1D without
# padding -- TODO confirm. The advice given in this thread is to pass
# return_sequences=False here, or to add a Flatten layer before Dropout.
model.add(attention(return_sequences=True))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.2))
# Two-unit softmax output with L1/L2 kernel, L2 bias and L2 activity
# regularisation.
model.add(Dense(2, activation='softmax', use_bias=True, kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4), bias_regularizer=regularizers.l2(1e-4),
activity_regularizer=regularizers.l2(1e-5)))
model.summary()
Y_train 的形状是
# Vocabulary size given to the tokenizer (also used as the Embedding input size).
max_words = 48369
# Target sequence length given to pad_sequences (also the Embedding input_length).
max_len = 768
# Fit the tokenizer on the training texts and convert them to integer sequences.
tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(X_train)
sequences = tok.texts_to_sequences(X_train)
# Bring every sequence to max_len so the batch forms a single matrix.
sequences_matrix = sequence.pad_sequences(sequences, maxlen = max_len)
# Convert the labels to NumPy arrays.
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)
# The author reports this prints (43532, 1).
print(Y_train.shape)
(43532, 1)
您的目标(标签)是二维的,因此需要在最后一个注意力层中设置 return_sequences=False,使该层的输出也是二维的。
在 Dropout 层之前添加一个 Flatten 层,然后重新运行。
# Suggested in the answer: insert this before the Dropout layer
# (presumably so the Dense head receives a 2D tensor -- TODO confirm).
model.add(Flatten())