将 gensim doc2vec 与 Keras Conv1d 结合使用。值错误

Using gensim doc2vec with Keras Conv1d. ValueError

我目前正在尝试使用 keras 实现卷积 lstm 网络。我没有使用 keras 的嵌入层,而是使用了 Gensim 的 doc2vec 嵌入并从中创建了输入数据。

预处理

preprocessed_train = utils.preprocess_text(train_vect)
preprocessed_test = utils.preprocess_text(test_vect)

print preprocessed_train[0]

result: [u'snes_classic', u'preorders_open', u'later_month', u'ever_since', u'nintendo', u'announce', u'snes_classic', u'edition', u'earlier', u'fan', u'desperate', u'register', u'interest', u'ensure', u'come', u'launch', u'however', u'although', u'system', u'pre-orders', u'make', u'available', u'retailers', u'every', u'store', u'plan', u'sell', u'console', u'allow', u'people', u'place', u'pre-orders', u'yet', u'today', u'though', u'nintendo', u'confirm', u'snes_classic', u'edition', u'pre-orders', u'soon', u'available', u'fan', u'post_official', u'facebook', u'company', u'console', u'make', u'available_pre-order', u'various_retailers', u'late', u'month', u'nintendo', u'appreciate', u'incredible', u'anticipation', u'hardware', u'reference', u'fact', u'snes_classic', u'edition', u'already', u'sell', u'many', u'place', u'across_globe', u'unfortunately', u'nintendo', u'clarify', u'exactly', u'retailers', u'open', u'snes_classic', u'pre-orders', u'provide', u'exact_date', u'however', u'stand_reason', u'wal-mart', u'retailers', u'force', u'cancel_pre-orders', u'hardware', u'website', u'error', u'saw', u'go_live', u'prematurely', u'currently_unclear', u'wal-mart', u'help', u'cancel', u'reservations', u'sign-up', u'pre-orders', u'go_live', u'properly', u'month', u'appreciate', u'incredible', u'anticipation', u'exist', u'super_nintendo', u'entertainment_system', u'super_nes', u'classic', u'post', u'nintendo', u'tuesday_august', u'1', u'2017', u'post', u'nintendo', u'mention', u'ship', u'significant_amount', u'snes_classic', u'edition', u'units', u'retailers', u'launch', u'company', u'make', u'units', u'available', u'throughout', u'balance', u'calendar', u'snes_classic', u'edition', u'first', u'announce', u'nintendo', u'explain', u'make', u'units', u'nes_classic', u'constantly', u'sell', u'leave', u'many', u'glad', u'nintendo', u'offer_clarification', u'others', u'however', u'remain_unconvinced', u'nintendo', u'able', u'keep', u'demand', u'console', u'incredibly_hard', u'fan', u'place', u'legitimate', u'order', u'snes_classic', u'edition', u'end', u'even_harder', u'find', u'throughout', u'scalpers', u'place', u'pre-orders', u'pick', u'console', u'post-launch', u'order', u'sell', u'higher_price', u'later_date', u'retailers', u'like', u'ebay', u'enforce_rule', u'scalpers', u'unclear_whether', u'enough', u'snes_classic', u'edition', u'launch', u'september_29', u'2017_source', u'nintendo', u'facebook']

数据标签

y_test = [x for x in test_data['slabel']]
y_train = [x for x in train_data['slabel']]

y_test = keras.utils.to_categorical(y_test)
y_train = keras.utils.to_categorical(y_train)

result:
array([[ 0.,  0.,  0.,  0.,  1.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.]])

加载 doc2vec 模型

doc2vec_model = gensim.models.Doc2Vec.load('./doc2vec-models/dmbbv_300_epoch_500_size_model')

推断数据并创建输入向量infer_vector 函数根据我创建的 doc2vec 模型创建文档嵌入。

X_train = []
for text in preprocessed_train:
    inferred_vec = doc2vec_model.infer_vector(text)
    X_train.append(inferred_vec)

X_test = []
for text in preprocessed_test:
    inferred_vec = doc2vec_model.infer_vector(text)
    X_test.append(inferred_vec)

重塑数据

X_train = np.array(X_train)
X_test = np.array(X_test)
X_train = X_train.reshape((X_train.shape[0],1,X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0],1,X_test.shape[1]))
X_train.shape,X_test.shape

result: ((1476, 1, 500), (370, 1, 500))

建筑模型

model = Sequential()
model.add(Conv1D(filters = 128,
                 kernel_size = 5,
                 input_shape = (X_train.shape[1],X_train.shape[2]), 
                 padding = 'valid',
                 activation = 'relu'))
model.add(MaxPooling1D(2))
model.add(LSTM(X_train.shape[1],return_sequences = True, 
               implementation=2, 
               kernel_regularizer=regularizers.l1_l2(0.001),
               activity_regularizer=regularizers.l1(0.01)
              ))
model.add(Dropout(0.7))
model.add(Activation('relu'))
model.add(LSTM(256,return_sequences = True))
model.add(Activation('relu'))
model.add(LSTM(128))
model.add(Activation('relu'))
model.add(LSTM(64,return_sequences = True))
model.add(Activation('relu'))
model.add(LSTM(32,return_sequences = True))
model.add(Activation('relu'))
model.add(LSTM(16))
model.add(Activation('relu'))
model.add(Dense(5, activation = 'sigmoid'))
model.compile(loss="categorical_crossentropy", optimizer='adamax',metrics=['categorical_accuracy', 'accuracy'])

然后我得到这个错误


-----------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-488-b29db30c3ee7> in <module>()
      5 #                  use_bias=True,
      6                  padding = 'valid',
----> 7                  activation = 'relu'))
      8 model.add(MaxPooling1D(2))
      9 model.add(LSTM(X_train.shape[1],return_sequences = True, 

/usr/local/lib/python2.7/dist-packages/keras/models.pyc in add(self, layer)
    434                 # and create the node connecting the current layer
    435                 # to the input layer we just created.
--> 436                 layer(x)
    437 
    438             if len(layer.inbound_nodes) != 1:

/usr/local/lib/python2.7/dist-packages/keras/engine/topology.pyc in __call__(self, inputs, **kwargs)
    594 
    595             # Actually call the layer, collecting output(s), mask(s), and shape(s).
--> 596             output = self.call(inputs, **kwargs)
    597             output_mask = self.compute_mask(inputs, previous_mask)
    598 

/usr/local/lib/python2.7/dist-packages/keras/layers/convolutional.pyc in call(self, inputs)
    154                 padding=self.padding,
    155                 data_format=self.data_format,
--> 156                 dilation_rate=self.dilation_rate[0])
    157         if self.rank == 2:
    158             outputs = K.conv2d(

/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.pyc in conv1d(x, kernel, strides, padding, data_format, dilation_rate)
   3114         strides=(strides,),
   3115         padding=padding,
-> 3116         data_format=tf_data_format)
   3117     return x
   3118 

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.pyc in convolution(input, filter, padding, strides, dilation_rate, name, data_format)
    670         dilation_rate=dilation_rate,
    671         padding=padding,
--> 672         op=op)
    673 
    674 

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.pyc in with_space_to_batch(input, dilation_rate, padding, op, filter_shape, spatial_dims, data_format)
    336       raise ValueError("dilation_rate must be positive")
    337     if np.all(const_rate == 1):
--> 338       return op(input, num_spatial_dims, padding)
    339 
    340   # We have two padding contributions. The first is used for converting "SAME"

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.pyc in op(input_converted, _, padding)
    662           data_format=data_format,
    663           strides=strides,
--> 664           name=name)
    665 
    666     return with_space_to_batch(

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.pyc in _non_atrous_convolution(input, filter, padding, data_format, strides, name)
    114           padding=padding,
    115           data_format=data_format_2d,
--> 116           name=scope)
    117     elif conv_dims == 2:
    118       if data_format is None or data_format == "NHWC":

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.pyc in conv1d(value, filters, stride, padding, use_cudnn_on_gpu, data_format, name)
   2011     result = gen_nn_ops.conv2d(value, filters, strides, padding,
   2012                                use_cudnn_on_gpu=use_cudnn_on_gpu,
-> 2013                                data_format=data_format)
   2014     return array_ops.squeeze(result, [spatial_start_dim])
   2015 

/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_nn_ops.pyc in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, data_format, name)
    395                                 strides=strides, padding=padding,
    396                                 use_cudnn_on_gpu=use_cudnn_on_gpu,
--> 397                                 data_format=data_format, name=name)
    398   return result
    399 

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.pyc in apply_op(self, op_type_name, name, **keywords)
    765         op = g.create_op(op_type_name, inputs, output_types, name=scope,
    766                          input_types=input_types, attrs=attr_protos,
--> 767                          op_def=op_def)
    768         if output_structure:
    769           outputs = op.outputs

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.pyc in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
   2630                     original_op=self._default_original_op, op_def=op_def)
   2631     if compute_shapes:
-> 2632       set_shapes_for_outputs(ret)
   2633     self._add_op(ret)
   2634     self._record_op_seen_by_control_dependencies(ret)

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.pyc in set_shapes_for_outputs(op)
   1909       shape_func = _call_cpp_shape_fn_and_require_op
   1910 
-> 1911   shapes = shape_func(op)
   1912   if shapes is None:
   1913     raise RuntimeError(

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.pyc in call_with_requiring(op)
   1859 
   1860   def call_with_requiring(op):
-> 1861     return call_cpp_shape_fn(op, require_shape_fn=True)
   1862 
   1863   _call_cpp_shape_fn_and_require_op = call_with_requiring

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/common_shapes.pyc in call_cpp_shape_fn(op, require_shape_fn)
    593     res = _call_cpp_shape_fn_impl(op, input_tensors_needed,
    594                                   input_tensors_as_shapes_needed,
--> 595                                   require_shape_fn)
    596     if not isinstance(res, dict):
    597       # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).

/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/common_shapes.pyc in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
    657       missing_shape_fn = True
    658     else:
--> 659       raise ValueError(err.message)
    660 
    661   if missing_shape_fn:

ValueError: Negative dimension size caused by subtracting 5 from 1 for 'conv1d_55/convolution/Conv2D' (op: 'Conv2D') with input shapes: [?,1,1,500], [1,5,500,128].

输入形状有问题。您可以尝试使用 (None, 500, 1).

而不是 (None, 1, 500)

你可以通过查看异常来判断

`Negative dimension size caused by subtracting 5 from 1 for 'conv1d'` 

内核 (5) 大于第二维。