ValueError: Dimensions must be equal, but are 512 and 256

Question

我正在尝试使用 Tensorflow 1.3.0 实现用于文本摘要的 seq2seq 模型。

我正在尝试在编码层中使用 MultiRNNCell 和 bidirectional_dynamic_rnn。我丢失了一些东西，但无法找到它。错误堆栈跟踪不是直接的，因此更难理解。

我在构建图形时遇到错误。

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
    653           graph_def_version, node_def_str, input_shapes, input_tensors,
--> 654           input_tensors_as_shapes, status)
    655   except errors.InvalidArgumentError as err:

~/anaconda2/envs/tensorflow/lib/python3.5/contextlib.py in __exit__(self, type, value, traceback)
     65             try:
---> 66                 next(self.gen)
     67             except StopIteration:

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
    465           compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466           pywrap_tensorflow.TF_GetCode(status))
    467   finally:

InvalidArgumentError: Dimensions must be equal, but are 512 and 256 for 'decoding/decoder/while/BasicDecoderStep/decoder/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/mul' (op: 'Mul') with input shapes: [?,512], [?,256].

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-119-85ee67bc88e5> in <module>()
      9     # Create the training and inference logits
     10     training_logits, inference_logits = seq2seq_model(input_,target,embeding_matrix,vocab_to_int,source_seq_length,target_seq_length,
---> 11                   max_target_seq_length,rnn_size,keep_probability,num_layers,batch_size)
     12 
     13     # Create tensors for the training logits and inference logits

<ipython-input-114-5ad1bf459bd7> in seq2seq_model(source_input, target_input, embeding_matrix, vocab_to_int, source_sequence_length, target_sequence_length, max_target_length, rnn_size, keep_prob, num_layers, batch_size)
     15     training_logits, inference_logits = decoding_layer(target_input,encoder_states,embedings,
     16                                                                 vocab_to_int,rnn_size,target_sequence_length,
---> 17                                                                 max_target_length,batch_size,num_layers)
     18 
     19     return training_logits, inference_logits

<ipython-input-113-c2b4542605d2> in decoding_layer(target_inputs, encoder_state, embedding, vocab_to_int, rnn_size, target_sequence_length, max_target_length, batch_size, num_layers)
     12 
     13         training_logits = training_decoder(embed,decoder_cell,encoder_state,output_layer,
---> 14                                          target_sequence_length,max_target_length)
     15 
     16 

<ipython-input-117-012bbcdcf997> in training_decoder(dec_embed_input, decoder_cell, encoder_state, output_layer, target_sequence_length, max_target_length)
     17 
     18     final_outputs, final_state = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,impute_finished=True,
---> 19                                                      maximum_iterations=max_target_length)
     20 
     21     return final_outputs

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
    284         ],
    285         parallel_iterations=parallel_iterations,
--> 286         swap_memory=swap_memory)
    287 
    288     final_outputs_ta = res[1]

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
   2773     context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
   2774     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2775     result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
   2776     return result
   2777 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
   2602       self.Enter()
   2603       original_body_result, exit_vars = self._BuildLoop(
-> 2604           pred, body, original_loop_vars, loop_vars, shape_invariants)
   2605     finally:
   2606       self.Exit()

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
   2552         structure=original_loop_vars,
   2553         flat_sequence=vars_for_body_with_tensor_arrays)
-> 2554     body_result = body(*packed_vars_for_body)
   2555     if not nest.is_sequence(body_result):
   2556       body_result = [body_result]

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
    232       """
    233       (next_outputs, decoder_state, next_inputs,
--> 234        decoder_finished) = decoder.step(time, inputs, state)
    235       next_finished = math_ops.logical_or(decoder_finished, finished)
    236       if maximum_iterations is not None:

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py in step(self, time, inputs, state, name)
    137     """
    138     with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 139       cell_outputs, cell_state = self._cell(inputs, state)
    140       if self._output_layer is not None:
    141         cell_outputs = self._output_layer(cell_outputs)

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
    178       with vs.variable_scope(vs.get_variable_scope(),
    179                              custom_getter=self._rnn_get_variable):
--> 180         return super(RNNCell, self).__call__(inputs, state)
    181 
    182   def _rnn_get_variable(self, getter, *args, **kwargs):

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
    448         # Check input assumptions set after layer building, e.g. input shape.
    449         self._assert_input_compatibility(inputs)
--> 450         outputs = self.call(inputs, *args, **kwargs)
    451 
    452         # Apply activity regularization.

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
    936                                       [-1, cell.state_size])
    937           cur_state_pos += cell.state_size
--> 938         cur_inp, new_state = cell(cur_inp, cur_state)
    939         new_states.append(new_state)
    940 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
    772                              self._recurrent_input_noise,
    773                              self._input_keep_prob)
--> 774     output, new_state = self._cell(inputs, state, scope)
    775     if _should_dropout(self._state_keep_prob):
    776       new_state = self._dropout(new_state, "state",

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
    178       with vs.variable_scope(vs.get_variable_scope(),
    179                              custom_getter=self._rnn_get_variable):
--> 180         return super(RNNCell, self).__call__(inputs, state)
    181 
    182   def _rnn_get_variable(self, getter, *args, **kwargs):

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
    448         # Check input assumptions set after layer building, e.g. input shape.
    449         self._assert_input_compatibility(inputs)
--> 450         outputs = self.call(inputs, *args, **kwargs)
    451 
    452         # Apply activity regularization.

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
    405 
    406     new_c = (
--> 407         c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
    408     new_h = self._activation(new_c) * sigmoid(o)
    409 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py in binary_op_wrapper(x, y)
    863           else:
    864             raise
--> 865       return func(x, y, name=name)
    866 
    867   def binary_op_wrapper_sparse(sp_x, y):

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py in _mul_dispatch(x, y, name)
   1086   is_tensor_y = isinstance(y, ops.Tensor)
   1087   if is_tensor_y:
-> 1088     return gen_math_ops._mul(x, y, name=name)
   1089   else:
   1090     assert isinstance(y, sparse_tensor.SparseTensor)  # Case: Dense * Sparse.

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py in _mul(x, y, name)
   1447     A `Tensor`. Has the same type as `x`.
   1448   """
-> 1449   result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
   1450   return result
   1451 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py in apply_op(self, op_type_name, name, **keywords)
    765         op = g.create_op(op_type_name, inputs, output_types, name=scope,
    766                          input_types=input_types, attrs=attr_protos,
--> 767                          op_def=op_def)
    768         if output_structure:
    769           outputs = op.outputs

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
   2630                     original_op=self._default_original_op, op_def=op_def)
   2631     if compute_shapes:
-> 2632       set_shapes_for_outputs(ret)
   2633     self._add_op(ret)
   2634     self._record_op_seen_by_control_dependencies(ret)

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in set_shapes_for_outputs(op)
   1909       shape_func = _call_cpp_shape_fn_and_require_op
   1910 
-> 1911   shapes = shape_func(op)
   1912   if shapes is None:
   1913     raise RuntimeError(

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in call_with_requiring(op)
   1859 
   1860   def call_with_requiring(op):
-> 1861     return call_cpp_shape_fn(op, require_shape_fn=True)
   1862 
   1863   _call_cpp_shape_fn_and_require_op = call_with_requiring

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in call_cpp_shape_fn(op, require_shape_fn)
    593     res = _call_cpp_shape_fn_impl(op, input_tensors_needed,
    594                                   input_tensors_as_shapes_needed,
--> 595                                   require_shape_fn)
    596     if not isinstance(res, dict):
    597       # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
    657       missing_shape_fn = True
    658     else:
--> 659       raise ValueError(err.message)
    660 
    661   if missing_shape_fn:

ValueError: Dimensions must be equal, but are 512 and 256 for 'decoding/decoder/while/BasicDecoderStep/decoder/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/mul' (op: 'Mul') with input shapes: [?,512], [?,256].

我无法理解错误。它试图引用哪个矩阵？请帮助我，我是 Tensorflow 的新手。

Answer 1

错误表明在解码器 (decoding/decoder/while/BasicDecoderStep/decoder/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/mul) 的 LSTM 内部，在乘法 (Mul) 期间存在维度不匹配。

我的猜测是，对于您的实现，解码器 LSTM 需要的单元数是编码器 LSTM 的两倍，因为您使用的是双向编码器。如果您有一个双向编码器和一个包含 256 个单元的 LSTM，那么结果将有 512 个单元（当您连接前向和反向 LSTM 的输出时）。目前解码器似乎期望输入 256 个单元格。

ValueError: Dimensions must be equal, but are 512 and 256

ValueError: Dimensions must be equal, but are 512 and 256

python

deep-learning

lstm

tensorflow

rnn