应用具有注意力的编码器-解码器 (Seq2Seq) 推理模型
Apply an Encoder-Decoder (Seq2Seq) inference model with Attention
你好 StackOverflow 社区!
我正在尝试为具有 Attention 的 seq2seq(Encoder-Decoder)模型创建推理模型。这是推理模型的定义。
# NOTE(review): snippet is the body of a function (header not shown) that splits a
# trained attention seq2seq model into separate encoder/decoder inference models.
model = compile_model(tf.keras.models.load_model(constant.MODEL_PATH, compile=False))
# Encoder: reuse the trained model's first input and the encoder LSTM's
# (sequence_output, h, c) outputs; presumably layers[1] is the encoder LSTM — TODO confirm.
encoder_input = model.input[0]
encoder_output, encoder_h, encoder_c = model.layers[1].output
encoder_state = [encoder_h, encoder_c]
encoder_model = tf.keras.Model(encoder_input, encoder_state)
# Decoder: fresh Input tensors serve as the LSTM initial state so the decoder
# can be stepped one token at a time during inference.
decoder_input = model.input[1]
decoder = model.layers[3]
decoder_new_h = tf.keras.Input(shape=(n_units,), name='input_3')
decoder_new_c = tf.keras.Input(shape=(n_units,), name='input_4')
decoder_input_initial_state = [decoder_new_h, decoder_new_c]
decoder_output, decoder_h, decoder_c = decoder(decoder_input, initial_state=decoder_input_initial_state)
decoder_output_state = [decoder_h, decoder_c]
# These lines cause an error
# NOTE(review): `encoder_output` belongs to the ORIGINAL model's graph, while
# `decoder_output` descends from the new Input tensors above — mixing tensors
# from two graphs is what triggers the "Graph disconnected" ValueError quoted below.
context = model.layers[4]([encoder_output, decoder_output])
decoder_combined_context = model.layers[5]([context, decoder_output])
output = model.layers[6](decoder_combined_context)
output = model.layers[7](output)
# end
decoder_model = tf.keras.Model([decoder_input] + decoder_input_initial_state, [output] + decoder_output_state)
return encoder_model, decoder_model
当我运行这段代码时出现以下错误。
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_5:0", shape=(None, None, 20), dtype=float32) at layer "lstm_4". The following previous layers were accessed without issue: ['lstm_5']
如果我排除注意力块,模型将完全没有任何错误。
# NOTE(review): same function body as above, but with the attention block
# commented out — this variant builds without error, which isolates the
# attention lines as the source of the Graph-disconnected failure.
model = compile_model(tf.keras.models.load_model(constant.MODEL_PATH, compile=False))
encoder_input = model.input[0]
encoder_output, encoder_h, encoder_c = model.layers[1].output
encoder_state = [encoder_h, encoder_c]
encoder_model = tf.keras.Model(encoder_input, encoder_state)
decoder_input = model.input[1]
decoder = model.layers[3]
decoder_new_h = tf.keras.Input(shape=(n_units,), name='input_3')
decoder_new_c = tf.keras.Input(shape=(n_units,), name='input_4')
decoder_input_initial_state = [decoder_new_h, decoder_new_c]
decoder_output, decoder_h, decoder_c = decoder(decoder_input, initial_state=decoder_input_initial_state)
decoder_output_state = [decoder_h, decoder_c]
# These lines cause an error
# context = model.layers[4]([encoder_output, decoder_output])
# decoder_combined_context = model.layers[5]([context, decoder_output])
# output = model.layers[6](decoder_combined_context)
# output = model.layers[7](output)
# end
# Without attention the decoder model returns the raw LSTM output instead of
# the projected `output`, so its outputs differ from the attention variant.
decoder_model = tf.keras.Model([decoder_input] + decoder_input_initial_state, [decoder_output] + decoder_output_state)
return encoder_model, decoder_model
我认为您还需要将编码器的序列输出(encoder_output)作为编码器模型的输出之一,然后将它作为解码器模型的一个新输入,供注意力层使用。也许下面这个改动可以帮助:
# Fixed version: expose the encoder's sequence output from the encoder model,
# then feed it back into the decoder model through a dedicated Input tensor so
# the attention layer never reaches into the original (trained) graph.
model = compile_model(tf.keras.models.load_model(constant.MODEL_PATH, compile=False))
encoder_input = model.input[0]
encoder_output, encoder_h, encoder_c = model.layers[1].output
encoder_state = [encoder_h, encoder_c]
# Flattened output list (the original nested `encoder_state` inside the list);
# flat order matches what Keras would produce after flattening: [h, c, output].
encoder_model = tf.keras.Model(inputs=[encoder_input], outputs=encoder_state + [encoder_output])
decoder_input = model.input[1]
decoder_input2 = tf.keras.Input(shape=x)  # x = shape of the encoder output, e.g. (None, n_units) — TODO confirm
decoder = model.layers[3]
decoder_new_h = tf.keras.Input(shape=(n_units,), name='input_3')
decoder_new_c = tf.keras.Input(shape=(n_units,), name='input_4')
decoder_input_initial_state = [decoder_new_h, decoder_new_c]
decoder_output, decoder_h, decoder_c = decoder(decoder_input, initial_state=decoder_input_initial_state)
decoder_output_state = [decoder_h, decoder_c]
# Attention now consumes the decoder model's own Input (`decoder_input2`)
# instead of the trained model's internal encoder tensor, keeping the new
# graph self-contained and avoiding the Graph-disconnected error.
context = model.layers[4]([decoder_input2, decoder_output])
decoder_combined_context = model.layers[5]([context, decoder_output])
output = model.layers[6](decoder_combined_context)
output = model.layers[7](output)
# Bug fixes vs. the posted snippet: the state list is concatenated into the
# inputs list rather than nested inside it, and a stray trailing backtick
# (a Markdown artifact, a syntax error in Python) was removed.
decoder_model = tf.keras.Model([decoder_input, decoder_input2] + decoder_input_initial_state, [output] + decoder_output_state)
你好 StackOverflow 社区!
我正在尝试为具有 Attention 的 seq2seq(Encoder-Decoder)模型创建推理模型。这是推理模型的定义。
# NOTE(review): snippet is the body of a function (header not shown) that splits a
# trained attention seq2seq model into separate encoder/decoder inference models.
model = compile_model(tf.keras.models.load_model(constant.MODEL_PATH, compile=False))
# Encoder: reuse the trained model's first input and the encoder LSTM's
# (sequence_output, h, c) outputs; presumably layers[1] is the encoder LSTM — TODO confirm.
encoder_input = model.input[0]
encoder_output, encoder_h, encoder_c = model.layers[1].output
encoder_state = [encoder_h, encoder_c]
encoder_model = tf.keras.Model(encoder_input, encoder_state)
# Decoder: fresh Input tensors serve as the LSTM initial state so the decoder
# can be stepped one token at a time during inference.
decoder_input = model.input[1]
decoder = model.layers[3]
decoder_new_h = tf.keras.Input(shape=(n_units,), name='input_3')
decoder_new_c = tf.keras.Input(shape=(n_units,), name='input_4')
decoder_input_initial_state = [decoder_new_h, decoder_new_c]
decoder_output, decoder_h, decoder_c = decoder(decoder_input, initial_state=decoder_input_initial_state)
decoder_output_state = [decoder_h, decoder_c]
# These lines cause an error
# NOTE(review): `encoder_output` belongs to the ORIGINAL model's graph, while
# `decoder_output` descends from the new Input tensors above — mixing tensors
# from two graphs is what triggers the "Graph disconnected" ValueError quoted below.
context = model.layers[4]([encoder_output, decoder_output])
decoder_combined_context = model.layers[5]([context, decoder_output])
output = model.layers[6](decoder_combined_context)
output = model.layers[7](output)
# end
decoder_model = tf.keras.Model([decoder_input] + decoder_input_initial_state, [output] + decoder_output_state)
return encoder_model, decoder_model
当我运行这段代码时出现以下错误。
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_5:0", shape=(None, None, 20), dtype=float32) at layer "lstm_4". The following previous layers were accessed without issue: ['lstm_5']
如果我排除注意力块,模型将完全没有任何错误。
# NOTE(review): same function body as above, but with the attention block
# commented out — this variant builds without error, which isolates the
# attention lines as the source of the Graph-disconnected failure.
model = compile_model(tf.keras.models.load_model(constant.MODEL_PATH, compile=False))
encoder_input = model.input[0]
encoder_output, encoder_h, encoder_c = model.layers[1].output
encoder_state = [encoder_h, encoder_c]
encoder_model = tf.keras.Model(encoder_input, encoder_state)
decoder_input = model.input[1]
decoder = model.layers[3]
decoder_new_h = tf.keras.Input(shape=(n_units,), name='input_3')
decoder_new_c = tf.keras.Input(shape=(n_units,), name='input_4')
decoder_input_initial_state = [decoder_new_h, decoder_new_c]
decoder_output, decoder_h, decoder_c = decoder(decoder_input, initial_state=decoder_input_initial_state)
decoder_output_state = [decoder_h, decoder_c]
# These lines cause an error
# context = model.layers[4]([encoder_output, decoder_output])
# decoder_combined_context = model.layers[5]([context, decoder_output])
# output = model.layers[6](decoder_combined_context)
# output = model.layers[7](output)
# end
# Without attention the decoder model returns the raw LSTM output instead of
# the projected `output`, so its outputs differ from the attention variant.
decoder_model = tf.keras.Model([decoder_input] + decoder_input_initial_state, [decoder_output] + decoder_output_state)
return encoder_model, decoder_model
我认为您还需要将编码器的序列输出(encoder_output)作为编码器模型的输出之一,然后将它作为解码器模型的一个新输入,供注意力层使用。也许下面这个改动可以帮助:
# Fixed version: expose the encoder's sequence output from the encoder model,
# then feed it back into the decoder model through a dedicated Input tensor so
# the attention layer never reaches into the original (trained) graph.
model = compile_model(tf.keras.models.load_model(constant.MODEL_PATH, compile=False))
encoder_input = model.input[0]
encoder_output, encoder_h, encoder_c = model.layers[1].output
encoder_state = [encoder_h, encoder_c]
# Flattened output list (the original nested `encoder_state` inside the list);
# flat order matches what Keras would produce after flattening: [h, c, output].
encoder_model = tf.keras.Model(inputs=[encoder_input], outputs=encoder_state + [encoder_output])
decoder_input = model.input[1]
decoder_input2 = tf.keras.Input(shape=x)  # x = shape of the encoder output, e.g. (None, n_units) — TODO confirm
decoder = model.layers[3]
decoder_new_h = tf.keras.Input(shape=(n_units,), name='input_3')
decoder_new_c = tf.keras.Input(shape=(n_units,), name='input_4')
decoder_input_initial_state = [decoder_new_h, decoder_new_c]
decoder_output, decoder_h, decoder_c = decoder(decoder_input, initial_state=decoder_input_initial_state)
decoder_output_state = [decoder_h, decoder_c]
# Attention now consumes the decoder model's own Input (`decoder_input2`)
# instead of the trained model's internal encoder tensor, keeping the new
# graph self-contained and avoiding the Graph-disconnected error.
context = model.layers[4]([decoder_input2, decoder_output])
decoder_combined_context = model.layers[5]([context, decoder_output])
output = model.layers[6](decoder_combined_context)
output = model.layers[7](output)
# Bug fixes vs. the posted snippet: the state list is concatenated into the
# inputs list rather than nested inside it, and a stray trailing backtick
# (a Markdown artifact, a syntax error in Python) was removed.
decoder_model = tf.keras.Model([decoder_input, decoder_input2] + decoder_input_initial_state, [output] + decoder_output_state)