Confused about multi-layered Bidirectional RNN in Tensorflow
I am building a multi-layered bidirectional RNN with Tensorflow, but I am a bit confused about the implementation.
I have written two functions that create a multi-layered bidirectional RNN. The first one works, but I am not sure about the predictions it makes, since it seems to behave like a unidirectional multi-layered RNN. Below is my implementation:
import tensorflow as tf


def encoding_layer_old(rnn_inputs, rnn_size, num_layers, keep_prob,
                       source_sequence_length, source_vocab_size,
                       encoding_embedding_size):
    """
    Create encoding layer
    :param rnn_inputs: Inputs for the RNN
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param keep_prob: Dropout keep probability
    :param source_sequence_length: a list of the lengths of each sequence in the batch
    :param source_vocab_size: vocabulary size of source data
    :param encoding_embedding_size: embedding size of source data
    :return: tuple (RNN output, RNN state)
    """
    # Encoder embedding
    enc_embed = tf.contrib.layers.embed_sequence(rnn_inputs, source_vocab_size, encoding_embedding_size)

    def create_cell_fw(rnn_size):
        with tf.variable_scope("create_cell_fw"):
            lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2), reuse=False)
            drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
            return drop

    def create_cell_bw(rnn_size):
        with tf.variable_scope("create_cell_bw"):
            lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2), reuse=False)
            drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
            return drop

    # One stacked (multi-layer) cell per direction.
    enc_cell_fw = tf.contrib.rnn.MultiRNNCell([create_cell_fw(rnn_size) for _ in range(num_layers)])
    enc_cell_bw = tf.contrib.rnn.MultiRNNCell([create_cell_bw(rnn_size) for _ in range(num_layers)])

    ((encoder_fw_outputs, encoder_bw_outputs),
     (encoder_fw_final_state, encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
        enc_cell_fw, enc_cell_bw, enc_embed,
        sequence_length=source_sequence_length, dtype=tf.float32)

    # Concatenate forward and backward outputs along the feature axis.
    encoder_outputs = tf.concat([encoder_fw_outputs, encoder_bw_outputs], 2)
    print(encoder_outputs)

    # Build one LSTMStateTuple per layer, with the fw/bw states concatenated.
    encoder_final_state = ()
    for x in range(num_layers):
        encoder_final_state_c = tf.concat((encoder_fw_final_state[x].c, encoder_bw_final_state[x].c), 1)
        encoder_final_state_h = tf.concat((encoder_fw_final_state[x].h, encoder_bw_final_state[x].h), 1)
        encoder_final_state = encoder_final_state + (tf.contrib.rnn.LSTMStateTuple(c=encoder_final_state_c, h=encoder_final_state_h),)

    print('before')
    print(encoder_fw_final_state)
    return encoder_outputs, encoder_final_state
I found another implementation, shown below:
def encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob,
                   source_sequence_length, source_vocab_size,
                   encoding_embedding_size):
    """
    Create encoding layer
    :param rnn_inputs: Inputs for the RNN
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param keep_prob: Dropout keep probability
    :param source_sequence_length: a list of the lengths of each sequence in the batch
    :param source_vocab_size: vocabulary size of source data
    :param encoding_embedding_size: embedding size of source data
    :return: tuple (RNN output, RNN state)
    """
    # Encoder embedding
    enc_embed = tf.contrib.layers.embed_sequence(rnn_inputs, source_vocab_size, encoding_embedding_size)

    def create_cell_fw(rnn_size, x):
        with tf.variable_scope("create_cell_fw_" + str(x)):
            lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2), reuse=tf.AUTO_REUSE)
            drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
            return drop

    def create_cell_bw(rnn_size, x):
        with tf.variable_scope("create_cell_bw_" + str(x)):
            lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2), reuse=tf.AUTO_REUSE)
            drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
            return drop

    # One plain Python list of cells per direction, one cell per layer.
    enc_cell_fw = [create_cell_fw(rnn_size, x) for x in range(num_layers)]
    enc_cell_bw = [create_cell_bw(rnn_size, x) for x in range(num_layers)]

    # Feed the concatenated output of each bidirectional layer into the next one.
    output = enc_embed
    for n in range(num_layers):
        cell_fw = enc_cell_fw[n]
        cell_bw = enc_cell_bw[n]
        state_fw = cell_fw.zero_state(batch_size, tf.float32)
        state_bw = cell_bw.zero_state(batch_size, tf.float32)
        ((output_fw, output_bw),
         (encoder_fw_final_state, encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, output, source_sequence_length,
            state_fw, state_bw, dtype=tf.float32)
        output = tf.concat([output_fw, output_bw], axis=2)
        final_state = tf.concat([encoder_fw_final_state, encoder_bw_final_state], axis=2)

    return output, final_state
The problem with this implementation is that I get a shape error:
Trying to share variable bidirectional_rnn/fw/lstm_cell/kernel, but specified shape (168, 224) and found shape (256, 224).
It seems other people have run into a similar problem when creating RNN cells, and the solution there was to use MultiRNNCell to build the layered cell. But if I use MultiRNNCell I cannot use the second implementation, because a MultiRNNCell does not support indexing, so I cannot loop over a list of cells and build the stacked RNN layer by layer.
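For reference, the kind of per-layer stacking I am after would look roughly like the sketch below. This is only a sketch and assumes tf.contrib.rnn.stack_bidirectional_dynamic_rnn is available in this TensorFlow version; it takes plain Python lists of cells (one per layer), so no indexing into a MultiRNNCell is needed:

# Rough sketch only, assuming tf.contrib.rnn.stack_bidirectional_dynamic_rnn exists in this
# TensorFlow version. It accepts per-layer lists of cells, so no MultiRNNCell indexing is needed.
def encoding_layer_stacked(enc_embed, rnn_size, num_layers, keep_prob, source_sequence_length):
    def make_cell():
        cell = tf.contrib.rnn.LSTMCell(rnn_size,
                                       initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)

    cells_fw = [make_cell() for _ in range(num_layers)]
    cells_bw = [make_cell() for _ in range(num_layers)]

    # outputs: [batch, time, 2 * rnn_size] from the top layer;
    # states_fw / states_bw: one LSTMStateTuple per layer and direction.
    outputs, states_fw, states_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
        cells_fw, cells_bw, enc_embed,
        sequence_length=source_sequence_length, dtype=tf.float32)
    return outputs, states_fw, states_bw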
Any help or guidance on this would be greatly appreciated.
I am using tensorflow 1.3.
Both snippets do look a bit over-complicated to me. In any case, I tried a simpler version and it works. In your code, try removing reuse=tf.AUTO_REUSE from create_cell_fw and create_cell_bw. Below is my simple implementation.
def encoding_layer(input_data, num_layers, rnn_size, sequence_length, keep_prob):
    output = input_data
    for layer in range(num_layers):
        with tf.variable_scope('encoder_{}'.format(layer), reuse=tf.AUTO_REUSE):
            cell_fw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2))
            cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=keep_prob)

            cell_bw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2))
            cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=keep_prob)

            outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw,
                                                              cell_bw,
                                                              output,
                                                              sequence_length,
                                                              dtype=tf.float32)
            output = tf.concat(outputs, 2)
            state = tf.concat(states, 2)

    return output, state
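For completeness, this is roughly how I call it; the names input_embeddings, seq_len and the concrete sizes are placeholders of my own, just to show the expected shapes:

# Hypothetical call site (placeholder names and sizes, only to illustrate the shapes).
input_embeddings = tf.placeholder(tf.float32, [None, None, 128])  # [batch, time, embed_size]
seq_len = tf.placeholder(tf.int32, [None])                        # one length per sequence in the batch
keep_prob = tf.placeholder(tf.float32)

# output has shape [batch, time, 2 * rnn_size]; state holds the concatenated fw/bw final states
# of the top layer.
output, state = encoding_layer(input_embeddings, num_layers=2, rnn_size=64,
                               sequence_length=seq_len, keep_prob=keep_prob)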