如何在 Tensorflow 中使用多层双向 LSTM?
How to use multilayered bidirectional LSTM in Tensorflow?
我想知道如何在Tensorflow中使用多层双向LSTM。
我已经实现了双向LSTM的内容,但我想将这个模型与添加多层的模型进行比较。
我应该如何在这部分添加一些代码?
x = tf.unstack(tf.transpose(x, perm=[1, 0, 2]))
#print(x[0].get_shape())
# Define lstm cells with tensorflow
# Forward direction cell
lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Backward direction cell
lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
try:
outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
dtype=tf.float32)
except Exception: # Old TensorFlow version only returns outputs not states
outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
dtype=tf.float32)
# Linear activation, using rnn inner loop last output
outputs = tf.stack(outputs, axis=1)
outputs = tf.reshape(outputs, (batch_size*n_steps, n_hidden*2))
outputs = tf.matmul(outputs, weights['out']) + biases['out']
outputs = tf.reshape(outputs, (batch_size, n_steps, n_classes))
您可以使用两种不同的方法来应用多层 bilstm 模型:
1) 使用前一个 bilstm 层的输出作为下一个 bilstm 的输入。一开始,您应该创建长度为 num_layers 的前向和后向单元格数组。并且
for n in range(num_layers):
cell_fw = cell_forw[n]
cell_bw = cell_back[n]
state_fw = cell_fw.zero_state(batch_size, tf.float32)
state_bw = cell_bw.zero_state(batch_size, tf.float32)
(output_fw, output_bw), last_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, output,
initial_state_fw=state_fw,
initial_state_bw=state_bw,
scope='BLSTM_'+ str(n),
dtype=tf.float32)
output = tf.concat([output_fw, output_bw], axis=2)
2) 另一种方法也值得一看 stacked bilstm.
在塔拉斯的回答之上。这是另一个仅使用带有 GRU 单元的 2 层双向 RNN 的示例
embedding_weights = tf.Variable(tf.random_uniform([vocabulary_size, state_size], -1.0, 1.0))
embedding_vectors = tf.nn.embedding_lookup(embedding_weights, tokens)
#First BLSTM
cell = tf.nn.rnn_cell.GRUCell(state_size)
cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=1-dropout)
(forward_output, backward_output), _ = \
tf.nn.bidirectional_dynamic_rnn(cell, cell, inputs=embedding_vectors,
sequence_length=lengths, dtype=tf.float32,scope='BLSTM_1')
outputs = tf.concat([forward_output, backward_output], axis=2)
#Second BLSTM using the output of previous layer as an input.
cell2 = tf.nn.rnn_cell.GRUCell(state_size)
cell2 = tf.nn.rnn_cell.DropoutWrapper(cell2, output_keep_prob=1-dropout)
(forward_output, backward_output), _ = \
tf.nn.bidirectional_dynamic_rnn(cell2, cell2, inputs=outputs,
sequence_length=lengths, dtype=tf.float32,scope='BLSTM_2')
outputs = tf.concat([forward_output, backward_output], axis=2)
顺便说一句,不要忘记添加不同的范围名称。希望对您有所帮助。
这与第一个答案基本相同,但作用域名称的用法略有不同,并添加了 dropout 包装器。它还处理第一个答案给出的关于变量范围的错误。
def bidirectional_lstm(input_data, num_layers, rnn_size, keep_prob):
output = input_data
for layer in range(num_layers):
with tf.variable_scope('encoder_{}'.format(layer),reuse=tf.AUTO_REUSE):
# By giving a different variable scope to each layer, I've ensured that
# the weights are not shared among the layers. If you want to share the
# weights, you can do that by giving variable_scope as "encoder" but do
# make sure first that reuse is set to tf.AUTO_REUSE
cell_fw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2))
cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob = keep_prob)
cell_bw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2))
cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob = keep_prob)
outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw,
cell_bw,
output,
dtype=tf.float32)
# Concat the forward and backward outputs
output = tf.concat(outputs,2)
return output
正如@Taras 指出的那样,您可以使用:
(1) tf.nn.bidirectional_dynamic_rnn()
(2) tf.contrib.rnn.stack_bidirectional_dynamic_rnn()
.
之前的所有答案都只捕获了 (1),所以我给出了 (2) 的一些细节,特别是因为它通常优于 (1)。对于不同连接的直觉
.
假设您要创建一个包含 3 个 BLSTM 层的堆栈,每个层有 64 个节点:
num_layers = 3
num_nodes = 64
# Define LSTM cells
enc_fw_cells = [LSTMCell(num_nodes)for layer in range(num_layers)]
enc_bw_cells = [LSTMCell(num_nodes) for layer in range(num_layers)]
# Connect LSTM cells bidirectionally and stack
(all_states, fw_state, bw_state) = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
cells_fw=enc_fw_cells, cells_bw=enc_bw_cells, inputs=input_embed, dtype=tf.float32)
# Concatenate results
for k in range(num_layers):
if k == 0:
con_c = tf.concat((fw_state[k].c, bw_state[k].c), 1)
con_h = tf.concat((fw_state[k].h, bw_state[k].h), 1)
else:
con_c = tf.concat((con_c, fw_state[k].c, bw_state[k].c), 1)
con_h = tf.concat((con_h, fw_state[k].h, bw_state[k].h), 1)
output = tf.contrib.rnn.LSTMStateTuple(c=con_c, h=con_h)
在这种情况下,我使用堆叠的 biRNN 的最终状态而不是所有时间步长的状态(保存在 all_states
),因为我使用的是编码解码方案,其中上面的代码只是编码器。
我想知道如何在Tensorflow中使用多层双向LSTM。
我已经实现了双向LSTM的内容,但我想将这个模型与添加多层的模型进行比较。
我应该如何在这部分添加一些代码?
x = tf.unstack(tf.transpose(x, perm=[1, 0, 2]))
#print(x[0].get_shape())
# Define lstm cells with tensorflow
# Forward direction cell
lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Backward direction cell
lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
try:
outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
dtype=tf.float32)
except Exception: # Old TensorFlow version only returns outputs not states
outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
dtype=tf.float32)
# Linear activation, using rnn inner loop last output
outputs = tf.stack(outputs, axis=1)
outputs = tf.reshape(outputs, (batch_size*n_steps, n_hidden*2))
outputs = tf.matmul(outputs, weights['out']) + biases['out']
outputs = tf.reshape(outputs, (batch_size, n_steps, n_classes))
您可以使用两种不同的方法来应用多层 bilstm 模型:
1) 使用前一个 bilstm 层的输出作为下一个 bilstm 的输入。一开始,您应该创建长度为 num_layers 的前向和后向单元格数组。并且
for n in range(num_layers):
cell_fw = cell_forw[n]
cell_bw = cell_back[n]
state_fw = cell_fw.zero_state(batch_size, tf.float32)
state_bw = cell_bw.zero_state(batch_size, tf.float32)
(output_fw, output_bw), last_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, output,
initial_state_fw=state_fw,
initial_state_bw=state_bw,
scope='BLSTM_'+ str(n),
dtype=tf.float32)
output = tf.concat([output_fw, output_bw], axis=2)
2) 另一种方法也值得一看 stacked bilstm.
在塔拉斯的回答之上。这是另一个仅使用带有 GRU 单元的 2 层双向 RNN 的示例
embedding_weights = tf.Variable(tf.random_uniform([vocabulary_size, state_size], -1.0, 1.0))
embedding_vectors = tf.nn.embedding_lookup(embedding_weights, tokens)
#First BLSTM
cell = tf.nn.rnn_cell.GRUCell(state_size)
cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=1-dropout)
(forward_output, backward_output), _ = \
tf.nn.bidirectional_dynamic_rnn(cell, cell, inputs=embedding_vectors,
sequence_length=lengths, dtype=tf.float32,scope='BLSTM_1')
outputs = tf.concat([forward_output, backward_output], axis=2)
#Second BLSTM using the output of previous layer as an input.
cell2 = tf.nn.rnn_cell.GRUCell(state_size)
cell2 = tf.nn.rnn_cell.DropoutWrapper(cell2, output_keep_prob=1-dropout)
(forward_output, backward_output), _ = \
tf.nn.bidirectional_dynamic_rnn(cell2, cell2, inputs=outputs,
sequence_length=lengths, dtype=tf.float32,scope='BLSTM_2')
outputs = tf.concat([forward_output, backward_output], axis=2)
顺便说一句,不要忘记添加不同的范围名称。希望对您有所帮助。
这与第一个答案基本相同,但作用域名称的用法略有不同,并添加了 dropout 包装器。它还处理第一个答案给出的关于变量范围的错误。
def bidirectional_lstm(input_data, num_layers, rnn_size, keep_prob):
output = input_data
for layer in range(num_layers):
with tf.variable_scope('encoder_{}'.format(layer),reuse=tf.AUTO_REUSE):
# By giving a different variable scope to each layer, I've ensured that
# the weights are not shared among the layers. If you want to share the
# weights, you can do that by giving variable_scope as "encoder" but do
# make sure first that reuse is set to tf.AUTO_REUSE
cell_fw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2))
cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob = keep_prob)
cell_bw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.truncated_normal_initializer(-0.1, 0.1, seed=2))
cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob = keep_prob)
outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw,
cell_bw,
output,
dtype=tf.float32)
# Concat the forward and backward outputs
output = tf.concat(outputs,2)
return output
正如@Taras 指出的那样,您可以使用:
(1) tf.nn.bidirectional_dynamic_rnn()
(2) tf.contrib.rnn.stack_bidirectional_dynamic_rnn()
.
之前的所有答案都只捕获了 (1),所以我给出了 (2) 的一些细节,特别是因为它通常优于 (1)。对于不同连接的直觉
假设您要创建一个包含 3 个 BLSTM 层的堆栈,每个层有 64 个节点:
num_layers = 3
num_nodes = 64
# Define LSTM cells
enc_fw_cells = [LSTMCell(num_nodes)for layer in range(num_layers)]
enc_bw_cells = [LSTMCell(num_nodes) for layer in range(num_layers)]
# Connect LSTM cells bidirectionally and stack
(all_states, fw_state, bw_state) = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
cells_fw=enc_fw_cells, cells_bw=enc_bw_cells, inputs=input_embed, dtype=tf.float32)
# Concatenate results
for k in range(num_layers):
if k == 0:
con_c = tf.concat((fw_state[k].c, bw_state[k].c), 1)
con_h = tf.concat((fw_state[k].h, bw_state[k].h), 1)
else:
con_c = tf.concat((con_c, fw_state[k].c, bw_state[k].c), 1)
con_h = tf.concat((con_h, fw_state[k].h, bw_state[k].h), 1)
output = tf.contrib.rnn.LSTMStateTuple(c=con_c, h=con_h)
在这种情况下,我使用堆叠的 biRNN 的最终状态而不是所有时间步长的状态(保存在 all_states
),因为我使用的是编码解码方案,其中上面的代码只是编码器。