TensorFlow LSTM with GPU
I want to build an LSTM model using the GPU, but I get the following exception:
InvalidArgumentError (see above for traceback): Cannot assign a device to node 'model_1/tower_0/model/drnn/Assert/Assert': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
[[Node: model_1/tower_0/model/drnn/Assert/Assert = Assert[T=[DT_STRING, DT_INT32, DT_STRING, DT_INT32], summarize=3, _device="/device:GPU:0"](model_1/tower_0/model/drnn/All, model_1/tower_0/model/drnn/Assert/Assert/data_0, model_1/tower_0/model/drnn/stack, model_1/tower_0/model/drnn/Assert/Assert/data_2, model_1/tower_0/model/drnn/Shape_1)]]
It seems to be caused by some op that is not supported on the GPU. More of the traceback is below:
Caused by op 'model_1/tower_0/model/drnn/Assert/Assert', defined at:
  File "train.py", line 351, in <module>
    runner.run()
  File "train.py", line 61, in run
    is_train=False)
  File "/home/liuziqi/keyword_spotting/models/dynamic_rnn.py", line 38, in __init__
    self.build_graph(config, is_train)
  File "/home/liuziqi/keyword_spotting/utils/common.py", line 40, in wrapper
    result = func(*args, **kwargs)
  File "/home/liuziqi/keyword_spotting/models/dynamic_rnn.py", line 57, in build_graph
    seqLengths)
  File "/home/liuziqi/keyword_spotting/models/dynamic_rnn.py", line 182, in build_multi_dynamic_brnn
    scope="drnn")
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 540, in dynamic_rnn
    [_assert_has_shape(sequence_length, [batch_size])]):
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 535, in _assert_has_shape
    packed_shape, " but saw shape: ", x_shape])
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 121, in Assert
    condition, data, summarize, name="Assert")
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 39, in _assert
    summarize=summarize, name=name)
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()
The code that builds the LSTM is:
def build_graph(self, config, is_train):
    outputs = self.build_multi_dynamic_brnn(config, self.inputX,
                                            self.seqLengths)
    with tf.name_scope('fc-layer'):
        if config.use_project:
            weightsClasses = tf.Variable(
                tf.truncated_normal(
                    [config.num_proj, config.num_classes],
                    name='weightsClasses'))
            flatten_outputs = tf.reshape(outputs, (-1, config.num_proj))
        else:
            weightsClasses = tf.Variable(
                tf.truncated_normal(
                    [config.hidden_size, config.num_classes],
                    name='weightsClasses'))
            flatten_outputs = tf.reshape(outputs,
                                         (-1, config.hidden_size))
        biasesClasses = tf.Variable(tf.zeros([config.num_classes]),
                                    name='biasesClasses')
        flatten_logits = tf.matmul(flatten_outputs,
                                   weightsClasses) + biasesClasses
        self.softmax = tf.reshape(tf.nn.softmax(flatten_logits),
                                  (config.batch_size, -1,
                                   config.num_classes))
    if is_train:
        flatten_labels = tf.reshape(self.labels,
                                    (-1, config.num_classes))
        self.xent_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=flatten_labels,
                                                    logits=flatten_logits))
        # calculating max-pooling loss
        self.log_softmax = -tf.log(self.softmax)
        self.crop_log_softmax = tf.slice(self.log_softmax, [0, 0, 1],
                                         [-1, -1, -1])
        self.crop_labels = tf.slice(self.labels, [0, 0, 1], [-1, -1, -1])
        self.masked_log_softmax = self.crop_log_softmax * self.crop_labels
        self.segment_len = tf.count_nonzero(
            self.masked_log_softmax, 1,
            dtype=tf.float32)  # shape (batch_size, class_num)
        self.max_frame = tf.reduce_max(
            self.masked_log_softmax, 1)  # shape (batch_size, class_num)
        self.xent_max_frame = tf.reduce_sum(
            self.max_frame * self.segment_len)
        self.background_log_softmax = tf.slice(self.log_softmax, [0, 0, 0],
                                               [-1, -1, 1])
        self.background_label = tf.slice(self.labels, [0, 0, 0],
                                         [-1, -1, 1])
        self.xent_background = tf.reduce_sum(
            tf.reduce_sum(
                self.background_log_softmax * self.background_label,
                (1, 2)) / tf.cast(self.seqLengths, tf.float32))
        self.flatten_masked_softmax = tf.reshape(self.masked_log_softmax,
                                                 (config.batch_size, -1))
        self.max_index = tf.arg_max(self.flatten_masked_softmax, 1)
        self.max_pooling_loss = self.xent_background + self.xent_max_frame
        self.var_trainable_op = tf.trainable_variables()
        if config.max_pooling_loss:
            self.loss = self.max_pooling_loss
        else:
            self.loss = self.xent_loss
        if config.grad_clip == -1:
            # do not apply gradient clipping
            self.optimizer = tf.train.AdamOptimizer(
                config.learning_rate).minimize(self.loss)
        else:
            # apply gradient clipping
            grads, _ = tf.clip_by_global_norm(
                tf.gradients(self.loss, self.var_trainable_op),
                config.grad_clip)
            opti = tf.train.AdamOptimizer(config.learning_rate)
            self.optimizer = opti.apply_gradients(
                zip(grads, self.var_trainable_op))

def build_multi_dynamic_brnn(self, config, inputX, seqLengths):
    hid_input = inputX
    cell = cell_fn(num_units=config.hidden_size,
                   use_peepholes=True,
                   cell_clip=config.cell_clip,
                   initializer=tf.contrib.layers.xavier_initializer(),
                   num_proj=config.num_proj if config.use_project else None,
                   proj_clip=None,
                   forget_bias=1.0,
                   state_is_tuple=True,
                   activation=tf.tanh,
                   reuse=tf.get_variable_scope().reuse)
    for i in range(config.num_layers):
        outputs, output_states = dynamic_rnn(cell,
                                             inputs=hid_input,
                                             sequence_length=seqLengths,
                                             initial_state=None,
                                             dtype=tf.float32,
                                             scope="drnn")
        # tensor of shape: [batch_size, max_time, input_size]
        hidden = outputs
        if config.mode == 'train':
            hidden = self.dropout(hidden, config.keep_prob)
        if i != config.num_layers - 1:
            hid_input = hidden
    return hidden
Can anyone help me find out which step or op causes this error? (The code ran fine when I ran it on the CPU; the error appeared when I switched to the GPU build of TensorFlow, so I believe some op involved is not GPU-compatible.)
The dynamic_rnn op internally uses tf.Assert to perform some checks on string data, and Assert has no GPU implementation. You can enable soft placement (allow_soft_placement in the session config), so that TensorFlow places such ops on the CPU instead. Since Assert is not a real compute op, this should not degrade your performance.
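As a minimal sketch, enabling soft placement when creating the session looks like this (TF 1.x session API; train_op here is just a placeholder for whatever op you actually run):

import tensorflow as tf

# allow_soft_placement=True lets TensorFlow fall back to the CPU for ops
# (such as Assert) that have no GPU kernel, instead of raising
# InvalidArgumentError. log_device_placement=True is optional and prints
# the device each op is actually placed on, which helps spot such ops.
session_config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=True)

with tf.Session(config=session_config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)  # placeholder for your own training op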