CNN compiling error : Failed to get convolution algorithm. This is probably because cuDNN failed to initialize,
CNN compiling error : Failed to get convolution algorithm. This is probably because cuDNN failed to initialize,
我尝试自己构建 VGG-19 模型,但每次编译并训练时都会报错,而同样的流程在 AlexNet 上可以正常运行。请帮我指出问题所在。
设置
CPU:AMD 锐龙 2700x
显卡:RTX 2080
OS : Windows 10
CUDA:10.0
Tensorflow 版本:Tensorflow-2.0.0beta1
工作工具:JupyterLab
时间:凌晨 3 点
气候 : 多雨
感觉:阴郁:(
代码
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from functools import partial
import skimage.io as io
import numpy as np
# Number of training epochs passed to `fit`.
EPOCH = 5
# Batch size applied to both the training and the test pipeline.
BATCH = 32
# Learning rate handed to the Adam optimizer.
Learning_rate = 0.0001
# ReLU with alpha=0.1, used as the activation of every conv layer and of the
# two hidden dense layers.
leaky_relu = partial(keras.activations.relu, alpha=0.1)
def to_float(image, label):
    """Scale an image by 1/255 and pass its label through unchanged.

    Args:
        image: Pixel data; presumably 8-bit values in [0, 255] -- confirm
            against the dataset being mapped.
        label: Class label belonging to ``image``; returned untouched.

    Returns:
        A ``(scaled_image, label)`` tuple, the shape expected by
        ``Dataset.map`` for supervised pairs.
    """
    float_image = image / 255
    return (float_image, label)
# Load CIFAR-10 as supervised (image, label) pairs together with its metadata.
data, info = tfds.load('cifar10', as_supervised=True, with_info=True)
train = data.get('train')
test = data.get('test')

# Training pipeline: rescale, shuffle with a 10000-element buffer, batch, prefetch.
train_data = (
    train
    .map(to_float)
    .shuffle(10000)
    .batch(BATCH)
    .prefetch(3)
)
# Test pipeline: same rescaling and batching, no shuffling.
test_data = (
    test
    .map(to_float)
    .batch(BATCH)
    .prefetch(4)
)

# Take the images of the first training batch and display them as a collection.
first_batch = next(iter(train_data))
sample_batch = first_batch[0]
sample_images = io.concatenate_images(np.array(sample_batch))
print(io.imshow_collection(sample_images))
class VGG19(tf.keras.Model):
    """VGG-19-style image classifier built as a subclassed Keras model.

    The network has five convolutional stages (2, 2, 4, 4 and 4 conv layers
    with 64, 128, 256, 512 and 512 filters), each followed by a 2x2 max-pool,
    then two 4096-unit dense layers with dropout and a 10-unit softmax head.
    The model therefore outputs class probabilities, not logits.
    """

    @staticmethod
    def _make_conv(filters):
        """Return a 3x3, stride-1, 'same'-padded conv layer with leaky ReLU."""
        return layers.Conv2D(filters=filters,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             padding='same',
                             activation=leaky_relu)

    @staticmethod
    def _make_pool():
        """Return a 2x2, stride-2, 'same'-padded max-pooling layer."""
        return layers.MaxPool2D(pool_size=(2, 2),
                                strides=(2, 2),
                                padding='same')

    def __init__(self):
        """Create all layers; creation order matches the forward pass."""
        super().__init__()
        # Stage 1: 64 filters.
        self.conv1_1 = self._make_conv(64)
        self.conv1_2 = self._make_conv(64)
        self.pool1 = self._make_pool()
        # Stage 2: 128 filters.
        self.conv2_1 = self._make_conv(128)
        self.conv2_2 = self._make_conv(128)
        self.pool2 = self._make_pool()
        # Stage 3: 256 filters.
        self.conv3_1 = self._make_conv(256)
        self.conv3_2 = self._make_conv(256)
        self.conv3_3 = self._make_conv(256)
        self.conv3_4 = self._make_conv(256)
        self.pool3 = self._make_pool()
        # Stage 4: 512 filters.
        self.conv4_1 = self._make_conv(512)
        self.conv4_2 = self._make_conv(512)
        self.conv4_3 = self._make_conv(512)
        self.conv4_4 = self._make_conv(512)
        self.pool4 = self._make_pool()
        # Stage 5: 512 filters.
        self.conv5_1 = self._make_conv(512)
        self.conv5_2 = self._make_conv(512)
        self.conv5_3 = self._make_conv(512)
        self.conv5_4 = self._make_conv(512)
        self.pool5 = self._make_pool()
        # Classifier head.
        self.flat6 = layers.Flatten()
        self.fc6 = layers.Dense(units=4096,
                                activation=leaky_relu)
        self.drop6 = layers.Dropout(rate=0.5)
        self.fc7 = layers.Dense(units=4096,
                                activation=leaky_relu)
        self.drop7 = layers.Dropout(rate=0.5)
        self.fc8 = layers.Dense(units=10,
                                activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of input images.
            training: Optional flag forwarded to the dropout layers so that
                dropout is only active while training. ``None`` lets Keras
                decide, which keeps existing ``model(x)`` callers working.

        Returns:
            The softmax output of the final 10-unit dense layer.
        """
        feature_layers = (
            self.conv1_1, self.conv1_2, self.pool1,
            self.conv2_1, self.conv2_2, self.pool2,
            self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4, self.pool3,
            self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4, self.pool4,
            self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4, self.pool5,
        )
        x = inputs
        for layer in feature_layers:
            x = layer(x)
        x = self.flat6(x)
        x = self.fc6(x)
        x = self.drop6(x, training=training)
        x = self.fc7(x)
        x = self.drop7(x, training=training)
        output = self.fc8(x)
        return output
VGG19_model = VGG19()
# The model's last layer (fc8) already applies softmax, so its output is a
# probability distribution. The loss must therefore NOT treat it as logits;
# from_logits=True would apply softmax a second time and distort training.
my_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
VGG19_model.compile(
    optimizer=tf.keras.optimizers.Adam(Learning_rate),
    loss=my_loss,
    metrics=['accuracy'],
)
VGG19_model.fit(train_data, epochs=EPOCH, verbose=2)
错误信息
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-21-1e291c11711c> in <module>
4 )
5
----> 6 VGG19_model.fit(train_data, epochs = EPOCH, verbose=2)
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
641 max_queue_size=max_queue_size,
642 workers=workers,
--> 643 use_multiprocessing=use_multiprocessing)
644
645 def evaluate(self,
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\engine\training_generator.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
692 shuffle=shuffle,
693 initial_epoch=initial_epoch,
--> 694 steps_name='steps_per_epoch')
695
696 def evaluate(self,
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\engine\training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
262
263 is_deferred = not model._is_compiled
--> 264 batch_outs = batch_function(*batch_data)
265 if not isinstance(batch_outs, list):
266 batch_outs = [batch_outs]
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
916 self._update_sample_weight_modes(sample_weights=sample_weights)
917 self._make_train_function()
--> 918 outputs = self.train_function(ins) # pylint: disable=not-callable
919
920 if reset_metrics:
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\backend.py in __call__(self, inputs)
3508 value = math_ops.cast(value, tensor.dtype)
3509 converted_inputs.append(value)
-> 3510 outputs = self._graph_fn(*converted_inputs)
3511
3512 # EagerTensor.numpy() will often make a copy to ensure memory safety.
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
570 raise TypeError("Keyword arguments {} unknown. Expected {}.".format(
571 list(kwargs.keys()), list(self._arg_keywords)))
--> 572 return self._call_flat(args)
573
574 def _filtered_call(self, args, kwargs):
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args)
669 # Only need to override the gradient in graph mode and when we have outputs.
670 if context.executing_eagerly() or not self.outputs:
--> 671 outputs = self._inference_function.call(ctx, args)
672 else:
673 self._register_gradient()
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args)
443 attrs=("executor_type", executor_type,
444 "config_proto", config),
--> 445 ctx=ctx)
446 # Replace empty list with None
447 outputs = outputs or None
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 if any(ops._is_keras_symbolic_tensor(x) for x in inputs):
~\Anaconda3\envs\cuda\lib\site-packages\six.py in raise_from(value, from_value)
未知错误:发现 2 个根本错误。
(0)未知:无法获取卷积算法。这可能是因为 cuDNN 初始化失败,所以尝试查看上面是否打印了警告日志消息。
[[节点 vg_g19_2/conv2d_32/Conv2D(定义于 :6)]]
[[Func/Adam/gradients_2/vg_g19_2/dropout_5/cond_grad/If/then/_22/input/_75/_64]]
(1)未知:获取卷积算法失败。这可能是因为 cuDNN 初始化失败,所以尝试查看上面是否打印了警告日志消息。
[[节点 vg_g19_2/conv2d_32/Conv2D(定义于 :6)]]
0 次成功操作。
忽略 0 个派生错误。 [操作:__inference_keras_scratch_graph_10115]
函数调用堆栈:
keras_scratch_graph -> keras_scratch_graph
首先要检查是否正确安装了兼容的 CUDA、cuDNN 驱动程序。然后您可以通过允许 gpu 内存增长来尝试 gpu 内存资源管理。
allow_growth 选项会根据运行时的实际需要按需分配 GPU 内存:它一开始只分配很少的内存,随着会话运行并需要更多 GPU 内存,再逐步扩展 TensorFlow 进程所占用的 GPU 内存区域。
要了解更多信息,请参阅 https://www.tensorflow.org/guide/using_gpu#allowing_gpu_memory_growth
您可以尝试允许 GPU 内存增长:
# TensorFlow 2.x no longer exposes tf.ConfigProto / tf.Session at the top
# level; memory growth is enabled per physical GPU instead.
# This must run before any GPU has been initialized (i.e. before the first
# TensorFlow op executes), otherwise set_memory_growth raises RuntimeError.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
我尝试自己构建 VGG-19 模型,但每次编译并训练时都会报错,而同样的流程在 AlexNet 上可以正常运行。请帮我指出问题所在。
设置
CPU:AMD 锐龙 2700x
显卡:RTX 2080
OS : Windows 10
CUDA:10.0
Tensorflow 版本:Tensorflow-2.0.0beta1
工作工具:JupyterLab
时间:凌晨 3 点
气候 : 多雨
感觉:阴郁:(
代码
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from functools import partial
import skimage.io as io
import numpy as np
# Number of training epochs passed to `fit`.
EPOCH = 5
# Batch size applied to both the training and the test pipeline.
BATCH = 32
# Learning rate handed to the Adam optimizer.
Learning_rate = 0.0001
# ReLU with alpha=0.1, used as the activation of every conv layer and of the
# two hidden dense layers.
leaky_relu = partial(keras.activations.relu, alpha=0.1)
def to_float(image, label):
    """Scale an image by 1/255 and pass its label through unchanged.

    Args:
        image: Pixel data; presumably 8-bit values in [0, 255] -- confirm
            against the dataset being mapped.
        label: Class label belonging to ``image``; returned untouched.

    Returns:
        A ``(scaled_image, label)`` tuple, the shape expected by
        ``Dataset.map`` for supervised pairs.
    """
    float_image = image / 255
    return (float_image, label)
# Load CIFAR-10 as supervised (image, label) pairs together with its metadata.
data, info = tfds.load('cifar10', as_supervised=True, with_info=True)
train = data.get('train')
test = data.get('test')

# Training pipeline: rescale, shuffle with a 10000-element buffer, batch, prefetch.
train_data = (
    train
    .map(to_float)
    .shuffle(10000)
    .batch(BATCH)
    .prefetch(3)
)
# Test pipeline: same rescaling and batching, no shuffling.
test_data = (
    test
    .map(to_float)
    .batch(BATCH)
    .prefetch(4)
)

# Take the images of the first training batch and display them as a collection.
first_batch = next(iter(train_data))
sample_batch = first_batch[0]
sample_images = io.concatenate_images(np.array(sample_batch))
print(io.imshow_collection(sample_images))
class VGG19(tf.keras.Model):
    """VGG-19-style image classifier built as a subclassed Keras model.

    The network has five convolutional stages (2, 2, 4, 4 and 4 conv layers
    with 64, 128, 256, 512 and 512 filters), each followed by a 2x2 max-pool,
    then two 4096-unit dense layers with dropout and a 10-unit softmax head.
    The model therefore outputs class probabilities, not logits.
    """

    @staticmethod
    def _make_conv(filters):
        """Return a 3x3, stride-1, 'same'-padded conv layer with leaky ReLU."""
        return layers.Conv2D(filters=filters,
                             kernel_size=(3, 3),
                             strides=(1, 1),
                             padding='same',
                             activation=leaky_relu)

    @staticmethod
    def _make_pool():
        """Return a 2x2, stride-2, 'same'-padded max-pooling layer."""
        return layers.MaxPool2D(pool_size=(2, 2),
                                strides=(2, 2),
                                padding='same')

    def __init__(self):
        """Create all layers; creation order matches the forward pass."""
        super().__init__()
        # Stage 1: 64 filters.
        self.conv1_1 = self._make_conv(64)
        self.conv1_2 = self._make_conv(64)
        self.pool1 = self._make_pool()
        # Stage 2: 128 filters.
        self.conv2_1 = self._make_conv(128)
        self.conv2_2 = self._make_conv(128)
        self.pool2 = self._make_pool()
        # Stage 3: 256 filters.
        self.conv3_1 = self._make_conv(256)
        self.conv3_2 = self._make_conv(256)
        self.conv3_3 = self._make_conv(256)
        self.conv3_4 = self._make_conv(256)
        self.pool3 = self._make_pool()
        # Stage 4: 512 filters.
        self.conv4_1 = self._make_conv(512)
        self.conv4_2 = self._make_conv(512)
        self.conv4_3 = self._make_conv(512)
        self.conv4_4 = self._make_conv(512)
        self.pool4 = self._make_pool()
        # Stage 5: 512 filters.
        self.conv5_1 = self._make_conv(512)
        self.conv5_2 = self._make_conv(512)
        self.conv5_3 = self._make_conv(512)
        self.conv5_4 = self._make_conv(512)
        self.pool5 = self._make_pool()
        # Classifier head.
        self.flat6 = layers.Flatten()
        self.fc6 = layers.Dense(units=4096,
                                activation=leaky_relu)
        self.drop6 = layers.Dropout(rate=0.5)
        self.fc7 = layers.Dense(units=4096,
                                activation=leaky_relu)
        self.drop7 = layers.Dropout(rate=0.5)
        self.fc8 = layers.Dense(units=10,
                                activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of input images.
            training: Optional flag forwarded to the dropout layers so that
                dropout is only active while training. ``None`` lets Keras
                decide, which keeps existing ``model(x)`` callers working.

        Returns:
            The softmax output of the final 10-unit dense layer.
        """
        feature_layers = (
            self.conv1_1, self.conv1_2, self.pool1,
            self.conv2_1, self.conv2_2, self.pool2,
            self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4, self.pool3,
            self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4, self.pool4,
            self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4, self.pool5,
        )
        x = inputs
        for layer in feature_layers:
            x = layer(x)
        x = self.flat6(x)
        x = self.fc6(x)
        x = self.drop6(x, training=training)
        x = self.fc7(x)
        x = self.drop7(x, training=training)
        output = self.fc8(x)
        return output
VGG19_model = VGG19()
# The model's last layer (fc8) already applies softmax, so its output is a
# probability distribution. The loss must therefore NOT treat it as logits;
# from_logits=True would apply softmax a second time and distort training.
my_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
VGG19_model.compile(
    optimizer=tf.keras.optimizers.Adam(Learning_rate),
    loss=my_loss,
    metrics=['accuracy'],
)
VGG19_model.fit(train_data, epochs=EPOCH, verbose=2)
错误信息
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-21-1e291c11711c> in <module>
4 )
5
----> 6 VGG19_model.fit(train_data, epochs = EPOCH, verbose=2)
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
641 max_queue_size=max_queue_size,
642 workers=workers,
--> 643 use_multiprocessing=use_multiprocessing)
644
645 def evaluate(self,
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\engine\training_generator.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
692 shuffle=shuffle,
693 initial_epoch=initial_epoch,
--> 694 steps_name='steps_per_epoch')
695
696 def evaluate(self,
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\engine\training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
262
263 is_deferred = not model._is_compiled
--> 264 batch_outs = batch_function(*batch_data)
265 if not isinstance(batch_outs, list):
266 batch_outs = [batch_outs]
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
916 self._update_sample_weight_modes(sample_weights=sample_weights)
917 self._make_train_function()
--> 918 outputs = self.train_function(ins) # pylint: disable=not-callable
919
920 if reset_metrics:
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\keras\backend.py in __call__(self, inputs)
3508 value = math_ops.cast(value, tensor.dtype)
3509 converted_inputs.append(value)
-> 3510 outputs = self._graph_fn(*converted_inputs)
3511
3512 # EagerTensor.numpy() will often make a copy to ensure memory safety.
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
570 raise TypeError("Keyword arguments {} unknown. Expected {}.".format(
571 list(kwargs.keys()), list(self._arg_keywords)))
--> 572 return self._call_flat(args)
573
574 def _filtered_call(self, args, kwargs):
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args)
669 # Only need to override the gradient in graph mode and when we have outputs.
670 if context.executing_eagerly() or not self.outputs:
--> 671 outputs = self._inference_function.call(ctx, args)
672 else:
673 self._register_gradient()
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args)
443 attrs=("executor_type", executor_type,
444 "config_proto", config),
--> 445 ctx=ctx)
446 # Replace empty list with None
447 outputs = outputs or None
~\Anaconda3\envs\cuda\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 if any(ops._is_keras_symbolic_tensor(x) for x in inputs):
~\Anaconda3\envs\cuda\lib\site-packages\six.py in raise_from(value, from_value)
未知错误:发现 2 个根本错误。
(0)未知:无法获取卷积算法。这可能是因为 cuDNN 初始化失败,所以尝试查看上面是否打印了警告日志消息。
[[节点 vg_g19_2/conv2d_32/Conv2D(定义于 :6)]]
[[Func/Adam/gradients_2/vg_g19_2/dropout_5/cond_grad/If/then/_22/input/_75/_64]]
(1)未知:获取卷积算法失败。这可能是因为 cuDNN 初始化失败,所以尝试查看上面是否打印了警告日志消息。
[[节点 vg_g19_2/conv2d_32/Conv2D(定义于 :6)]]
0 次成功操作。
忽略 0 个派生错误。 [操作:__inference_keras_scratch_graph_10115]
函数调用堆栈:
keras_scratch_graph -> keras_scratch_graph
首先要检查是否正确安装了兼容的 CUDA、cuDNN 驱动程序。然后您可以通过允许 gpu 内存增长来尝试 gpu 内存资源管理。
allow_growth 选项会根据运行时的实际需要按需分配 GPU 内存:它一开始只分配很少的内存,随着会话运行并需要更多 GPU 内存,再逐步扩展 TensorFlow 进程所占用的 GPU 内存区域。
要了解更多信息,请参阅 https://www.tensorflow.org/guide/using_gpu#allowing_gpu_memory_growth
您可以尝试允许 GPU 内存增长:
# TensorFlow 2.x no longer exposes tf.ConfigProto / tf.Session at the top
# level; memory growth is enabled per physical GPU instead.
# This must run before any GPU has been initialized (i.e. before the first
# TensorFlow op executes), otherwise set_memory_growth raises RuntimeError.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)