tensorflow.python.framework.errors_impl.InternalError: Dst tensor is not initialized
tensorflow.python.framework.errors_impl.InternalError: Dst tensor is not initialized
我正在参照此链接,在我自己的数据集上实现 cDCGAN。我的数据集包含近 391510 张图像。我的数据集的图像大小是 64,而该链接中使用的 MNIST 图像大小是 28。我的数据集有 2350 个标签,而 MNIST 数据集有 10 个标签。
我的数据集采用 .tfrecords 格式,因此我使用 get_image() 函数从中检索一批图像和标签,如下所示。当我运行我的代码时,出现以下错误:
`tensorflow.python.framework.errors_impl.InternalError: Dst tensor is not initialized.
[[Node: _arg_Placeholder_3_0_3/_43 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_2488__arg_Placeholder_3_0_3", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[Node: discriminator_1/batch_normalization/AssignMovingAvg_1/_86 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_2364_discriminator_1/batch_normalization/AssignMovingAvg_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]`
当我搜索这个错误时,我发现如果批量大小很大,就会发生这种情况,所以我将批量大小更改为 32,然后我得到了这个新错误。
` tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[32,64,64,2351] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[Node: discriminator/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_Placeholder_0_0/_41, _arg_Placeholder_3_0_3/_43, discriminator/concat/axis)]]
Caused by op 'discriminator/concat', defined at:
File "cdcgan.py", line 221, in <module>
D_real, D_real_logits = discriminator(x, y_fill, isTrain)
File "cdcgan.py", line 48, in discriminator
cat1 = tf.concat([x, y_fill], 3)
`
我更改默认代码的代码部分如下
# Width and height of one input image; get_image() flattens every decoded
# image to IMAGE_WIDTH * IMAGE_HEIGHT values.
IMAGE_WIDTH, IMAGE_HEIGHT = 64, 64
# G(z)
def generator(x, y_label, isTrain=True, reuse=False):
    """Build the conditional generator graph.

    The two inputs are joined along axis 3 and pushed through three
    transposed convolutions (256, 128 and 1 output channels); the first two
    are followed by batch normalization and a leaky ReLU, the last one by
    tanh.

    Args:
        x: 4-D tensor concatenated with ``y_label`` on axis 3 (presumably the
            latent noise batch -- TODO confirm at the call site).
        y_label: 4-D conditioning tensor; must be concatenable with ``x`` on
            axis 3.
        isTrain: forwarded as ``training`` to both batch-normalization layers.
        reuse: forwarded to ``tf.variable_scope('generator', ...)``.

    Returns:
        The tanh-activated output of the final transposed convolution.
    """
    with tf.variable_scope('generator', reuse=reuse):
        # Every layer shares the same weight / bias initializers.
        layer_init = {
            'kernel_initializer': tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
            'bias_initializer': tf.constant_initializer(0.0),
        }

        # concat layer
        joined = tf.concat([x, y_label], 3)

        # 1st hidden layer: 16x16 kernel, stride 1, 'valid' padding
        up1 = tf.layers.conv2d_transpose(
            joined, 256, [16, 16], strides=(1, 1), padding='valid', **layer_init)
        act1 = lrelu(tf.layers.batch_normalization(up1, training=isTrain), 0.2)

        # 2nd hidden layer: stride 2 doubles the spatial size
        up2 = tf.layers.conv2d_transpose(
            act1, 128, [5, 5], strides=(2, 2), padding='same', **layer_init)
        act2 = lrelu(tf.layers.batch_normalization(up2, training=isTrain), 0.2)

        # output layer: single channel, stride 2 doubles the spatial size again
        up3 = tf.layers.conv2d_transpose(
            act2, 1, [5, 5], strides=(2, 2), padding='same', **layer_init)
        return tf.nn.tanh(up3)
# D(x)
def discriminator(x, y_fill, isTrain=True, reuse=False):
    """Build the conditional discriminator graph.

    ``x`` and ``y_fill`` are joined along axis 3, passed through two
    stride-2 convolutions (128 and 256 filters) and reduced to a single
    channel by a 16x16 'valid' convolution.

    NOTE: the concatenated tensor carries the channels of ``x`` plus the
    channels of ``y_fill`` at the full spatial size of the input, so a
    one-hot ``y_fill`` with thousands of classes makes this tensor very
    large.

    Args:
        x: 4-D image tensor.
        y_fill: 4-D conditioning tensor; must be concatenable with ``x`` on
            axis 3.
        isTrain: forwarded as ``training`` to the batch-normalization layer.
        reuse: forwarded to ``tf.variable_scope('discriminator', ...)``.

    Returns:
        A pair ``(probability, logits)``: the sigmoid of the last
        convolution's output and that raw output itself.
    """
    with tf.variable_scope('discriminator', reuse=reuse):
        # initializers shared by the layers below
        kernel_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.02)
        bias_init = tf.constant_initializer(0.0)

        # concat layer
        joined = tf.concat([x, y_fill], 3)

        # 1st hidden layer (no batch normalization on this one)
        down1 = tf.layers.conv2d(
            joined, 128, [5, 5], strides=(2, 2), padding='same',
            kernel_initializer=kernel_init, bias_initializer=bias_init)
        act1 = lrelu(down1, 0.2)

        # 2nd hidden layer
        down2 = tf.layers.conv2d(
            act1, 256, [5, 5], strides=(2, 2), padding='same',
            kernel_initializer=kernel_init, bias_initializer=bias_init)
        act2 = lrelu(tf.layers.batch_normalization(down2, training=isTrain), 0.2)

        # output layer: bias initializer is left at the layer default
        logits = tf.layers.conv2d(
            act2, 1, [16, 16], strides=(1, 1), padding='valid',
            kernel_initializer=kernel_init)
        return tf.nn.sigmoid(logits), logits
def get_image(files, num_classes):
    """Create the graph ops that read one (label, image) example from TFRecords.

    Each record is parsed for an int64 class index and a JPEG-encoded image.
    The image is decoded as a single-channel picture, converted to float32
    and flattened to ``IMAGE_WIDTH * IMAGE_HEIGHT`` values; the class index
    is turned into a one-hot vector of length ``num_classes``.

    Args:
        files: TFRecords file names handed to ``tf.train.string_input_producer``.
        num_classes: depth of the one-hot label vector.

    Returns:
        A pair ``(label, image)`` of tensors -- note the label comes first.
    """
    # Turn the file names into a queue feeding the input pipeline.
    filename_queue = tf.train.string_input_producer(files)

    # Read one serialized example at a time from the queue.
    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(filename_queue)

    # image/class/label: integer denoting the index in a classification layer.
    # image/encoded: string containing JPEG encoded image
    feature_spec = {
        'image/class/label': tf.FixedLenFeature([], tf.int64),
        'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
                                            default_value=''),
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)
    class_index = parsed['image/class/label']
    jpeg_bytes = parsed['image/encoded']

    # Decode the JPEG as grayscale, convert to float32 and flatten.
    pixels = tf.image.decode_jpeg(jpeg_bytes, channels=1)
    pixels = tf.image.convert_image_dtype(pixels, dtype=tf.float32)
    pixels = tf.reshape(pixels, [IMAGE_WIDTH*IMAGE_HEIGHT])

    # Represent the label as a one hot vector.
    one_hot_label = tf.stack(tf.one_hot(class_index, num_classes))
    return one_hot_label, pixels
# training parameters
batch_size = 32
# lr = 0.0002
train_epoch = 30
global_step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(0.0002, global_step, 500, 0.95, staircase=True)

SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
# Default paths.
DEFAULT_LABEL_FILE = os.path.join(SCRIPT_PATH, './labels.txt')
DEFAULT_TFRECORDS_DIR = os.path.join(SCRIPT_PATH, 'tfrecords-output')
MODEL_NAME = 'hangul_tensorflow'
IMAGE_WIDTH = 64
IMAGE_HEIGHT = 64
DEFAULT_NUM_TRAIN_STEPS = 117453  # (for 30 epochs as my training set is 391510)
# Size of the training set (taken from the note above) and the number of
# batches that make up ONE pass over it.  117453 is the step count for all
# 30 epochs, so it must not be used as the per-epoch step count.
NUM_TRAIN_EXAMPLES = 391510
STEPS_PER_EPOCH = NUM_TRAIN_EXAMPLES // batch_size

# Graph definition and model training.
# First create the input pipeline that reads the TFRecords files and
# produces random batches of images and labels.

# One label per line; the number of lines is the number of classes.
with io.open(DEFAULT_LABEL_FILE, 'r', encoding='utf-8') as label_file:
    labels = label_file.read().splitlines()
num_classes = len(labels)

print('Processing data...')
tf_record_pattern = os.path.join(DEFAULT_TFRECORDS_DIR, '%s-*' % 'train')
train_data_files = tf.gfile.Glob(tf_record_pattern)
label, image = get_image(train_data_files, num_classes)

# Associate objects with a randomly selected batch of labels and images.
image_batch, label_batch = tf.train.shuffle_batch(
    [image, label], batch_size=batch_size,
    capacity=2000,
    min_after_dequeue=1000)

# variables : input
# (img_size is defined elsewhere in this file.)
x = tf.placeholder(tf.float32, shape=(None, img_size, img_size, 1))
z = tf.placeholder(tf.float32, shape=(None, 1, 1, 100))
y_label = tf.placeholder(tf.float32, shape=(None, 1, 1, num_classes))
y_fill = tf.placeholder(tf.float32, shape=(None, img_size, img_size, num_classes))
isTrain = tf.placeholder(dtype=tf.bool)

# Initialize the queue threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

# training-loop
print('training start!')
# Repetition pattern that spreads a (batch, 1, 1, classes) label over every
# pixel position.  np.tile keeps the float32 dtype of its input, whereas
# multiplying by np.ones(...) allocated float64 arrays twice that size.
# NOTE: y_fill_ is still batch_size * img_size * img_size * num_classes
# float32 values per feed; lower batch_size if the GPU runs out of memory.
label_fill_reps = (1, img_size, img_size, 1)
for epoch in range(train_epoch):
    G_losses = []
    D_losses = []
    for step in range(STEPS_PER_EPOCH):
        # update discriminator
        train_images, train_labels = sess.run([image_batch, label_batch])
        # get_image() yields pixels in [0, 1]; rescale to [-1, 1] so the real
        # images share the range of the generator's tanh output.
        x_ = (train_images.reshape(-1, img_size, img_size, 1) - 0.5) / 0.5
        y_label_ = train_labels.reshape(
            [batch_size, 1, 1, num_classes]).astype(np.float32, copy=False)
        y_fill_ = np.tile(y_label_, label_fill_reps)
        z_ = np.random.normal(0, 1, (batch_size, 1, 1, 100))
        loss_d_, _ = sess.run([D_loss, D_optim], {x: x_, z: z_, y_fill: y_fill_, y_label: y_label_, isTrain: True})
        D_losses.append(loss_d_)

        # update generator
        z_ = np.random.normal(0, 1, (batch_size, 1, 1, 100))
        # Sample from ALL classes (the upper bound of randint is exclusive);
        # the previous bound of 9 only ever produced classes 0..8.
        y_ = np.random.randint(0, num_classes, (batch_size, 1))
        # onehot is defined elsewhere -- presumably an identity matrix with
        # num_classes rows; verify it covers every sampled class index.
        y_label_ = onehot[y_.astype(np.int32)].reshape(
            [batch_size, 1, 1, num_classes]).astype(np.float32, copy=False)
        y_fill_ = np.tile(y_label_, label_fill_reps)
        loss_g_, _ = sess.run([G_loss, G_optim], {z: z_, x: x_, y_fill: y_fill_, y_label: y_label_, isTrain: True})
        G_losses.append(loss_g_)
这些是我的系统规格
名称:GeForce GTX 1070 计算能力主版本号(major):6 次版本号(minor):1 内存时钟频率(GHz):1.645
pciBusID:0000:01:00.0
总内存:8.00GiB 空闲内存:6.62GiB
我该如何解决我的问题?
经过一番自行搜索之后,我找到了解决方案。我应用了这个回答(answer)中的一些技巧。我将批量大小从 32 减少到 16,这会导致训练速度变慢,但我不得不做出一些权衡 :)。我还通过减少隐藏层中神经元的数量来更改 D 和 G 的结构。最后,我应用了上述回答中一些对我有帮助的 TensorFlow 内存分配技巧。
希望我的回答能帮到像我这样的初学者
我正在参照此链接,在我自己的数据集上实现 cDCGAN。我的数据集包含近 391510 张图像。我的数据集的图像大小是 64,而该链接中使用的 MNIST 图像大小是 28。我的数据集有 2350 个标签,而 MNIST 数据集有 10 个标签。
我的数据集采用 .tfrecords 格式,因此我使用 get_image() 函数从中检索一批图像和标签,如下所示。当我运行我的代码时,出现以下错误:
`tensorflow.python.framework.errors_impl.InternalError: Dst tensor is not initialized.
[[Node: _arg_Placeholder_3_0_3/_43 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_2488__arg_Placeholder_3_0_3", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[Node: discriminator_1/batch_normalization/AssignMovingAvg_1/_86 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_2364_discriminator_1/batch_normalization/AssignMovingAvg_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]`
当我搜索这个错误时,我发现如果批量大小很大,就会发生这种情况,所以我将批量大小更改为 32,然后我得到了这个新错误。
` tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[32,64,64,2351] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[Node: discriminator/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_Placeholder_0_0/_41, _arg_Placeholder_3_0_3/_43, discriminator/concat/axis)]]
Caused by op 'discriminator/concat', defined at:
File "cdcgan.py", line 221, in <module>
D_real, D_real_logits = discriminator(x, y_fill, isTrain)
File "cdcgan.py", line 48, in discriminator
cat1 = tf.concat([x, y_fill], 3)
`
我更改默认代码的代码部分如下
# Width and height of one input image; get_image() flattens every decoded
# image to IMAGE_WIDTH * IMAGE_HEIGHT values.
IMAGE_WIDTH, IMAGE_HEIGHT = 64, 64
# G(z)
def generator(x, y_label, isTrain=True, reuse=False):
    """Build the conditional generator graph.

    The two inputs are joined along axis 3 and pushed through three
    transposed convolutions (256, 128 and 1 output channels); the first two
    are followed by batch normalization and a leaky ReLU, the last one by
    tanh.

    Args:
        x: 4-D tensor concatenated with ``y_label`` on axis 3 (presumably the
            latent noise batch -- TODO confirm at the call site).
        y_label: 4-D conditioning tensor; must be concatenable with ``x`` on
            axis 3.
        isTrain: forwarded as ``training`` to both batch-normalization layers.
        reuse: forwarded to ``tf.variable_scope('generator', ...)``.

    Returns:
        The tanh-activated output of the final transposed convolution.
    """
    with tf.variable_scope('generator', reuse=reuse):
        # Every layer shares the same weight / bias initializers.
        layer_init = {
            'kernel_initializer': tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
            'bias_initializer': tf.constant_initializer(0.0),
        }

        # concat layer
        joined = tf.concat([x, y_label], 3)

        # 1st hidden layer: 16x16 kernel, stride 1, 'valid' padding
        up1 = tf.layers.conv2d_transpose(
            joined, 256, [16, 16], strides=(1, 1), padding='valid', **layer_init)
        act1 = lrelu(tf.layers.batch_normalization(up1, training=isTrain), 0.2)

        # 2nd hidden layer: stride 2 doubles the spatial size
        up2 = tf.layers.conv2d_transpose(
            act1, 128, [5, 5], strides=(2, 2), padding='same', **layer_init)
        act2 = lrelu(tf.layers.batch_normalization(up2, training=isTrain), 0.2)

        # output layer: single channel, stride 2 doubles the spatial size again
        up3 = tf.layers.conv2d_transpose(
            act2, 1, [5, 5], strides=(2, 2), padding='same', **layer_init)
        return tf.nn.tanh(up3)
# D(x)
def discriminator(x, y_fill, isTrain=True, reuse=False):
    """Build the conditional discriminator graph.

    ``x`` and ``y_fill`` are joined along axis 3, passed through two
    stride-2 convolutions (128 and 256 filters) and reduced to a single
    channel by a 16x16 'valid' convolution.

    NOTE: the concatenated tensor carries the channels of ``x`` plus the
    channels of ``y_fill`` at the full spatial size of the input, so a
    one-hot ``y_fill`` with thousands of classes makes this tensor very
    large.

    Args:
        x: 4-D image tensor.
        y_fill: 4-D conditioning tensor; must be concatenable with ``x`` on
            axis 3.
        isTrain: forwarded as ``training`` to the batch-normalization layer.
        reuse: forwarded to ``tf.variable_scope('discriminator', ...)``.

    Returns:
        A pair ``(probability, logits)``: the sigmoid of the last
        convolution's output and that raw output itself.
    """
    with tf.variable_scope('discriminator', reuse=reuse):
        # initializers shared by the layers below
        kernel_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.02)
        bias_init = tf.constant_initializer(0.0)

        # concat layer
        joined = tf.concat([x, y_fill], 3)

        # 1st hidden layer (no batch normalization on this one)
        down1 = tf.layers.conv2d(
            joined, 128, [5, 5], strides=(2, 2), padding='same',
            kernel_initializer=kernel_init, bias_initializer=bias_init)
        act1 = lrelu(down1, 0.2)

        # 2nd hidden layer
        down2 = tf.layers.conv2d(
            act1, 256, [5, 5], strides=(2, 2), padding='same',
            kernel_initializer=kernel_init, bias_initializer=bias_init)
        act2 = lrelu(tf.layers.batch_normalization(down2, training=isTrain), 0.2)

        # output layer: bias initializer is left at the layer default
        logits = tf.layers.conv2d(
            act2, 1, [16, 16], strides=(1, 1), padding='valid',
            kernel_initializer=kernel_init)
        return tf.nn.sigmoid(logits), logits
def get_image(files, num_classes):
    """Create the graph ops that read one (label, image) example from TFRecords.

    Each record is parsed for an int64 class index and a JPEG-encoded image.
    The image is decoded as a single-channel picture, converted to float32
    and flattened to ``IMAGE_WIDTH * IMAGE_HEIGHT`` values; the class index
    is turned into a one-hot vector of length ``num_classes``.

    Args:
        files: TFRecords file names handed to ``tf.train.string_input_producer``.
        num_classes: depth of the one-hot label vector.

    Returns:
        A pair ``(label, image)`` of tensors -- note the label comes first.
    """
    # Turn the file names into a queue feeding the input pipeline.
    filename_queue = tf.train.string_input_producer(files)

    # Read one serialized example at a time from the queue.
    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(filename_queue)

    # image/class/label: integer denoting the index in a classification layer.
    # image/encoded: string containing JPEG encoded image
    feature_spec = {
        'image/class/label': tf.FixedLenFeature([], tf.int64),
        'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
                                            default_value=''),
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)
    class_index = parsed['image/class/label']
    jpeg_bytes = parsed['image/encoded']

    # Decode the JPEG as grayscale, convert to float32 and flatten.
    pixels = tf.image.decode_jpeg(jpeg_bytes, channels=1)
    pixels = tf.image.convert_image_dtype(pixels, dtype=tf.float32)
    pixels = tf.reshape(pixels, [IMAGE_WIDTH*IMAGE_HEIGHT])

    # Represent the label as a one hot vector.
    one_hot_label = tf.stack(tf.one_hot(class_index, num_classes))
    return one_hot_label, pixels
# training parameters
batch_size = 32
# lr = 0.0002
train_epoch = 30
global_step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(0.0002, global_step, 500, 0.95, staircase=True)

SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
# Default paths.
DEFAULT_LABEL_FILE = os.path.join(SCRIPT_PATH, './labels.txt')
DEFAULT_TFRECORDS_DIR = os.path.join(SCRIPT_PATH, 'tfrecords-output')
MODEL_NAME = 'hangul_tensorflow'
IMAGE_WIDTH = 64
IMAGE_HEIGHT = 64
DEFAULT_NUM_TRAIN_STEPS = 117453  # (for 30 epochs as my training set is 391510)
# Size of the training set (taken from the note above) and the number of
# batches that make up ONE pass over it.  117453 is the step count for all
# 30 epochs, so it must not be used as the per-epoch step count.
NUM_TRAIN_EXAMPLES = 391510
STEPS_PER_EPOCH = NUM_TRAIN_EXAMPLES // batch_size

# Graph definition and model training.
# First create the input pipeline that reads the TFRecords files and
# produces random batches of images and labels.

# One label per line; the number of lines is the number of classes.
with io.open(DEFAULT_LABEL_FILE, 'r', encoding='utf-8') as label_file:
    labels = label_file.read().splitlines()
num_classes = len(labels)

print('Processing data...')
tf_record_pattern = os.path.join(DEFAULT_TFRECORDS_DIR, '%s-*' % 'train')
train_data_files = tf.gfile.Glob(tf_record_pattern)
label, image = get_image(train_data_files, num_classes)

# Associate objects with a randomly selected batch of labels and images.
image_batch, label_batch = tf.train.shuffle_batch(
    [image, label], batch_size=batch_size,
    capacity=2000,
    min_after_dequeue=1000)

# variables : input
# (img_size is defined elsewhere in this file.)
x = tf.placeholder(tf.float32, shape=(None, img_size, img_size, 1))
z = tf.placeholder(tf.float32, shape=(None, 1, 1, 100))
y_label = tf.placeholder(tf.float32, shape=(None, 1, 1, num_classes))
y_fill = tf.placeholder(tf.float32, shape=(None, img_size, img_size, num_classes))
isTrain = tf.placeholder(dtype=tf.bool)

# Initialize the queue threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

# training-loop
print('training start!')
# Repetition pattern that spreads a (batch, 1, 1, classes) label over every
# pixel position.  np.tile keeps the float32 dtype of its input, whereas
# multiplying by np.ones(...) allocated float64 arrays twice that size.
# NOTE: y_fill_ is still batch_size * img_size * img_size * num_classes
# float32 values per feed; lower batch_size if the GPU runs out of memory.
label_fill_reps = (1, img_size, img_size, 1)
for epoch in range(train_epoch):
    G_losses = []
    D_losses = []
    for step in range(STEPS_PER_EPOCH):
        # update discriminator
        train_images, train_labels = sess.run([image_batch, label_batch])
        # get_image() yields pixels in [0, 1]; rescale to [-1, 1] so the real
        # images share the range of the generator's tanh output.
        x_ = (train_images.reshape(-1, img_size, img_size, 1) - 0.5) / 0.5
        y_label_ = train_labels.reshape(
            [batch_size, 1, 1, num_classes]).astype(np.float32, copy=False)
        y_fill_ = np.tile(y_label_, label_fill_reps)
        z_ = np.random.normal(0, 1, (batch_size, 1, 1, 100))
        loss_d_, _ = sess.run([D_loss, D_optim], {x: x_, z: z_, y_fill: y_fill_, y_label: y_label_, isTrain: True})
        D_losses.append(loss_d_)

        # update generator
        z_ = np.random.normal(0, 1, (batch_size, 1, 1, 100))
        # Sample from ALL classes (the upper bound of randint is exclusive);
        # the previous bound of 9 only ever produced classes 0..8.
        y_ = np.random.randint(0, num_classes, (batch_size, 1))
        # onehot is defined elsewhere -- presumably an identity matrix with
        # num_classes rows; verify it covers every sampled class index.
        y_label_ = onehot[y_.astype(np.int32)].reshape(
            [batch_size, 1, 1, num_classes]).astype(np.float32, copy=False)
        y_fill_ = np.tile(y_label_, label_fill_reps)
        loss_g_, _ = sess.run([G_loss, G_optim], {z: z_, x: x_, y_fill: y_fill_, y_label: y_label_, isTrain: True})
        G_losses.append(loss_g_)
这些是我的系统规格
名称:GeForce GTX 1070 计算能力主版本号(major):6 次版本号(minor):1 内存时钟频率(GHz):1.645 pciBusID:0000:01:00.0 总内存:8.00GiB 空闲内存:6.62GiB
我该如何解决我的问题?
经过一番自行搜索之后,我找到了解决方案。我应用了这个回答(answer)中的一些技巧。我将批量大小从 32 减少到 16,这会导致训练速度变慢,但我不得不做出一些权衡 :)。我还通过减少隐藏层中神经元的数量来更改 D 和 G 的结构。最后,我应用了上述回答中一些对我有帮助的 TensorFlow 内存分配技巧。
希望我的回答能帮到像我这样的初学者